summaryrefslogtreecommitdiffstats
path: root/fs/tracefs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2023-09-02 01:34:25 +0200
committer Linus Torvalds <torvalds@linux-foundation.org> 2023-09-02 01:34:25 +0200
commit 34232fcfe9a383bea802af682baae5c99f22376c (patch)
tree 7935364548ebb29533b499728fac1e48c373e269 /fs/tracefs
parent Merge tag 'wq-for-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq (diff)
parent tracefs: Remove kerneldoc from struct eventfs_file (diff)
download linux-34232fcfe9a383bea802af682baae5c99f22376c.tar.xz
linux-34232fcfe9a383bea802af682baae5c99f22376c.zip
Merge tag 'trace-v6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull tracing updates from Steven Rostedt:
 "User visible changes:

   - Added an easier way to filter with cpumasks:

       # echo 'cpumask & CPUS{17-42}' > /sys/kernel/tracing/events/ipi_send_cpumask/filter

   - Show actual size of ring buffer after modifying the ring buffer
     size via buffer_size_kb. Currently it just returns what was
     written, but the actual size rounds up to the sub buffer size.
     Show that real size instead.

  Major changes:

   - Added "eventfs". This is the code that handles the inodes and
     dentries of the tracefs/events directory. As there are thousands
     of events, and each event has several inodes and dentries that
     currently exist even when tracing is never used, they take up
     precious memory. Instead, eventfs will allocate the inodes and
     dentries in a JIT way (similar to what procfs does). There is now
     metadata that handles the events and subdirectories, and will
     create the inodes and dentries when they are used.

     Note, I also have patches that remove the subdirectory meta data,
     but will wait till the next merge window before applying them.
     It's a little more complex, and I want to make sure the dynamic
     code works properly before adding more complexity, making it
     easier to revert if need be.

  Minor changes:

   - Optimization to user event list traversal

   - Remove intermediate permission of tracefs files (note the
     intermediate permission removes all access to the files so it is
     not a security concern, but just a clean up)

   - Add the complex fix to FORTIFY_SOURCE to the kernel stack event
     logic

   - Other minor cleanups"

* tag 'trace-v6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: (29 commits)
  tracefs: Remove kerneldoc from struct eventfs_file
  tracefs: Avoid changing i_mode to a temp value
  tracing/user_events: Optimize safe list traversals
  ftrace: Remove empty declaration ftrace_enable_daemon() and ftrace_disable_daemon()
  tracing: Remove unused function declarations
  tracing/filters: Document cpumask filtering
  tracing/filters: Further optimise scalar vs cpumask comparison
  tracing/filters: Optimise CPU vs cpumask filtering when the user mask is a single CPU
  tracing/filters: Optimise scalar vs cpumask filtering when the user mask is a single CPU
  tracing/filters: Optimise cpumask vs cpumask filtering when user mask is a single CPU
  tracing/filters: Enable filtering the CPU common field by a cpumask
  tracing/filters: Enable filtering a scalar field by a cpumask
  tracing/filters: Enable filtering a cpumask field by another cpumask
  tracing/filters: Dynamically allocate filter_pred.regex
  test: ftrace: Fix kprobe test for eventfs
  eventfs: Move tracing/events to eventfs
  eventfs: Implement removal of meta data from eventfs
  eventfs: Implement functions to create files and dirs when accessed
  eventfs: Implement eventfs lookup, read, open functions
  eventfs: Implement eventfs file add functions
  ...
Diffstat (limited to 'fs/tracefs')
-rw-r--r--  fs/tracefs/Makefile       |   1
-rw-r--r--  fs/tracefs/event_inode.c  | 807
-rw-r--r--  fs/tracefs/inode.c        | 157
-rw-r--r--  fs/tracefs/internal.h     |  29
4 files changed, 982 insertions, 12 deletions
diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile
index 7c35a282b484..73c56da8e284 100644
--- a/fs/tracefs/Makefile
+++ b/fs/tracefs/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
tracefs-objs := inode.o
+tracefs-objs += event_inode.o
obj-$(CONFIG_TRACING) += tracefs.o
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
new file mode 100644
index 000000000000..237c6f370ad9
--- /dev/null
+++ b/fs/tracefs/event_inode.c
@@ -0,0 +1,807 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * event_inode.c - part of tracefs, a pseudo file system for activating tracing
+ *
+ * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt (VMware) <rostedt@goodmis.org>
+ * Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com>
+ *
+ * eventfs is used to dynamically create inodes and dentries based on the
+ * meta data provided by the tracing system.
+ *
+ * eventfs stores the meta-data of files/dirs and holds off on creating
+ * inodes/dentries of the files. When accessed, the eventfs will create the
+ * inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up
+ * and delete the inodes/dentries when they are no longer referenced.
+ */
+#include <linux/fsnotify.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/workqueue.h>
+#include <linux/security.h>
+#include <linux/tracefs.h>
+#include <linux/kref.h>
+#include <linux/delay.h>
+#include "internal.h"
+
+struct eventfs_inode {
+ struct list_head e_top_files; /* children of this directory: struct eventfs_file entries linked through ->list */
+};
+
+/*
+ * struct eventfs_file - hold the properties of the eventfs files and
+ * directories.
+ * @name: the name of the file or directory to create (a kstrdup() copy
+ *        owned by this structure and released in free_ef())
+ * @d_parent: holds parent's dentry
+ * @dentry: once accessed holds dentry; NULL while no dentry exists
+ * @list: links this entry into the parent directory's e_top_files list
+ * @ei: for a directory, holds the list of files and directories within
+ *      it; NULL for a regular file
+ * @fop: file_operations for file or directory
+ * @iop: inode_operations for file or directory
+ * @data: something that the caller will want to get to later on (copied
+ *        to inode->i_private when the inode is created)
+ * @mode: the permission that the file or directory should have
+ */
+struct eventfs_file {
+ const char *name;
+ struct dentry *d_parent;
+ struct dentry *dentry;
+ struct list_head list;
+ struct eventfs_inode *ei;
+ const struct file_operations *fop;
+ const struct inode_operations *iop;
+ /*
+ * Union - used for deletion
+ * @del_list: list of eventfs_file to delete
+ * @rcu: eventfs_file to delete in RCU
+ * @is_freed: node is freed if one of the above is set. All three
+ *            members share storage and the structure is allocated
+ *            zeroed, so this reads 0 until del_list or rcu is written.
+ */
+ union {
+ struct list_head del_list;
+ struct rcu_head rcu;
+ unsigned long is_freed;
+ };
+ void *data;
+ umode_t mode;
+};
+
+static DEFINE_MUTEX(eventfs_mutex); /* serializes list updates and accesses to ef->dentry / ef->is_freed */
+DEFINE_STATIC_SRCU(eventfs_srcu); /* read side for e_top_files walks; eventfs_file is freed through call_srcu() */
+
+static struct dentry *eventfs_root_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags);
+static int dcache_dir_open_wrapper(struct inode *inode, struct file *file);
+static int eventfs_release(struct inode *inode, struct file *file);
+
+static const struct inode_operations eventfs_root_dir_inode_operations = {
+ .lookup = eventfs_root_lookup, /* creates the looked-up child on demand */
+};
+
+static const struct file_operations eventfs_file_operations = {
+ .open = dcache_dir_open_wrapper, /* creates or pins every child, then calls dcache_dir_open() */
+ .read = generic_read_dir,
+ .iterate_shared = dcache_readdir,
+ .llseek = generic_file_llseek,
+ .release = eventfs_release, /* dput()s the children, then calls dcache_dir_close() */
+};
+
+/**
+ * create_file - create a file in the tracefs filesystem
+ * @name: the name of the file to create.
+ * @mode: the permission that the file should have. If no file type bits
+ *        are set, S_IFREG is assumed.
+ * @parent: parent dentry for this file.
+ * @data: stored in the i_private field of the new inode.
+ * @fop: struct file_operations that should be used for this file.
+ *
+ * Creates the dentry and inode of an eventfs file and marks the inode
+ * with TRACEFS_EVENT_INODE.
+ *
+ * Return: the value of eventfs_end_creating() on success. On failure the
+ * result is %NULL if @mode does not describe a regular file, the ERR_PTR()
+ * handed back by eventfs_start_creating() if no dentry could be obtained
+ * (for instance -EEXIST when the name already has an inode), or the value
+ * of eventfs_failed_creating() if no inode could be allocated. Callers
+ * must therefore test the result with IS_ERR_OR_NULL().
+ */
+static struct dentry *create_file(const char *name, umode_t mode,
+				  struct dentry *parent, void *data,
+				  const struct file_operations *fop)
+{
+	struct tracefs_inode *ti;
+	struct dentry *dentry;
+	struct inode *inode;
+
+	if (!(mode & S_IFMT))
+		mode |= S_IFREG;
+
+	if (WARN_ON_ONCE(!S_ISREG(mode)))
+		return NULL;
+
+	dentry = eventfs_start_creating(name, parent);
+
+	if (IS_ERR(dentry))
+		return dentry;
+
+	inode = tracefs_get_inode(dentry->d_sb);
+	if (unlikely(!inode))
+		return eventfs_failed_creating(dentry);
+
+	inode->i_mode = mode;
+	inode->i_fop = fop;
+	inode->i_private = data;
+
+	/* Lets tracefs_dentry_iput() recognise this inode as an eventfs one. */
+	ti = get_tracefs(inode);
+	ti->flags |= TRACEFS_EVENT_INODE;
+	d_instantiate(dentry, inode);
+	fsnotify_create(dentry->d_parent->d_inode, dentry);
+	return eventfs_end_creating(dentry);
+}
+
+/**
+ * create_dir - create a directory in the tracefs filesystem
+ * @name: the name of the directory to create.
+ * @parent: parent dentry for this directory.
+ * @data: stored in the i_private field of the new inode.
+ *
+ * Builds the dentry and inode of an eventfs directory. The inode gets the
+ * eventfs lookup and directory file operations and is flagged with
+ * TRACEFS_EVENT_INODE.
+ *
+ * Return: the value of eventfs_end_creating() on success, the ERR_PTR()
+ * from eventfs_start_creating() if no dentry could be obtained, or the
+ * value of eventfs_failed_creating() if no inode could be allocated.
+ */
+static struct dentry *create_dir(const char *name, struct dentry *parent, void *data)
+{
+	struct dentry *dir_dentry;
+	struct inode *dir_inode;
+
+	dir_dentry = eventfs_start_creating(name, parent);
+	if (IS_ERR(dir_dentry))
+		return dir_dentry;
+
+	dir_inode = tracefs_get_inode(dir_dentry->d_sb);
+	if (unlikely(!dir_inode))
+		return eventfs_failed_creating(dir_dentry);
+
+	/* Fill in the inode completely before it becomes visible. */
+	get_tracefs(dir_inode)->flags |= TRACEFS_EVENT_INODE;
+	dir_inode->i_private = data;
+	dir_inode->i_fop = &eventfs_file_operations;
+	dir_inode->i_op = &eventfs_root_dir_inode_operations;
+	dir_inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+
+	inc_nlink(dir_inode);
+	d_instantiate(dir_dentry, dir_inode);
+	inc_nlink(dir_dentry->d_parent->d_inode);
+	fsnotify_mkdir(dir_dentry->d_parent->d_inode, dir_dentry);
+
+	return eventfs_end_creating(dir_dentry);
+}
+
+/**
+ * eventfs_set_ef_status_free - detach a dentry from its eventfs_file
+ * @dentry: dentry whose eventfs_file link is to be dropped
+ *
+ * eventfs_set_ef_status_free will be called if no more
+ * references remain. It is invoked from tracefs_dentry_iput() for inodes
+ * flagged TRACEFS_EVENT_INODE. Under eventfs_mutex it clears both
+ * dentry->d_fsdata and the ->dentry back pointer of the eventfs_file, so
+ * that create_dentry() builds a new dentry on the next access. Nothing is
+ * changed when the parent is not an eventfs directory, when d_fsdata is
+ * already NULL, or when d_fsdata carries the "being removed" tag (LSB set).
+ */
+void eventfs_set_ef_status_free(struct dentry *dentry)
+{
+ struct tracefs_inode *ti_parent;
+ struct eventfs_file *ef;
+
+ mutex_lock(&eventfs_mutex);
+ ti_parent = get_tracefs(dentry->d_parent->d_inode);
+ if (!ti_parent || !(ti_parent->flags & TRACEFS_EVENT_INODE))
+ goto out;
+
+ ef = dentry->d_fsdata;
+ if (!ef)
+ goto out;
+
+ /*
+ * If ef was freed, then the LSB bit is set for d_fsdata.
+ * But this should not happen, as it should still have a
+ * ref count that prevents it. Warn in case it does.
+ */
+ if (WARN_ON_ONCE((unsigned long)ef & 1))
+ goto out;
+
+ dentry->d_fsdata = NULL;
+ ef->dentry = NULL;
+out:
+ mutex_unlock(&eventfs_mutex);
+}
+
+/**
+ * eventfs_post_create_dir - finish setting up a freshly created directory
+ * @ef: eventfs_file of the directory whose dentry was just created
+ *
+ * Points every child's d_parent at the new directory dentry and stores the
+ * directory's eventfs_inode in the private field of its tracefs inode.
+ */
+static void eventfs_post_create_dir(struct eventfs_file *ef)
+{
+	struct tracefs_inode *dir_ti = get_tracefs(ef->dentry->d_inode);
+	struct eventfs_file *child;
+
+	/* The caller already holds the eventfs_srcu read lock. */
+	list_for_each_entry_srcu(child, &ef->ei->e_top_files, list,
+				 srcu_read_lock_held(&eventfs_srcu))
+		child->d_parent = ef->dentry;
+
+	dir_ti->private = ef->ei;
+}
+
+/**
+ * create_dentry - helper function to create dentry
+ * @ef: eventfs_file of file or directory to create
+ * @parent: parent dentry
+ * @lookup: true if called from lookup routine
+ *
+ * Used to create a dentry for file/dir, executes post dentry creation routine
+ *
+ * If @ef already has a dentry it is reused. With @lookup false (directory
+ * open) the returned dentry carries a reference for the caller and the
+ * parent inode lock is taken around the creation. With @lookup true no
+ * reference is kept for the caller and the parent inode is not locked here
+ * (NOTE(review): presumably the VFS lookup path already holds it - confirm).
+ *
+ * Returns the dentry, or NULL if @ef has been freed, if creation failed and
+ * no other task installed a dentry in the meantime, or if the new dentry
+ * could not be attached to @ef and was invalidated.
+ */
+static struct dentry *
+create_dentry(struct eventfs_file *ef, struct dentry *parent, bool lookup)
+{
+ bool invalidate = false;
+ struct dentry *dentry;
+
+ mutex_lock(&eventfs_mutex);
+ if (ef->is_freed) {
+ mutex_unlock(&eventfs_mutex);
+ return NULL;
+ }
+ if (ef->dentry) {
+ dentry = ef->dentry;
+ /* On dir open, up the ref count */
+ if (!lookup)
+ dget(dentry);
+ mutex_unlock(&eventfs_mutex);
+ return dentry;
+ }
+ mutex_unlock(&eventfs_mutex);
+
+ if (!lookup)
+ inode_lock(parent->d_inode);
+
+ if (ef->ei) /* only directories have an eventfs_inode */
+ dentry = create_dir(ef->name, parent, ef->data);
+ else
+ dentry = create_file(ef->name, ef->mode, parent,
+ ef->data, ef->fop);
+
+ if (!lookup)
+ inode_unlock(parent->d_inode);
+
+ mutex_lock(&eventfs_mutex);
+ if (IS_ERR_OR_NULL(dentry)) { /* e.g. -EEXIST from eventfs_start_creating() */
+ /* If the ef was already updated get it */
+ dentry = ef->dentry;
+ if (dentry && !lookup)
+ dget(dentry);
+ mutex_unlock(&eventfs_mutex);
+ return dentry;
+ }
+
+ if (!ef->dentry && !ef->is_freed) {
+ ef->dentry = dentry;
+ if (ef->ei)
+ eventfs_post_create_dir(ef);
+ dentry->d_fsdata = ef; /* read back by eventfs_set_ef_status_free() */
+ } else {
+ /* A race here, should try again (unless freed) */
+ invalidate = true;
+
+ /*
+ * Should never happen unless we get here due to being freed.
+ * Otherwise it means two dentries exist with the same name.
+ */
+ WARN_ON_ONCE(!ef->is_freed);
+ }
+ mutex_unlock(&eventfs_mutex);
+ if (invalidate)
+ d_invalidate(dentry);
+
+ if (lookup || invalidate) /* drop the reference obtained while creating */
+ dput(dentry);
+
+ return invalidate ? NULL : dentry;
+}
+
+/* Check, under eventfs_mutex, that @ef is not freed and is named @name. */
+static bool match_event_file(struct eventfs_file *ef, const char *name)
+{
+	bool matched = false;
+
+	mutex_lock(&eventfs_mutex);
+	if (!ef->is_freed)
+		matched = !strcmp(ef->name, name);
+	mutex_unlock(&eventfs_mutex);
+
+	return matched;
+}
+
+/**
+ * eventfs_root_lookup - lookup routine to create file/dir
+ * @dir: in which a lookup is being done
+ * @dentry: file/dir dentry
+ * @flags: to pass as flags parameter to simple lookup
+ *
+ * Used to create a dynamic file/dir within @dir. Use the eventfs_inode
+ * list of meta data to find the information needed to create the file/dir.
+ *
+ * Returns NULL when @dir is not an eventfs directory or when no live entry
+ * carries the name of @dentry; otherwise returns what simple_lookup()
+ * returned. The dentry produced by create_dentry() is not handed back.
+ */
+static struct dentry *eventfs_root_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags)
+{
+ struct tracefs_inode *ti;
+ struct eventfs_inode *ei;
+ struct eventfs_file *ef;
+ struct dentry *ret = NULL;
+ int idx;
+
+ ti = get_tracefs(dir);
+ if (!(ti->flags & TRACEFS_EVENT_INODE))
+ return NULL;
+
+ ei = ti->private;
+ idx = srcu_read_lock(&eventfs_srcu);
+ list_for_each_entry_srcu(ef, &ei->e_top_files, list,
+ srcu_read_lock_held(&eventfs_srcu)) {
+ if (!match_event_file(ef, dentry->d_name.name))
+ continue;
+ ret = simple_lookup(dir, dentry, flags);
+ create_dentry(ef, ef->d_parent, true); /* lookup == true: no reference kept, result unused */
+ break; /* stop at the first matching entry */
+ }
+ srcu_read_unlock(&eventfs_srcu, idx);
+ return ret;
+}
+
+/**
+ * eventfs_release - called to release eventfs file/dir
+ * @inode: inode of the directory being released
+ * @file: file being released (only passed on to dcache_dir_close())
+ *
+ * Calls dput() once on every child of the directory that currently has a
+ * dentry, then finishes with dcache_dir_close().
+ *
+ * Return: -EINVAL if @inode is not an eventfs inode, otherwise the result
+ * of dcache_dir_close().
+ */
+static int eventfs_release(struct inode *inode, struct file *file)
+{
+	struct tracefs_inode *ti = get_tracefs(inode);
+	struct eventfs_inode *ei;
+	struct eventfs_file *child;
+	int srcu_idx;
+
+	if (!(ti->flags & TRACEFS_EVENT_INODE))
+		return -EINVAL;
+
+	ei = ti->private;
+	srcu_idx = srcu_read_lock(&eventfs_srcu);
+	list_for_each_entry_srcu(child, &ei->e_top_files, list,
+				 srcu_read_lock_held(&eventfs_srcu)) {
+		struct dentry *child_dentry;
+
+		/* Sample ->dentry under the mutex; dput() runs unlocked. */
+		mutex_lock(&eventfs_mutex);
+		child_dentry = child->dentry;
+		mutex_unlock(&eventfs_mutex);
+
+		if (child_dentry)
+			dput(child_dentry);
+	}
+	srcu_read_unlock(&eventfs_srcu, srcu_idx);
+
+	return dcache_dir_close(inode, file);
+}
+
+/**
+ * dcache_dir_open_wrapper - eventfs open wrapper
+ * @inode: passed through to dcache_dir_open()
+ * @file: dir to be opened (to create its child)
+ *
+ * Used to dynamically create the file/dir within @file. @file is really a
+ * directory and all the files/dirs of the children within @file will be
+ * created. If any of the files/dirs have already been created, their
+ * reference count will be incremented.
+ *
+ * Return: -EINVAL if @file is not an eventfs directory, otherwise the
+ * result of dcache_dir_open().
+ */
+static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
+{
+	struct tracefs_inode *ti;
+	struct eventfs_inode *ei;
+	struct eventfs_file *ef;
+	struct dentry *dentry = file_dentry(file);
+	struct inode *f_inode = file_inode(file);
+	int idx;
+
+	ti = get_tracefs(f_inode);
+	if (!(ti->flags & TRACEFS_EVENT_INODE))
+		return -EINVAL;
+
+	ei = ti->private;
+	idx = srcu_read_lock(&eventfs_srcu);
+	/*
+	 * The list is protected by SRCU, not by rcu_read_lock(), so use the
+	 * SRCU iterator with its lockdep condition, like the other walkers
+	 * of e_top_files in this file do.
+	 */
+	list_for_each_entry_srcu(ef, &ei->e_top_files, list,
+				 srcu_read_lock_held(&eventfs_srcu)) {
+		create_dentry(ef, dentry, false);
+	}
+	srcu_read_unlock(&eventfs_srcu, idx);
+	return dcache_dir_open(inode, file);
+}
+
+/**
+ * eventfs_prepare_ef - allocate and fill an eventfs_file
+ * @name: the name of the file/directory to create (copied with kstrdup()).
+ * @mode: the permission that the file should have; S_ISDIR() decides
+ *        whether a child list is allocated as well.
+ * @fop: struct file_operations that should be used for this file/directory.
+ * @iop: struct inode_operations that should be used for this file/directory.
+ * @data: something that the caller will want to get to later on.
+ *
+ * Return: the new eventfs_file, or ERR_PTR(-ENOMEM) if any of the
+ * allocations failed (nothing is leaked in that case).
+ */
+static struct eventfs_file *eventfs_prepare_ef(const char *name, umode_t mode,
+					const struct file_operations *fop,
+					const struct inode_operations *iop,
+					void *data)
+{
+	struct eventfs_file *ef = kzalloc(sizeof(*ef), GFP_KERNEL);
+
+	if (!ef)
+		goto err;
+
+	ef->name = kstrdup(name, GFP_KERNEL);
+	if (!ef->name)
+		goto err_free_ef;
+
+	/* kzalloc() left ->ei NULL, which is what a non-directory needs. */
+	if (S_ISDIR(mode)) {
+		ef->ei = kzalloc(sizeof(*ef->ei), GFP_KERNEL);
+		if (!ef->ei)
+			goto err_free_name;
+		INIT_LIST_HEAD(&ef->ei->e_top_files);
+	}
+
+	ef->data = data;
+	ef->mode = mode;
+	ef->fop = fop;
+	ef->iop = iop;
+	return ef;
+
+err_free_name:
+	kfree(ef->name);
+err_free_ef:
+	kfree(ef);
+err:
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * eventfs_create_events_dir - create the trace event structure
+ * @name: the name of the directory to create.
+ * @parent: parent dentry for this file. This should be a directory dentry
+ *          if set. If this parameter is NULL, then the directory will be
+ *          created in the root of the tracefs filesystem.
+ *
+ * This function creates the top of the trace event directory. Unlike the
+ * entries below it, this directory's dentry and inode are created right
+ * away, together with the eventfs_inode that will hold its children.
+ *
+ * Return: the value of tracefs_end_creating() on success, the ERR_PTR()
+ * from tracefs_start_creating() if no dentry could be obtained, or
+ * ERR_PTR(-ENOMEM) if an allocation failed.
+ */
+struct dentry *eventfs_create_events_dir(const char *name,
+					 struct dentry *parent)
+{
+	struct dentry *dentry = tracefs_start_creating(name, parent);
+	struct eventfs_inode *ei;
+	struct tracefs_inode *ti;
+	struct inode *inode;
+
+	if (IS_ERR(dentry))
+		return dentry;
+
+	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+	if (!ei)
+		goto fail;
+
+	inode = tracefs_get_inode(dentry->d_sb);
+	if (unlikely(!inode))
+		goto fail;
+
+	INIT_LIST_HEAD(&ei->e_top_files);
+
+	ti = get_tracefs(inode);
+	ti->flags |= TRACEFS_EVENT_INODE;
+	ti->private = ei;
+
+	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+	inode->i_op = &eventfs_root_dir_inode_operations;
+	inode->i_fop = &eventfs_file_operations;
+
+	/* directory inodes start off with i_nlink == 2 (for "." entry) */
+	inc_nlink(inode);
+	d_instantiate(dentry, inode);
+	inc_nlink(dentry->d_parent->d_inode);
+	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+	return tracefs_end_creating(dentry);
+
+fail:
+	/*
+	 * Every failure after tracefs_start_creating() succeeded must go
+	 * through tracefs_failed_creating(): it unlocks the parent inode
+	 * and drops the dentry. kfree(NULL) is a no-op, so this also
+	 * covers the case where @ei itself could not be allocated.
+	 */
+	kfree(ei);
+	tracefs_failed_creating(dentry);
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * eventfs_add_subsystem_dir - add eventfs subsystem_dir to list to create later
+ * @name: the name of the directory to create.
+ * @parent: dentry of the directory that will contain the new one.
+ *
+ * Only the meta data is recorded here: a directory eventfs_file is
+ * allocated and appended to the child list of @parent. The dentry and
+ * inode are created on the fly when the directory is looked up, and are
+ * removed again when they are no longer referenced.
+ *
+ * Return: the new eventfs_file, ERR_PTR(-EINVAL) if @parent is NULL, or
+ * the ERR_PTR() from eventfs_prepare_ef().
+ */
+struct eventfs_file *eventfs_add_subsystem_dir(const char *name,
+					       struct dentry *parent)
+{
+	struct eventfs_inode *parent_ei;
+	struct eventfs_file *subsys;
+
+	if (!parent)
+		return ERR_PTR(-EINVAL);
+
+	parent_ei = get_tracefs(parent->d_inode)->private;
+
+	subsys = eventfs_prepare_ef(name, S_IFDIR, NULL, NULL, NULL);
+	if (IS_ERR(subsys))
+		return subsys;
+
+	mutex_lock(&eventfs_mutex);
+	list_add_tail(&subsys->list, &parent_ei->e_top_files);
+	subsys->d_parent = parent;
+	mutex_unlock(&eventfs_mutex);
+
+	return subsys;
+}
+
+/**
+ * eventfs_add_dir - add eventfs dir to list to create later
+ * @name: the name of the directory to create.
+ * @ef_parent: parent eventfs_file for this dir.
+ *
+ * This function adds eventfs dir to list.
+ * And all these dirs are created on the fly when they are looked up,
+ * and the dentry and inodes will be removed when they are done.
+ *
+ * Returns the new eventfs_file, ERR_PTR(-EINVAL) if @ef_parent is NULL, or
+ * the ERR_PTR() from eventfs_prepare_ef(). @ef_parent is dereferenced
+ * through ->ei, which is only allocated for directories.
+ */
+struct eventfs_file *eventfs_add_dir(const char *name,
+ struct eventfs_file *ef_parent)
+{
+ struct eventfs_file *ef;
+
+ if (!ef_parent)
+ return ERR_PTR(-EINVAL);
+
+ ef = eventfs_prepare_ef(name, S_IFDIR, NULL, NULL, NULL);
+ if (IS_ERR(ef))
+ return ef;
+
+ mutex_lock(&eventfs_mutex);
+ list_add_tail(&ef->list, &ef_parent->ei->e_top_files);
+ ef->d_parent = ef_parent->dentry; /* may still be NULL; eventfs_post_create_dir() sets it once the parent dentry exists */
+ mutex_unlock(&eventfs_mutex);
+ return ef;
+}
+
+/**
+ * eventfs_add_events_file - add the data needed to create a file for later reference
+ * @name: the name of the file to create.
+ * @mode: the permission that the file should have. If no file type bits
+ *        are set, S_IFREG is assumed.
+ * @parent: parent dentry for this file.
+ * @data: something that the caller will want to get to later on.
+ * @fop: struct file_operations that should be used for this file.
+ *
+ * This function is used to add the information needed to create a
+ * dentry/inode within the top level events directory. The file created
+ * will have the @mode permissions. The @data will be stored in
+ * inode.i_private when the inode is created. The dentry and inodes are
+ * all created when they are referenced, and removed when they are no
+ * longer referenced.
+ *
+ * Return: 0 on success, -EINVAL if @parent is NULL, has no inode or is
+ * not an eventfs directory, or the error reported by eventfs_prepare_ef().
+ */
+int eventfs_add_events_file(const char *name, umode_t mode,
+			    struct dentry *parent, void *data,
+			    const struct file_operations *fop)
+{
+	struct tracefs_inode *ti;
+	struct eventfs_inode *ei;
+	struct eventfs_file *ef;
+
+	if (!parent)
+		return -EINVAL;
+
+	if (!(mode & S_IFMT))
+		mode |= S_IFREG;
+
+	if (!parent->d_inode)
+		return -EINVAL;
+
+	ti = get_tracefs(parent->d_inode);
+	if (!(ti->flags & TRACEFS_EVENT_INODE))
+		return -EINVAL;
+
+	ei = ti->private;
+	ef = eventfs_prepare_ef(name, mode, fop, NULL, data);
+
+	/* Hand on the real error rather than assuming it is -ENOMEM. */
+	if (IS_ERR(ef))
+		return PTR_ERR(ef);
+
+	mutex_lock(&eventfs_mutex);
+	list_add_tail(&ef->list, &ei->e_top_files);
+	ef->d_parent = parent;
+	mutex_unlock(&eventfs_mutex);
+	return 0;
+}
+
+/**
+ * eventfs_add_file - add eventfs file to list to create later
+ * @name: the name of the file to create.
+ * @mode: the permission that the file should have. If no file type bits
+ *        are set, S_IFREG is assumed.
+ * @ef_parent: parent eventfs_file for this file.
+ * @data: something that the caller will want to get to later on.
+ * @fop: struct file_operations that should be used for this file.
+ *
+ * This function is used to add the information needed to create a
+ * file within a subdirectory of the events directory. The file created
+ * will have the @mode permissions. The @data will be stored in
+ * inode.i_private when the inode is created. The dentry and inodes are
+ * all created when they are referenced, and removed when they are no
+ * longer referenced.
+ *
+ * Return: 0 on success, -EINVAL if @ef_parent is NULL, or the error
+ * reported by eventfs_prepare_ef().
+ */
+int eventfs_add_file(const char *name, umode_t mode,
+		     struct eventfs_file *ef_parent,
+		     void *data,
+		     const struct file_operations *fop)
+{
+	struct eventfs_file *ef;
+
+	if (!ef_parent)
+		return -EINVAL;
+
+	if (!(mode & S_IFMT))
+		mode |= S_IFREG;
+
+	ef = eventfs_prepare_ef(name, mode, fop, NULL, data);
+	/* Hand on the real error rather than assuming it is -ENOMEM. */
+	if (IS_ERR(ef))
+		return PTR_ERR(ef);
+
+	mutex_lock(&eventfs_mutex);
+	list_add_tail(&ef->list, &ef_parent->ei->e_top_files);
+	ef->d_parent = ef_parent->dentry;
+	mutex_unlock(&eventfs_mutex);
+	return 0;
+}
+
+/* SRCU callback: release an eventfs_file together with what it owns. */
+static void free_ef(struct rcu_head *head)
+{
+	struct eventfs_file *doomed;
+
+	doomed = container_of(head, struct eventfs_file, rcu);
+
+	/* ->ei is NULL for plain files; kfree() accepts that. */
+	kfree(doomed->ei);
+	kfree(doomed->name);
+	kfree(doomed);
+}
+
+/**
+ * eventfs_remove_rec - remove eventfs dir or file from list
+ * @ef: eventfs_file to be removed.
+ * @head: to create list of eventfs_file to be deleted
+ * @level: to check recursion depth
+ *
+ * The helper function eventfs_remove_rec() is used to clean up and free the
+ * associated data from eventfs for both of the added functions.
+ *
+ * Unlinks @ef, and before it every entry below it, from the directory
+ * lists and queues them on @head through ->del_list. Nothing is freed
+ * here. The child walk asserts through lockdep that eventfs_mutex is held.
+ */
+static void eventfs_remove_rec(struct eventfs_file *ef, struct list_head *head, int level)
+{
+ struct eventfs_file *ef_child;
+
+ if (!ef)
+ return;
+ /*
+ * Check recursion depth. It should never be greater than 3:
+ * 0 - events/
+ * 1 - events/group/
+ * 2 - events/group/event/
+ * 3 - events/group/event/file
+ */
+ if (WARN_ON_ONCE(level > 3))
+ return;
+
+ if (ef->ei) {
+ /* search for nested folders or files */
+ list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
+ lockdep_is_held(&eventfs_mutex)) {
+ eventfs_remove_rec(ef_child, head, level + 1);
+ }
+ }
+
+ list_del_rcu(&ef->list);
+ list_add_tail(&ef->del_list, head); /* del_list shares storage with is_freed, so this also marks ef as freed */
+}
+
+/**
+ * eventfs_remove - remove eventfs dir or file from list
+ * @ef: eventfs_file to be removed.
+ *
+ * This function acquires the eventfs_mutex lock and calls
+ * eventfs_remove_rec() to unlink @ef and everything below it. Each
+ * unlinked entry is handed to call_srcu() for freeing. Dentries that
+ * existed for those entries are chained together through d_fsdata (with
+ * the LSB set as a tag) and, once the mutex is dropped, invalidated and
+ * released one by one.
+ */
+void eventfs_remove(struct eventfs_file *ef)
+{
+ struct eventfs_file *tmp;
+ LIST_HEAD(ef_del_list);
+ struct dentry *dentry_list = NULL;
+ struct dentry *dentry;
+
+ if (!ef)
+ return;
+
+ mutex_lock(&eventfs_mutex);
+ eventfs_remove_rec(ef, &ef_del_list, 0);
+ list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
+ if (ef->dentry) {
+ unsigned long ptr = (unsigned long)dentry_list;
+
+ /* Keep the dentry from being freed yet */
+ dget(ef->dentry);
+
+ /*
+ * Paranoid: The dget() above should prevent the dentry
+ * from being freed and calling eventfs_set_ef_status_free().
+ * But just in case, set the link list LSB pointer to 1
+ * and have eventfs_set_ef_status_free() check that to
+ * make sure that if it does happen, it will not think
+ * the d_fsdata is an event_file.
+ *
+ * For this to work, no event_file should be allocated
+ * at an odd address, as the ef should always be allocated
+ * to be at least word aligned. Check for that too.
+ */
+ WARN_ON_ONCE(ptr & 1);
+
+ ef->dentry->d_fsdata = (void *)(ptr | 1); /* tagged link to the previously queued dentry */
+ dentry_list = ef->dentry;
+ ef->dentry = NULL;
+ }
+ call_srcu(&eventfs_srcu, &ef->rcu, free_ef);
+ }
+ mutex_unlock(&eventfs_mutex);
+
+ while (dentry_list) {
+ unsigned long ptr;
+
+ dentry = dentry_list;
+ ptr = (unsigned long)dentry->d_fsdata & ~1UL; /* strip the tag to recover the next dentry */
+ dentry_list = (struct dentry *)ptr;
+ dentry->d_fsdata = NULL;
+ d_invalidate(dentry);
+ mutex_lock(&eventfs_mutex);
+ /* dentry should now have at least a single reference */
+ WARN_ONCE((int)d_count(dentry) < 1,
+ "dentry %p less than one reference (%d) after invalidate\n",
+ dentry, d_count(dentry));
+ mutex_unlock(&eventfs_mutex);
+ dput(dentry); /* pairs with the dget() taken in the loop above */
+ }
+}
+
+/**
+ * eventfs_remove_events_dir - remove the top level events directory
+ * @dentry: events's dentry to be removed.
+ *
+ * This function remove events main directory: the dentry is invalidated
+ * and released, and the eventfs_inode attached to its tracefs inode is
+ * freed. Nothing is done if @dentry is NULL, has no inode, or is not an
+ * eventfs directory.
+ */
+void eventfs_remove_events_dir(struct dentry *dentry)
+{
+ struct tracefs_inode *ti;
+ struct eventfs_inode *ei;
+
+ if (!dentry || !dentry->d_inode)
+ return;
+
+ ti = get_tracefs(dentry->d_inode);
+ if (!ti || !(ti->flags & TRACEFS_EVENT_INODE))
+ return;
+
+ ei = ti->private; /* fetched before dput(); NOTE(review): presumably because the inode may be gone afterwards - confirm */
+ d_invalidate(dentry);
+ dput(dentry);
+ kfree(ei);
+}
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 2feb6c58648c..de5b72216b1a 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -21,13 +21,33 @@
#include <linux/parser.h>
#include <linux/magic.h>
#include <linux/slab.h>
+#include "internal.h"
#define TRACEFS_DEFAULT_MODE 0700
+static struct kmem_cache *tracefs_inode_cachep __ro_after_init;
static struct vfsmount *tracefs_mount;
static int tracefs_mount_count;
static bool tracefs_registered;
+/* Allocate a tracefs_inode from its slab cache and return the embedded VFS inode. */
+static struct inode *tracefs_alloc_inode(struct super_block *sb)
+{
+	struct tracefs_inode *ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL);
+
+	if (ti) {
+		/* Not an eventfs inode until someone sets TRACEFS_EVENT_INODE. */
+		ti->flags = 0;
+		return &ti->vfs_inode;
+	}
+
+	return NULL;
+}
+
+/* Return the tracefs_inode that wraps @inode to its slab cache. */
+static void tracefs_free_inode(struct inode *inode)
+{
+	struct tracefs_inode *ti = get_tracefs(inode);
+
+	kmem_cache_free(tracefs_inode_cachep, ti);
+}
+
static ssize_t default_read_file(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -127,7 +147,7 @@ static const struct inode_operations tracefs_dir_inode_operations = {
.rmdir = tracefs_syscall_rmdir,
};
-static struct inode *tracefs_get_inode(struct super_block *sb)
+struct inode *tracefs_get_inode(struct super_block *sb)
{
struct inode *inode = new_inode(sb);
if (inode) {
@@ -290,6 +310,7 @@ static int tracefs_apply_options(struct super_block *sb, bool remount)
struct tracefs_fs_info *fsi = sb->s_fs_info;
struct inode *inode = d_inode(sb->s_root);
struct tracefs_mount_opts *opts = &fsi->mount_opts;
+ umode_t tmp_mode;
/*
* On remount, only reset mode/uid/gid if they were provided as mount
@@ -297,8 +318,9 @@ static int tracefs_apply_options(struct super_block *sb, bool remount)
*/
if (!remount || opts->opts & BIT(Opt_mode)) {
- inode->i_mode &= ~S_IALLUGO;
- inode->i_mode |= opts->mode;
+ tmp_mode = READ_ONCE(inode->i_mode) & ~S_IALLUGO;
+ tmp_mode |= opts->mode;
+ WRITE_ONCE(inode->i_mode, tmp_mode);
}
if (!remount || opts->opts & BIT(Opt_uid))
@@ -346,11 +368,31 @@ static int tracefs_show_options(struct seq_file *m, struct dentry *root)
}
static const struct super_operations tracefs_super_operations = {
+ .alloc_inode = tracefs_alloc_inode,
+ .free_inode = tracefs_free_inode,
+ .drop_inode = generic_delete_inode,
.statfs = simple_statfs,
.remount_fs = tracefs_remount,
.show_options = tracefs_show_options,
};
+/*
+ * d_iput callback: for eventfs inodes, first unhook the dentry from its
+ * eventfs_file, then drop the inode reference.
+ */
+static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode)
+{
+	struct tracefs_inode *ti;
+
+	if (!inode || !dentry)
+		return;
+
+	ti = get_tracefs(inode);
+	if (ti && (ti->flags & TRACEFS_EVENT_INODE))
+		eventfs_set_ef_status_free(dentry);
+
+	iput(inode);
+}
+
+static const struct dentry_operations tracefs_dentry_operations = {
+ .d_iput = tracefs_dentry_iput, /* lets eventfs clear its dentry pointer before the inode is put */
+};
+
static int trace_fill_super(struct super_block *sb, void *data, int silent)
{
static const struct tree_descr trace_files[] = {{""}};
@@ -373,6 +415,7 @@ static int trace_fill_super(struct super_block *sb, void *data, int silent)
goto fail;
sb->s_op = &tracefs_super_operations;
+ sb->s_d_op = &tracefs_dentry_operations;
tracefs_apply_options(sb, false);
@@ -399,7 +442,7 @@ static struct file_system_type trace_fs_type = {
};
MODULE_ALIAS_FS("tracefs");
-static struct dentry *start_creating(const char *name, struct dentry *parent)
+struct dentry *tracefs_start_creating(const char *name, struct dentry *parent)
{
struct dentry *dentry;
int error;
@@ -437,7 +480,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
return dentry;
}
-static struct dentry *failed_creating(struct dentry *dentry)
+struct dentry *tracefs_failed_creating(struct dentry *dentry)
{
inode_unlock(d_inode(dentry->d_parent));
dput(dentry);
@@ -445,13 +488,87 @@ static struct dentry *failed_creating(struct dentry *dentry)
return NULL;
}
-static struct dentry *end_creating(struct dentry *dentry)
+struct dentry *tracefs_end_creating(struct dentry *dentry)
{
inode_unlock(d_inode(dentry->d_parent));
return dentry;
}
/**
+ * eventfs_start_creating - start the process of creating a dentry
+ * @name: Name of the file created for the dentry
+ * @parent: The parent dentry where this dentry will be created
+ *
+ * This is a simple helper function for the dynamically created eventfs
+ * files. When the directory of the eventfs files is accessed, their
+ * dentries are created on the fly. This function is used to start that
+ * process.
+ */
+struct dentry *eventfs_start_creating(const char *name, struct dentry *parent)
+{
+ struct dentry *dentry;
+ int error;
+
+ error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
+ &tracefs_mount_count);
+ if (error)
+ return ERR_PTR(error);
+
+ /*
+ * If the parent is not specified, we create it in the root.
+ * We need the root dentry to do this, which is in the super
+ * block. A pointer to that is in the struct vfsmount that we
+ * have around.
+ */
+ if (!parent)
+ parent = tracefs_mount->mnt_root;
+
+ if (unlikely(IS_DEADDIR(parent->d_inode)))
+ dentry = ERR_PTR(-ENOENT);
+ else
+ dentry = lookup_one_len(name, parent, strlen(name));
+
+ if (!IS_ERR(dentry) && dentry->d_inode) {
+ dput(dentry);
+ dentry = ERR_PTR(-EEXIST);
+ }
+
+ if (IS_ERR(dentry))
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+
+ return dentry;
+}
+
+/**
+ * eventfs_failed_creating - clean up a failed eventfs dentry creation
+ * @dentry: The dentry to clean up
+ *
+ * If a failure is detected after calling eventfs_start_creating(), the
+ * resources created by eventfs_start_creating() need to be cleaned up. In
+ * that case, this function should be called to perform that cleanup.
+ */
+struct dentry *eventfs_failed_creating(struct dentry *dentry)
+{
+ dput(dentry);
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+ return NULL;
+}
+
+/**
+ * eventfs_end_creating - Finish the process of creating an eventfs dentry
+ * @dentry: The dentry that has successfully been created.
+ *
+ * This function is currently just a placeholder to match
+ * eventfs_start_creating(). In case any synchronization needs to be added,
+ * this function will be used to implement that without having to modify
+ * the callers of eventfs_start_creating().
+ */
+struct dentry *eventfs_end_creating(struct dentry *dentry)
+{
+ return dentry;
+}
+
+/**
* tracefs_create_file - create a file in the tracefs filesystem
* @name: a pointer to a string containing the name of the file to create.
* @mode: the permission that the file should have.
@@ -490,14 +607,14 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
if (!(mode & S_IFMT))
mode |= S_IFREG;
BUG_ON(!S_ISREG(mode));
- dentry = start_creating(name, parent);
+ dentry = tracefs_start_creating(name, parent);
if (IS_ERR(dentry))
return NULL;
inode = tracefs_get_inode(dentry->d_sb);
if (unlikely(!inode))
- return failed_creating(dentry);
+ return tracefs_failed_creating(dentry);
inode->i_mode = mode;
inode->i_fop = fops ? fops : &tracefs_file_operations;
@@ -506,13 +623,13 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
inode->i_gid = d_inode(dentry->d_parent)->i_gid;
d_instantiate(dentry, inode);
fsnotify_create(d_inode(dentry->d_parent), dentry);
- return end_creating(dentry);
+ return tracefs_end_creating(dentry);
}
static struct dentry *__create_dir(const char *name, struct dentry *parent,
const struct inode_operations *ops)
{
- struct dentry *dentry = start_creating(name, parent);
+ struct dentry *dentry = tracefs_start_creating(name, parent);
struct inode *inode;
if (IS_ERR(dentry))
@@ -520,7 +637,7 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
inode = tracefs_get_inode(dentry->d_sb);
if (unlikely(!inode))
- return failed_creating(dentry);
+ return tracefs_failed_creating(dentry);
/* Do not set bits for OTH */
inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP;
@@ -534,7 +651,7 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
d_instantiate(dentry, inode);
inc_nlink(d_inode(dentry->d_parent));
fsnotify_mkdir(d_inode(dentry->d_parent), dentry);
- return end_creating(dentry);
+ return tracefs_end_creating(dentry);
}
/**
@@ -628,10 +745,26 @@ bool tracefs_initialized(void)
return tracefs_registered;
}
+static void init_once(void *foo)
+{
+ struct tracefs_inode *ti = (struct tracefs_inode *) foo;
+
+ inode_init_once(&ti->vfs_inode);
+}
+
static int __init tracefs_init(void)
{
int retval;
+ tracefs_inode_cachep = kmem_cache_create("tracefs_inode_cache",
+ sizeof(struct tracefs_inode),
+ 0, (SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD|
+ SLAB_ACCOUNT),
+ init_once);
+ if (!tracefs_inode_cachep)
+ return -ENOMEM;
+
retval = sysfs_create_mount_point(kernel_kobj, "tracing");
if (retval)
return -EINVAL;
diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h
new file mode 100644
index 000000000000..69c2b1d87c46
--- /dev/null
+++ b/fs/tracefs/internal.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TRACEFS_INTERNAL_H
+#define _TRACEFS_INTERNAL_H
+
+enum {
+ TRACEFS_EVENT_INODE = BIT(1),
+};
+
+struct tracefs_inode {
+ unsigned long flags;
+ void *private;
+ struct inode vfs_inode;
+};
+
+static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
+{
+ return container_of(inode, struct tracefs_inode, vfs_inode);
+}
+
+struct dentry *tracefs_start_creating(const char *name, struct dentry *parent);
+struct dentry *tracefs_end_creating(struct dentry *dentry);
+struct dentry *tracefs_failed_creating(struct dentry *dentry);
+struct inode *tracefs_get_inode(struct super_block *sb);
+struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
+struct dentry *eventfs_failed_creating(struct dentry *dentry);
+struct dentry *eventfs_end_creating(struct dentry *dentry);
+void eventfs_set_ef_status_free(struct dentry *dentry);
+
+#endif /* _TRACEFS_INTERNAL_H */