summaryrefslogtreecommitdiffstats
path: root/fs/notify/fanotify/fanotify_user.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-08-30 19:04:31 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2021-08-30 19:04:31 +0200
commit3513431926f9bfe3f4fcb06a39d9ec59b0470311 (patch)
tree57ff608d89c031a9f0c84a6f4fe3ae98677467fb /fs/notify/fanotify/fanotify_user.c
parentvt_kdsetmode: extend console locking (diff)
parentfsnotify: optimize the case of no marks of any type (diff)
downloadlinux-3513431926f9bfe3f4fcb06a39d9ec59b0470311.tar.xz
linux-3513431926f9bfe3f4fcb06a39d9ec59b0470311.zip
Merge tag 'fsnotify_for_v5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull fsnotify updates from Jan Kara: "fsnotify speedups when notification actually isn't used and support for identifying processes which caused fanotify events through pidfd instead of normal pid" * tag 'fsnotify_for_v5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: fsnotify: optimize the case of no marks of any type fsnotify: count all objects with attached connectors fsnotify: count s_fsnotify_inode_refs for attached connectors fsnotify: replace igrab() with ihold() on attach connector fanotify: add pidfd support to the fanotify API fanotify: introduce a generic info record copying helper fanotify: minor cosmetic adjustments to fid labels kernel/pid.c: implement additional checks upon pidfd_create() parameters kernel/pid.c: remove static qualifier from pidfd_create()
Diffstat (limited to 'fs/notify/fanotify/fanotify_user.c')
-rw-r--r--fs/notify/fanotify/fanotify_user.c251
1 files changed, 177 insertions, 74 deletions
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 28b67cb9458d..6facdf476255 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/fanotify.h>
#include <linux/fcntl.h>
+#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
@@ -109,8 +110,10 @@ struct kmem_cache *fanotify_path_event_cachep __read_mostly;
struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
#define FANOTIFY_EVENT_ALIGN 4
-#define FANOTIFY_INFO_HDR_LEN \
+#define FANOTIFY_FID_INFO_HDR_LEN \
(sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
+#define FANOTIFY_PIDFD_INFO_HDR_LEN \
+ sizeof(struct fanotify_event_info_pidfd)
static int fanotify_fid_info_len(int fh_len, int name_len)
{
@@ -119,10 +122,11 @@ static int fanotify_fid_info_len(int fh_len, int name_len)
if (name_len)
info_len += name_len + 1;
- return roundup(FANOTIFY_INFO_HDR_LEN + info_len, FANOTIFY_EVENT_ALIGN);
+ return roundup(FANOTIFY_FID_INFO_HDR_LEN + info_len,
+ FANOTIFY_EVENT_ALIGN);
}
-static int fanotify_event_info_len(unsigned int fid_mode,
+static int fanotify_event_info_len(unsigned int info_mode,
struct fanotify_event *event)
{
struct fanotify_info *info = fanotify_event_info(event);
@@ -133,7 +137,8 @@ static int fanotify_event_info_len(unsigned int fid_mode,
if (dir_fh_len) {
info_len += fanotify_fid_info_len(dir_fh_len, info->name_len);
- } else if ((fid_mode & FAN_REPORT_NAME) && (event->mask & FAN_ONDIR)) {
+ } else if ((info_mode & FAN_REPORT_NAME) &&
+ (event->mask & FAN_ONDIR)) {
/*
* With group flag FAN_REPORT_NAME, if name was not recorded in
* event on a directory, we will report the name ".".
@@ -141,6 +146,9 @@ static int fanotify_event_info_len(unsigned int fid_mode,
dot_len = 1;
}
+ if (info_mode & FAN_REPORT_PIDFD)
+ info_len += FANOTIFY_PIDFD_INFO_HDR_LEN;
+
if (fh_len)
info_len += fanotify_fid_info_len(fh_len, dot_len);
@@ -176,7 +184,7 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group,
size_t event_size = FAN_EVENT_METADATA_LEN;
struct fanotify_event *event = NULL;
struct fsnotify_event *fsn_event;
- unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+ unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
@@ -186,8 +194,8 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group,
goto out;
event = FANOTIFY_E(fsn_event);
- if (fid_mode)
- event_size += fanotify_event_info_len(fid_mode, event);
+ if (info_mode)
+ event_size += fanotify_event_info_len(info_mode, event);
if (event_size > count) {
event = ERR_PTR(-EINVAL);
@@ -308,9 +316,10 @@ static int process_access_response(struct fsnotify_group *group,
return -ENOENT;
}
-static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
- int info_type, const char *name, size_t name_len,
- char __user *buf, size_t count)
+static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
+ int info_type, const char *name,
+ size_t name_len,
+ char __user *buf, size_t count)
{
struct fanotify_event_info_fid info = { };
struct file_handle handle = { };
@@ -403,6 +412,117 @@ static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
return info_len;
}
+static int copy_pidfd_info_to_user(int pidfd,
+ char __user *buf,
+ size_t count)
+{
+ struct fanotify_event_info_pidfd info = { };
+ size_t info_len = FANOTIFY_PIDFD_INFO_HDR_LEN;
+
+ if (WARN_ON_ONCE(info_len > count))
+ return -EFAULT;
+
+ info.hdr.info_type = FAN_EVENT_INFO_TYPE_PIDFD;
+ info.hdr.len = info_len;
+ info.pidfd = pidfd;
+
+ if (copy_to_user(buf, &info, info_len))
+ return -EFAULT;
+
+ return info_len;
+}
+
+static int copy_info_records_to_user(struct fanotify_event *event,
+ struct fanotify_info *info,
+ unsigned int info_mode, int pidfd,
+ char __user *buf, size_t count)
+{
+ int ret, total_bytes = 0, info_type = 0;
+ unsigned int fid_mode = info_mode & FANOTIFY_FID_BITS;
+ unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
+
+ /*
+ * Event info records order is as follows: dir fid + name, child fid.
+ */
+ if (fanotify_event_dir_fh_len(event)) {
+ info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME :
+ FAN_EVENT_INFO_TYPE_DFID;
+ ret = copy_fid_info_to_user(fanotify_event_fsid(event),
+ fanotify_info_dir_fh(info),
+ info_type,
+ fanotify_info_name(info),
+ info->name_len, buf, count);
+ if (ret < 0)
+ return ret;
+
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
+
+ if (fanotify_event_object_fh_len(event)) {
+ const char *dot = NULL;
+ int dot_len = 0;
+
+ if (fid_mode == FAN_REPORT_FID || info_type) {
+ /*
+ * With only group flag FAN_REPORT_FID only type FID is
+ * reported. Second info record type is always FID.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_FID;
+ } else if ((fid_mode & FAN_REPORT_NAME) &&
+ (event->mask & FAN_ONDIR)) {
+ /*
+ * With group flag FAN_REPORT_NAME, if name was not
+ * recorded in an event on a directory, report the name
+ * "." with info type DFID_NAME.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_DFID_NAME;
+ dot = ".";
+ dot_len = 1;
+ } else if ((event->mask & ALL_FSNOTIFY_DIRENT_EVENTS) ||
+ (event->mask & FAN_ONDIR)) {
+ /*
+ * With group flag FAN_REPORT_DIR_FID, a single info
+ * record has type DFID for directory entry modification
+ * event and for event on a directory.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_DFID;
+ } else {
+ /*
+ * With group flags FAN_REPORT_DIR_FID|FAN_REPORT_FID,
+ * a single info record has type FID for event on a
+ * non-directory, when there is no directory to report.
+ * For example, on FAN_DELETE_SELF event.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_FID;
+ }
+
+ ret = copy_fid_info_to_user(fanotify_event_fsid(event),
+ fanotify_event_object_fh(event),
+ info_type, dot, dot_len,
+ buf, count);
+ if (ret < 0)
+ return ret;
+
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
+
+ if (pidfd_mode) {
+ ret = copy_pidfd_info_to_user(pidfd, buf, count);
+ if (ret < 0)
+ return ret;
+
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
+
+ return total_bytes;
+}
+
static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fanotify_event *event,
char __user *buf, size_t count)
@@ -410,15 +530,15 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fanotify_event_metadata metadata;
struct path *path = fanotify_event_path(event);
struct fanotify_info *info = fanotify_event_info(event);
- unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+ unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
+ unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
struct file *f = NULL;
- int ret, fd = FAN_NOFD;
- int info_type = 0;
+ int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
metadata.event_len = FAN_EVENT_METADATA_LEN +
- fanotify_event_info_len(fid_mode, event);
+ fanotify_event_info_len(info_mode, event);
metadata.metadata_len = FAN_EVENT_METADATA_LEN;
metadata.vers = FANOTIFY_METADATA_VERSION;
metadata.reserved = 0;
@@ -447,6 +567,33 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
}
metadata.fd = fd;
+ if (pidfd_mode) {
+ /*
+ * Complain if the FAN_REPORT_PIDFD and FAN_REPORT_TID mutual
+ * exclusion is ever lifted. At the time of incoporating pidfd
+ * support within fanotify, the pidfd API only supported the
+ * creation of pidfds for thread-group leaders.
+ */
+ WARN_ON_ONCE(FAN_GROUP_FLAG(group, FAN_REPORT_TID));
+
+ /*
+ * The PIDTYPE_TGID check for an event->pid is performed
+ * preemptively in an attempt to catch out cases where the event
+ * listener reads events after the event generating process has
+ * already terminated. Report FAN_NOPIDFD to the event listener
+ * in those cases, with all other pidfd creation errors being
+ * reported as FAN_EPIDFD.
+ */
+ if (metadata.pid == 0 ||
+ !pid_has_task(event->pid, PIDTYPE_TGID)) {
+ pidfd = FAN_NOPIDFD;
+ } else {
+ pidfd = pidfd_create(event->pid, 0);
+ if (pidfd < 0)
+ pidfd = FAN_EPIDFD;
+ }
+ }
+
ret = -EFAULT;
/*
* Sanity check copy size in case get_one_event() and
@@ -467,67 +614,11 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
if (f)
fd_install(fd, f);
- /* Event info records order is: dir fid + name, child fid */
- if (fanotify_event_dir_fh_len(event)) {
- info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME :
- FAN_EVENT_INFO_TYPE_DFID;
- ret = copy_info_to_user(fanotify_event_fsid(event),
- fanotify_info_dir_fh(info),
- info_type, fanotify_info_name(info),
- info->name_len, buf, count);
+ if (info_mode) {
+ ret = copy_info_records_to_user(event, info, info_mode, pidfd,
+ buf, count);
if (ret < 0)
goto out_close_fd;
-
- buf += ret;
- count -= ret;
- }
-
- if (fanotify_event_object_fh_len(event)) {
- const char *dot = NULL;
- int dot_len = 0;
-
- if (fid_mode == FAN_REPORT_FID || info_type) {
- /*
- * With only group flag FAN_REPORT_FID only type FID is
- * reported. Second info record type is always FID.
- */
- info_type = FAN_EVENT_INFO_TYPE_FID;
- } else if ((fid_mode & FAN_REPORT_NAME) &&
- (event->mask & FAN_ONDIR)) {
- /*
- * With group flag FAN_REPORT_NAME, if name was not
- * recorded in an event on a directory, report the
- * name "." with info type DFID_NAME.
- */
- info_type = FAN_EVENT_INFO_TYPE_DFID_NAME;
- dot = ".";
- dot_len = 1;
- } else if ((event->mask & ALL_FSNOTIFY_DIRENT_EVENTS) ||
- (event->mask & FAN_ONDIR)) {
- /*
- * With group flag FAN_REPORT_DIR_FID, a single info
- * record has type DFID for directory entry modification
- * event and for event on a directory.
- */
- info_type = FAN_EVENT_INFO_TYPE_DFID;
- } else {
- /*
- * With group flags FAN_REPORT_DIR_FID|FAN_REPORT_FID,
- * a single info record has type FID for event on a
- * non-directory, when there is no directory to report.
- * For example, on FAN_DELETE_SELF event.
- */
- info_type = FAN_EVENT_INFO_TYPE_FID;
- }
-
- ret = copy_info_to_user(fanotify_event_fsid(event),
- fanotify_event_object_fh(event),
- info_type, dot, dot_len, buf, count);
- if (ret < 0)
- goto out_close_fd;
-
- buf += ret;
- count -= ret;
}
return metadata.event_len;
@@ -537,6 +628,10 @@ out_close_fd:
put_unused_fd(fd);
fput(f);
}
+
+ if (pidfd >= 0)
+ close_fd(pidfd);
+
return ret;
}
@@ -1082,6 +1177,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
#endif
return -EINVAL;
+ /*
+ * A pidfd can only be returned for a thread-group leader; thus
+ * FAN_REPORT_PIDFD and FAN_REPORT_TID need to remain mutually
+ * exclusive.
+ */
+ if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
+ return -EINVAL;
+
if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
return -EINVAL;
@@ -1483,7 +1586,7 @@ static int __init fanotify_user_setup(void)
FANOTIFY_DEFAULT_MAX_USER_MARKS);
BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
- BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 11);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,