diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-03 20:05:15 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-03 20:05:15 +0200 |
commit | 5133cd7518758211e827481e7d5053333bb926f0 (patch) | |
tree | f4a78efebcb2781c5ef41b01bdc3b03ee4aae7a4 | |
parent | Merge tag 'for-4.12/dm-post-merge-changes' of git://git.kernel.org/pub/scm/li... (diff) | |
parent | fanotify: don't expose EOPENSTALE to userspace (diff) | |
download | linux-5133cd7518758211e827481e7d5053333bb926f0.tar.xz linux-5133cd7518758211e827481e7d5053333bb926f0.zip |
Merge branch 'fsnotify' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull fsnotify updates from Jan Kara:
"The branch contains mainly a rework of fsnotify infrastructure fixing
a shortcoming where we waited for the response to fanotify permission
events with the SRCU read lock held, so when the process consuming events
was slow to respond, the kernel stalled.
It also contains several cleanups of unnecessary indirections in
the fsnotify framework and a bugfix from Amir fixing the leakage of a
kernel-internal errno to userspace"
* 'fsnotify' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: (37 commits)
fanotify: don't expose EOPENSTALE to userspace
fsnotify: remove a stray unlock
fsnotify: Move ->free_mark callback to fsnotify_ops
fsnotify: Add group pointer in fsnotify_init_mark()
fsnotify: Drop inode_mark.c
fsnotify: Remove fsnotify_find_{inode|vfsmount}_mark()
fsnotify: Remove fsnotify_detach_group_marks()
fsnotify: Rename fsnotify_clear_marks_by_group_flags()
fsnotify: Inline fsnotify_clear_{inode|vfsmount}_mark_group()
fsnotify: Remove fsnotify_recalc_{inode|vfsmount}_mask()
fsnotify: Remove fsnotify_set_mark_{,ignored_}mask_locked()
fanotify: Release SRCU lock when waiting for userspace response
fsnotify: Pass fsnotify_iter_info into handle_event handler
fsnotify: Provide framework for dropping SRCU lock in ->handle_event
fsnotify: Remove special handling of mark destruction on group shutdown
fsnotify: Detach mark from object list when last reference is dropped
fsnotify: Move queueing of mark for destruction into fsnotify_put_mark()
inotify: Do not drop mark reference under idr_lock
fsnotify: Free fsnotify_mark_connector when there is no mark attached
fsnotify: Lock object list with connector lock
...
-rw-r--r-- | fs/inode.c | 3 | ||||
-rw-r--r-- | fs/mount.h | 2 | ||||
-rw-r--r-- | fs/namespace.c | 3 | ||||
-rw-r--r-- | fs/notify/Makefile | 4 | ||||
-rw-r--r-- | fs/notify/dnotify/dnotify.c | 25 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.c | 26 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.h | 1 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify_user.c | 77 | ||||
-rw-r--r-- | fs/notify/fdinfo.c | 16 | ||||
-rw-r--r-- | fs/notify/fsnotify.c | 107 | ||||
-rw-r--r-- | fs/notify/fsnotify.h | 48 | ||||
-rw-r--r-- | fs/notify/group.c | 20 | ||||
-rw-r--r-- | fs/notify/inode_mark.c | 199 | ||||
-rw-r--r-- | fs/notify/inotify/inotify.h | 4 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_fsnotify.c | 18 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_user.c | 81 | ||||
-rw-r--r-- | fs/notify/mark.c | 642 | ||||
-rw-r--r-- | fs/notify/vfsmount_mark.c | 108 | ||||
-rw-r--r-- | include/linux/fs.h | 4 | ||||
-rw-r--r-- | include/linux/fsnotify_backend.h | 95 | ||||
-rw-r--r-- | kernel/audit_fsnotify.c | 10 | ||||
-rw-r--r-- | kernel/audit_tree.c | 78 | ||||
-rw-r--r-- | kernel/audit_watch.c | 10 | ||||
-rw-r--r-- | kernel/auditsc.c | 5 |
24 files changed, 815 insertions, 771 deletions
diff --git a/fs/inode.c b/fs/inode.c index 88110fd0b282..131b2bcebc48 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -371,9 +371,6 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->i_lru); address_space_init_once(&inode->i_data); i_size_ordered_init(inode); -#ifdef CONFIG_FSNOTIFY - INIT_HLIST_HEAD(&inode->i_fsnotify_marks); -#endif } EXPORT_SYMBOL(inode_init_once); diff --git a/fs/mount.h b/fs/mount.h index 2826543a131d..bf1fda6eed8f 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -59,7 +59,7 @@ struct mount { struct mountpoint *mnt_mp; /* where is it mounted */ struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ #ifdef CONFIG_FSNOTIFY - struct hlist_head mnt_fsnotify_marks; + struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; #endif int mnt_id; /* mount identifier */ diff --git a/fs/namespace.c b/fs/namespace.c index cc1375eff88c..b3b115bd4e1e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -236,9 +236,6 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); INIT_HLIST_NODE(&mnt->mnt_mp_list); -#ifdef CONFIG_FSNOTIFY - INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); -#endif init_fs_pin(&mnt->mnt_umount, drop_mountpoint); } return mnt; diff --git a/fs/notify/Makefile b/fs/notify/Makefile index 96d3420d0242..3e969ae91b60 100644 --- a/fs/notify/Makefile +++ b/fs/notify/Makefile @@ -1,5 +1,5 @@ -obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o \ - mark.o vfsmount_mark.o fdinfo.o +obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o mark.o \ + fdinfo.o obj-y += dnotify/ obj-y += inotify/ diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 5a4ec309e283..2430a0415995 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -52,7 +52,7 @@ struct dnotify_mark { */ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) { - __u32 new_mask, old_mask; + 
__u32 new_mask = 0; struct dnotify_struct *dn; struct dnotify_mark *dn_mark = container_of(fsn_mark, struct dnotify_mark, @@ -60,17 +60,13 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) assert_spin_locked(&fsn_mark->lock); - old_mask = fsn_mark->mask; - new_mask = 0; for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next) new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT); - fsnotify_set_mark_mask_locked(fsn_mark, new_mask); - - if (old_mask == new_mask) + if (fsn_mark->mask == new_mask) return; + fsn_mark->mask = new_mask; - if (fsn_mark->inode) - fsnotify_recalc_inode_mask(fsn_mark->inode); + fsnotify_recalc_mask(fsn_mark->connector); } /* @@ -86,7 +82,8 @@ static int dnotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { struct dnotify_mark *dn_mark; struct dnotify_struct *dn; @@ -138,6 +135,7 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) static struct fsnotify_ops dnotify_fsnotify_ops = { .handle_event = dnotify_handle_event, + .free_mark = dnotify_free_mark, }; /* @@ -160,7 +158,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) if (!S_ISDIR(inode->i_mode)) return; - fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); if (!fsn_mark) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); @@ -308,7 +306,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) /* set up the new_fsn_mark and new_dn_mark */ new_fsn_mark = &new_dn_mark->fsn_mark; - fsnotify_init_mark(new_fsn_mark, dnotify_free_mark); + fsnotify_init_mark(new_fsn_mark, dnotify_group); new_fsn_mark->mask = mask; new_dn_mark->dn = NULL; @@ -316,13 +314,12 @@ int fcntl_dirnotify(int fd, struct file *filp, 
unsigned long arg) mutex_lock(&dnotify_group->mark_mutex); /* add the new_fsn_mark or find an old one. */ - fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); if (fsn_mark) { dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); } else { - fsnotify_add_mark_locked(new_fsn_mark, dnotify_group, inode, - NULL, 0); + fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0); spin_lock(&new_fsn_mark->lock); fsn_mark = new_fsn_mark; dn_mark = new_dn_mark; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index e5f7e47de68e..2fa99aeaa095 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -57,14 +57,26 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS static int fanotify_get_response(struct fsnotify_group *group, - struct fanotify_perm_event_info *event) + struct fanotify_perm_event_info *event, + struct fsnotify_iter_info *iter_info) { int ret; pr_debug("%s: group=%p event=%p\n", __func__, group, event); + /* + * fsnotify_prepare_user_wait() fails if we race with mark deletion. + * Just let the operation pass in that case. 
+ */ + if (!fsnotify_prepare_user_wait(iter_info)) { + event->response = FAN_ALLOW; + goto out; + } + wait_event(group->fanotify_data.access_waitq, event->response); + fsnotify_finish_user_wait(iter_info); +out: /* userspace responded, convert to something usable */ switch (event->response) { case FAN_ALLOW: @@ -174,7 +186,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *fanotify_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { int ret = 0; struct fanotify_event_info *event; @@ -215,7 +228,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (mask & FAN_ALL_PERM_EVENTS) { - ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event)); + ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event), + iter_info); fsnotify_destroy_event(group, fsn_event); } #endif @@ -248,8 +262,14 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) kmem_cache_free(fanotify_event_cachep, event); } +static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) +{ + kmem_cache_free(fanotify_mark_cache, fsn_mark); +} + const struct fsnotify_ops fanotify_fsnotify_ops = { .handle_event = fanotify_handle_event, .free_group_priv = fanotify_free_group_priv, .free_event = fanotify_free_event, + .free_mark = fanotify_free_mark, }; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 4500a74f8d38..4eb6f5efa282 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -2,6 +2,7 @@ #include <linux/path.h> #include <linux/slab.h> +extern struct kmem_cache *fanotify_mark_cache; extern struct kmem_cache *fanotify_event_cachep; extern struct kmem_cache *fanotify_perm_event_cachep; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 
2b37f2785834..907a481ac781 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -41,7 +41,7 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops; -static struct kmem_cache *fanotify_mark_cache __read_mostly; +struct kmem_cache *fanotify_mark_cache __read_mostly; struct kmem_cache *fanotify_event_cachep __read_mostly; struct kmem_cache *fanotify_perm_event_cachep __read_mostly; @@ -295,27 +295,37 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, } ret = copy_event_to_user(group, kevent, buf); + if (unlikely(ret == -EOPENSTALE)) { + /* + * We cannot report events with stale fd so drop it. + * Setting ret to 0 will continue the event loop and + * do the right thing if there are no more events to + * read (i.e. return bytes read, -EAGAIN or wait). + */ + ret = 0; + } + /* * Permission events get queued to wait for response. Other * events can be destroyed now. */ if (!(kevent->mask & FAN_ALL_PERM_EVENTS)) { fsnotify_destroy_event(group, kevent); - if (ret < 0) - break; } else { #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (ret < 0) { + if (ret <= 0) { FANOTIFY_PE(kevent)->response = FAN_DENY; wake_up(&group->fanotify_data.access_waitq); - break; + } else { + spin_lock(&group->notification_lock); + list_add_tail(&kevent->list, + &group->fanotify_data.access_list); + spin_unlock(&group->notification_lock); } - spin_lock(&group->notification_lock); - list_add_tail(&kevent->list, - &group->fanotify_data.access_list); - spin_unlock(&group->notification_lock); #endif } + if (ret < 0) + break; buf += ret; count -= ret; } @@ -445,11 +455,6 @@ static const struct file_operations fanotify_fops = { .llseek = noop_llseek, }; -static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) -{ - kmem_cache_free(fanotify_mark_cache, fsn_mark); -} - static int fanotify_find_path(int dfd, const char __user *filename, struct path *path, unsigned int flags) { @@ -511,13 +516,12 @@ static __u32 
fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, tmask &= ~FAN_ONDIR; oldmask = fsn_mark->mask; - fsnotify_set_mark_mask_locked(fsn_mark, tmask); + fsn_mark->mask = tmask; } else { __u32 tmask = fsn_mark->ignored_mask & ~mask; if (flags & FAN_MARK_ONDIR) tmask &= ~FAN_ONDIR; - - fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); + fsn_mark->ignored_mask = tmask; } *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask); spin_unlock(&fsn_mark->lock); @@ -534,7 +538,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, int destroy_mark; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); + fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks, + group); if (!fsn_mark) { mutex_unlock(&group->mark_mutex); return -ENOENT; @@ -542,6 +547,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); + if (removed & real_mount(mnt)->mnt_fsnotify_mask) + fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks); if (destroy_mark) fsnotify_detach_mark(fsn_mark); mutex_unlock(&group->mark_mutex); @@ -549,9 +556,6 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); - if (removed & real_mount(mnt)->mnt_fsnotify_mask) - fsnotify_recalc_vfsmount_mask(mnt); - return 0; } @@ -564,7 +568,7 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, int destroy_mark; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_inode_mark(group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) { mutex_unlock(&group->mark_mutex); return -ENOENT; @@ -572,16 +576,16 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); + if (removed & inode->i_fsnotify_mask) + 
fsnotify_recalc_mask(inode->i_fsnotify_marks); if (destroy_mark) fsnotify_detach_mark(fsn_mark); mutex_unlock(&group->mark_mutex); if (destroy_mark) fsnotify_free_mark(fsn_mark); - /* matches the fsnotify_find_inode_mark() */ + /* matches the fsnotify_find_mark() */ fsnotify_put_mark(fsn_mark); - if (removed & inode->i_fsnotify_mask) - fsnotify_recalc_inode_mask(inode); return 0; } @@ -600,13 +604,13 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, tmask |= FAN_ONDIR; oldmask = fsn_mark->mask; - fsnotify_set_mark_mask_locked(fsn_mark, tmask); + fsn_mark->mask = tmask; } else { __u32 tmask = fsn_mark->ignored_mask | mask; if (flags & FAN_MARK_ONDIR) tmask |= FAN_ONDIR; - fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); + fsn_mark->ignored_mask = tmask; if (flags & FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } @@ -629,8 +633,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, if (!mark) return ERR_PTR(-ENOMEM); - fsnotify_init_mark(mark, fanotify_free_mark); - ret = fsnotify_add_mark_locked(mark, group, inode, mnt, 0); + fsnotify_init_mark(mark, group); + ret = fsnotify_add_mark_locked(mark, inode, mnt, 0); if (ret) { fsnotify_put_mark(mark); return ERR_PTR(ret); @@ -648,7 +652,8 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, __u32 added; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); + fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks, + group); if (!fsn_mark) { fsn_mark = fanotify_add_new_mark(group, NULL, mnt); if (IS_ERR(fsn_mark)) { @@ -657,10 +662,9 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, } } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - mutex_unlock(&group->mark_mutex); - if (added & ~real_mount(mnt)->mnt_fsnotify_mask) - fsnotify_recalc_vfsmount_mask(mnt); + fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks); + 
mutex_unlock(&group->mark_mutex); fsnotify_put_mark(fsn_mark); return 0; @@ -686,7 +690,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, return 0; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_inode_mark(group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) { fsn_mark = fanotify_add_new_mark(group, inode, NULL); if (IS_ERR(fsn_mark)) { @@ -695,10 +699,9 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, } } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - mutex_unlock(&group->mark_mutex); - if (added & ~inode->i_fsnotify_mask) - fsnotify_recalc_inode_mask(inode); + fsnotify_recalc_mask(inode->i_fsnotify_marks); + mutex_unlock(&group->mark_mutex); fsnotify_put_mark(fsn_mark); return 0; diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index fd98e5100cab..dd63aa9a6f9a 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -76,12 +76,11 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) struct inotify_inode_mark *inode_mark; struct inode *inode; - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE) || - !(mark->flags & FSNOTIFY_MARK_FLAG_INODE)) + if (!(mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE)) return; inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); - inode = igrab(mark->inode); + inode = igrab(mark->connector->inode); if (inode) { /* * IN_ALL_EVENTS represents all of the mask bits @@ -113,14 +112,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) unsigned int mflags = 0; struct inode *inode; - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) - return; - if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) mflags |= FAN_MARK_IGNORED_SURV_MODIFY; - if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { - inode = igrab(mark->inode); + if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE) { + inode = igrab(mark->connector->inode); if (!inode) return; seq_printf(m, "fanotify ino:%lx 
sdev:%x mflags:%x mask:%x ignored_mask:%x ", @@ -129,8 +125,8 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) show_mark_fhandle(m, inode); seq_putc(m, '\n'); iput(inode); - } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) { - struct mount *mnt = real_mount(mark->mnt); + } else if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + struct mount *mnt = real_mount(mark->connector->mnt); seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", mnt->mnt_id, mflags, mark->mask, mark->ignored_mask); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index b41515d3f081..01a9f0f007d4 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -41,6 +41,63 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) fsnotify_clear_marks_by_mount(mnt); } +/** + * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. + * @sb: superblock being unmounted. + * + * Called during unmount with no locks held, so needs to be safe against + * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. + */ +void fsnotify_unmount_inodes(struct super_block *sb) +{ + struct inode *inode, *iput_inode = NULL; + + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + /* + * We cannot __iget() an inode in state I_FREEING, + * I_WILL_FREE, or I_NEW which is fine because by that point + * the inode cannot have any associated watches. + */ + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { + spin_unlock(&inode->i_lock); + continue; + } + + /* + * If i_count is zero, the inode cannot have any watches and + * doing an __iget/iput with MS_ACTIVE clear would actually + * evict all inodes with zero i_count from icache which is + * unnecessarily violent and may in fact be illegal to do. 
+ */ + if (!atomic_read(&inode->i_count)) { + spin_unlock(&inode->i_lock); + continue; + } + + __iget(inode); + spin_unlock(&inode->i_lock); + spin_unlock(&sb->s_inode_list_lock); + + if (iput_inode) + iput(iput_inode); + + /* for each watch, send FS_UNMOUNT and then remove it */ + fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + + fsnotify_inode_delete(inode); + + iput_inode = inode; + + spin_lock(&sb->s_inode_list_lock); + } + spin_unlock(&sb->s_inode_list_lock); + + if (iput_inode) + iput(iput_inode); +} + /* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event @@ -127,7 +184,8 @@ static int send_to_group(struct inode *to_tell, struct fsnotify_mark *vfsmount_mark, __u32 mask, const void *data, int data_is, u32 cookie, - const unsigned char *file_name) + const unsigned char *file_name, + struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; __u32 inode_test_mask = 0; @@ -178,7 +236,7 @@ static int send_to_group(struct inode *to_tell, return group->ops->handle_event(group, to_tell, inode_mark, vfsmount_mark, mask, data, data_is, - file_name, cookie); + file_name, cookie, iter_info); } /* @@ -193,8 +251,10 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; struct fsnotify_group *inode_group, *vfsmount_group; + struct fsnotify_mark_connector *inode_conn, *vfsmount_conn; + struct fsnotify_iter_info iter_info; struct mount *mnt; - int idx, ret = 0; + int ret = 0; /* global tests shouldn't care about events on child only the specific event */ __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); @@ -210,8 +270,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * SRCU because we have no references to any objects and do not * need SRCU to keep 
them "alive". */ - if (hlist_empty(&to_tell->i_fsnotify_marks) && - (!mnt || hlist_empty(&mnt->mnt_fsnotify_marks))) + if (!to_tell->i_fsnotify_marks && + (!mnt || !mnt->mnt_fsnotify_marks)) return 0; /* * if this is a modify event we may need to clear the ignored masks @@ -223,19 +283,30 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, !(mnt && test_mask & mnt->mnt_fsnotify_mask)) return 0; - idx = srcu_read_lock(&fsnotify_mark_srcu); + iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); if ((mask & FS_MODIFY) || - (test_mask & to_tell->i_fsnotify_mask)) - inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, + (test_mask & to_tell->i_fsnotify_mask)) { + inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, &fsnotify_mark_srcu); + if (inode_conn) + inode_node = srcu_dereference(inode_conn->list.first, + &fsnotify_mark_srcu); + } if (mnt && ((mask & FS_MODIFY) || (test_mask & mnt->mnt_fsnotify_mask))) { - vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first, - &fsnotify_mark_srcu); - inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, + inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, &fsnotify_mark_srcu); + if (inode_conn) + inode_node = srcu_dereference(inode_conn->list.first, + &fsnotify_mark_srcu); + vfsmount_conn = srcu_dereference(mnt->mnt_fsnotify_marks, + &fsnotify_mark_srcu); + if (vfsmount_conn) + vfsmount_node = srcu_dereference( + vfsmount_conn->list.first, + &fsnotify_mark_srcu); } /* @@ -272,8 +343,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, vfsmount_mark = NULL; } } + + iter_info.inode_mark = inode_mark; + iter_info.vfsmount_mark = vfsmount_mark; + ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, - data, data_is, cookie, file_name); + data, data_is, cookie, file_name, + &iter_info); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; @@ -287,12 +363,14 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void 
*data, int data_is, } ret = 0; out: - srcu_read_unlock(&fsnotify_mark_srcu, idx); + srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx); return ret; } EXPORT_SYMBOL_GPL(fsnotify); +extern struct kmem_cache *fsnotify_mark_connector_cachep; + static __init int fsnotify_init(void) { int ret; @@ -303,6 +381,9 @@ static __init int fsnotify_init(void) if (ret) panic("initializing fsnotify_mark_srcu"); + fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, + SLAB_PANIC); + return 0; } core_initcall(fsnotify_init); diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 0a3bc2cf192c..bf012e8ecd14 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -8,60 +8,36 @@ #include "../mount.h" +struct fsnotify_iter_info { + struct fsnotify_mark *inode_mark; + struct fsnotify_mark *vfsmount_mark; + int srcu_idx; +}; + /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); /* protects reads of inode and vfsmount marks list */ extern struct srcu_struct fsnotify_mark_srcu; -/* Calculate mask of events for a list of marks */ -extern u32 fsnotify_recalc_mask(struct hlist_head *head); - /* compare two groups for sorting of marks lists */ extern int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b); -extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, - __u32 mask); -/* Add mark to a proper place in mark list */ -extern int fsnotify_add_mark_list(struct hlist_head *head, - struct fsnotify_mark *mark, - int allow_dups); -/* add a mark to an inode */ -extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, - int allow_dups); -/* add a mark to a vfsmount */ -extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct vfsmount *mnt, - int allow_dups); - -/* vfsmount specific destruction of a mark */ -extern void 
fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark); -/* inode specific destruction of a mark */ -extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark); -/* Find mark belonging to given group in the list of marks */ -extern struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head, - struct fsnotify_group *group); -/* Destroy all marks in the given list protected by 'lock' */ -extern void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock); +/* Destroy all marks connected via given connector */ +extern void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp); /* run the list of all marks associated with inode and destroy them */ static inline void fsnotify_clear_marks_by_inode(struct inode *inode) { - fsnotify_destroy_marks(&inode->i_fsnotify_marks, &inode->i_lock); + fsnotify_destroy_marks(&inode->i_fsnotify_marks); } /* run the list of all marks associated with vfsmount and destroy them */ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) { - fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks, - &mnt->mnt_root->d_lock); + fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks); } -/* prepare for freeing all marks associated with given group */ -extern void fsnotify_detach_group_marks(struct fsnotify_group *group); -/* - * wait for fsnotify_mark_srcu period to end and free all marks in destroy_list - */ -extern void fsnotify_mark_destroy_list(void); +/* Wait until all marks queued for destruction are destroyed */ +extern void fsnotify_wait_marks_destroyed(void); /* * update the dentry->d_flags of all of inode's children to indicate if inode cares diff --git a/fs/notify/group.c b/fs/notify/group.c index fbe3cbebec16..32357534de18 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -66,14 +66,23 @@ void fsnotify_destroy_group(struct fsnotify_group *group) */ fsnotify_group_stop_queueing(group); - /* clear all inode marks for this group, attach them to destroy_list */ 
- fsnotify_detach_group_marks(group); + /* Clear all marks for this group and queue them for destruction */ + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES); /* - * Wait for fsnotify_mark_srcu period to end and free all marks in - * destroy_list + * Some marks can still be pinned when waiting for response from + * userspace. Wait for those now. fsnotify_prepare_user_wait() will + * not succeed now so this wait is race-free. */ - fsnotify_mark_destroy_list(); + wait_event(group->notification_waitq, !atomic_read(&group->user_waits)); + + /* + * Wait until all marks get really destroyed. We could actually destroy + * them ourselves instead of waiting for worker to do it, however that + * would be racy as worker can already be processing some marks before + * we even entered fsnotify_destroy_group(). + */ + fsnotify_wait_marks_destroyed(); /* * Since we have waited for fsnotify_mark_srcu in @@ -124,6 +133,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) /* set to 0 when there a no external references to this group */ atomic_set(&group->refcnt, 1); atomic_set(&group->num_marks, 0); + atomic_set(&group->user_waits, 0); spin_lock_init(&group->notification_lock); INIT_LIST_HEAD(&group->notification_list); diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c deleted file mode 100644 index a3645249f7ec..000000000000 --- a/fs/notify/inode_mark.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/spinlock.h> - -#include <linux/atomic.h> - -#include <linux/fsnotify_backend.h> -#include "fsnotify.h" - -#include "../internal.h" - -/* - * Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types - * any notifier is interested in hearing for this inode. - */ -void fsnotify_recalc_inode_mask(struct inode *inode) -{ - spin_lock(&inode->i_lock); - inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); - spin_unlock(&inode->i_lock); - - __fsnotify_update_child_dentry_flags(inode); -} - -void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark) -{ - struct inode *inode = mark->inode; - - BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&inode->i_lock); - - hlist_del_init_rcu(&mark->obj_list); - mark->inode = NULL; - - /* - * this mark is now off the inode->i_fsnotify_marks list and we - * hold the inode->i_lock, so this is the perfect time to update the - * inode->i_fsnotify_mask - */ - inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); - spin_unlock(&inode->i_lock); -} - -/* - * Given a group clear all of the inode marks associated with that group. - */ -void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_INODE); -} - -/* - * given a group and inode, find the mark associated with that combination. 
- * if found take a reference to that mark and return it, else return NULL - */ -struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, - struct inode *inode) -{ - struct fsnotify_mark *mark; - - spin_lock(&inode->i_lock); - mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); - spin_unlock(&inode->i_lock); - - return mark; -} - -/* - * If we are setting a mark mask on an inode mark we should pin the inode - * in memory. - */ -void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark, - __u32 mask) -{ - struct inode *inode; - - assert_spin_locked(&mark->lock); - - if (mask && - mark->inode && - !(mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) { - mark->flags |= FSNOTIFY_MARK_FLAG_OBJECT_PINNED; - inode = igrab(mark->inode); - /* - * we shouldn't be able to get here if the inode wasn't - * already safely held in memory. But bug in case it - * ever is wrong. - */ - BUG_ON(!inode); - } -} - -/* - * Attach an initialized mark to a given inode. - * These marks may be used for the fsnotify backend to determine which - * event types should be delivered to which group and for which inodes. These - * marks are ordered according to priority, highest number first, and then by - * the group's location in memory. - */ -int fsnotify_add_inode_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, - int allow_dups) -{ - int ret; - - mark->flags |= FSNOTIFY_MARK_FLAG_INODE; - - BUG_ON(!mutex_is_locked(&group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&inode->i_lock); - mark->inode = inode; - ret = fsnotify_add_mark_list(&inode->i_fsnotify_marks, mark, - allow_dups); - inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); - spin_unlock(&inode->i_lock); - - return ret; -} - -/** - * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. - * @sb: superblock being unmounted. 
- * - * Called during unmount with no locks held, so needs to be safe against - * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. - */ -void fsnotify_unmount_inodes(struct super_block *sb) -{ - struct inode *inode, *iput_inode = NULL; - - spin_lock(&sb->s_inode_list_lock); - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - /* - * We cannot __iget() an inode in state I_FREEING, - * I_WILL_FREE, or I_NEW which is fine because by that point - * the inode cannot have any associated watches. - */ - spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { - spin_unlock(&inode->i_lock); - continue; - } - - /* - * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with MS_ACTIVE clear would actually - * evict all inodes with zero i_count from icache which is - * unnecessarily violent and may in fact be illegal to do. - */ - if (!atomic_read(&inode->i_count)) { - spin_unlock(&inode->i_lock); - continue; - } - - __iget(inode); - spin_unlock(&inode->i_lock); - spin_unlock(&sb->s_inode_list_lock); - - if (iput_inode) - iput(iput_inode); - - /* for each watch, send FS_UNMOUNT and then remove it */ - fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); - - fsnotify_inode_delete(inode); - - iput_inode = inode; - - spin_lock(&sb->s_inode_list_lock); - } - spin_unlock(&sb->s_inode_list_lock); - - if (iput_inode) - iput(iput_inode); -} diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 7c461fd49c4c..9ff67b61da8a 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -27,9 +27,11 @@ extern int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie); + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info); extern const struct fsnotify_ops 
inotify_fsnotify_ops; +extern struct kmem_cache *inotify_inode_mark_cachep; #ifdef CONFIG_INOTIFY_USER static inline void dec_inotify_instances(struct ucounts *ucounts) diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 1aeb837ae414..8b73332735ba 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -68,7 +68,8 @@ int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { struct inotify_inode_mark *i_mark; struct inotify_event_info *event; @@ -156,8 +157,8 @@ static int idr_callback(int id, void *p, void *data) * BUG() that was here. */ if (fsn_mark) - printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n", - fsn_mark->group, fsn_mark->inode, i_mark->wd); + printk(KERN_WARNING "fsn_mark->group=%p wd=%d\n", + fsn_mark->group, i_mark->wd); return 0; } @@ -175,9 +176,20 @@ static void inotify_free_event(struct fsnotify_event *fsn_event) kfree(INOTIFY_E(fsn_event)); } +/* ding dong the mark is dead */ +static void inotify_free_mark(struct fsnotify_mark *fsn_mark) +{ + struct inotify_inode_mark *i_mark; + + i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); + + kmem_cache_free(inotify_inode_mark_cachep, i_mark); +} + const struct fsnotify_ops inotify_fsnotify_ops = { .handle_event = inotify_handle_event, .free_group_priv = inotify_free_group_priv, .free_event = inotify_free_event, .freeing_mark = inotify_freeing_mark, + .free_mark = inotify_free_mark, }; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 498d609b26c7..7cc7d3fb1862 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -47,7 +47,7 @@ /* configurable via /proc/sys/fs/inotify/ */ static int 
inotify_max_queued_events __read_mostly; -static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; +struct kmem_cache *inotify_inode_mark_cachep __read_mostly; #ifdef CONFIG_SYSCTL @@ -395,21 +395,6 @@ static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group, return i_mark; } -static void do_inotify_remove_from_idr(struct fsnotify_group *group, - struct inotify_inode_mark *i_mark) -{ - struct idr *idr = &group->inotify_data.idr; - spinlock_t *idr_lock = &group->inotify_data.idr_lock; - int wd = i_mark->wd; - - assert_spin_locked(idr_lock); - - idr_remove(idr, wd); - - /* removed from the idr, drop that ref */ - fsnotify_put_mark(&i_mark->fsn_mark); -} - /* * Remove the mark from the idr (if present) and drop the reference * on the mark because it was in the idr. @@ -417,6 +402,7 @@ static void do_inotify_remove_from_idr(struct fsnotify_group *group, static void inotify_remove_from_idr(struct fsnotify_group *group, struct inotify_inode_mark *i_mark) { + struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; struct inotify_inode_mark *found_i_mark = NULL; int wd; @@ -429,18 +415,16 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, * if it wasn't.... 
*/ if (wd == -1) { - WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" - " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.inode); + WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", + __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); goto out; } /* Lets look in the idr to see if we find it */ found_i_mark = inotify_idr_find_locked(group, wd); if (unlikely(!found_i_mark)) { - WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" - " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.inode); + WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", + __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); goto out; } @@ -451,35 +435,33 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, */ if (unlikely(found_i_mark != i_mark)) { WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p " - "mark->inode=%p found_i_mark=%p found_i_mark->wd=%d " - "found_i_mark->group=%p found_i_mark->inode=%p\n", - __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group, - i_mark->fsn_mark.inode, found_i_mark, found_i_mark->wd, - found_i_mark->fsn_mark.group, - found_i_mark->fsn_mark.inode); + "found_i_mark=%p found_i_mark->wd=%d " + "found_i_mark->group=%p\n", __func__, i_mark, + i_mark->wd, i_mark->fsn_mark.group, found_i_mark, + found_i_mark->wd, found_i_mark->fsn_mark.group); goto out; } /* * One ref for being in the idr - * one ref held by the caller trying to kill us * one ref grabbed by inotify_idr_find */ - if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) { - printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" - " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.inode); + if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 2)) { + printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", + __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); /* we can't really 
recover with bad ref cnting.. */ BUG(); } - do_inotify_remove_from_idr(group, i_mark); + idr_remove(idr, wd); + /* Removed from the idr, drop that ref. */ + fsnotify_put_mark(&i_mark->fsn_mark); out: + i_mark->wd = -1; + spin_unlock(idr_lock); /* match the ref taken by inotify_idr_find_locked() */ if (found_i_mark) fsnotify_put_mark(&found_i_mark->fsn_mark); - i_mark->wd = -1; - spin_unlock(idr_lock); } /* @@ -492,7 +474,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, /* Queue ignore event for the watch */ inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED, - NULL, FSNOTIFY_EVENT_NONE, NULL, 0); + NULL, FSNOTIFY_EVENT_NONE, NULL, 0, NULL); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ @@ -501,16 +483,6 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, dec_inotify_watches(group->inotify_data.ucounts); } -/* ding dong the mark is dead */ -static void inotify_free_mark(struct fsnotify_mark *fsn_mark) -{ - struct inotify_inode_mark *i_mark; - - i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); - - kmem_cache_free(inotify_inode_mark_cachep, i_mark); -} - static int inotify_update_existing_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) @@ -524,21 +496,19 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, mask = inotify_arg_to_mask(arg); - fsn_mark = fsnotify_find_inode_mark(group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) return -ENOENT; i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); spin_lock(&fsn_mark->lock); - old_mask = fsn_mark->mask; if (add) - fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask)); + fsn_mark->mask |= mask; else - fsnotify_set_mark_mask_locked(fsn_mark, mask); + fsn_mark->mask = mask; new_mask = fsn_mark->mask; - spin_unlock(&fsn_mark->lock); if (old_mask != new_mask) { @@ -549,7 +519,7 @@ 
static int inotify_update_existing_watch(struct fsnotify_group *group, /* update the inode with this new fsn_mark */ if (dropped || do_inode) - fsnotify_recalc_inode_mask(inode); + fsnotify_recalc_mask(inode->i_fsnotify_marks); } @@ -578,7 +548,7 @@ static int inotify_new_watch(struct fsnotify_group *group, if (unlikely(!tmp_i_mark)) return -ENOMEM; - fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark); + fsnotify_init_mark(&tmp_i_mark->fsn_mark, group); tmp_i_mark->fsn_mark.mask = mask; tmp_i_mark->wd = -1; @@ -594,8 +564,7 @@ static int inotify_new_watch(struct fsnotify_group *group, } /* we are on the idr, now get on the inode */ - ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode, - NULL, 0); + ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, inode, NULL, 0); if (ret) { /* we failed to get on the inode, get off the idr */ inotify_remove_from_idr(group, tmp_i_mark); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 6043306e8e21..9991f8826734 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -33,7 +33,7 @@ * * group->mark_mutex * mark->lock - * inode->i_lock + * mark->connector->lock * * group->mark_mutex protects the marks_list anchored inside a given group and * each mark is hooked via the g_list. It also protects the groups private @@ -44,14 +44,22 @@ * is assigned to as well as the access to a reference of the inode/vfsmount * that is being watched by the mark. * - * inode->i_lock protects the i_fsnotify_marks list anchored inside a - * given inode and each mark is hooked via the i_list. (and sorta the - * free_i_list) + * mark->connector->lock protects the list of marks anchored inside an + * inode / vfsmount and each mark is hooked via the i_list. * + * A list of notification marks relating to inode / mnt is contained in + * fsnotify_mark_connector. That structure is alive as long as there are any + * marks in the list and is also protected by fsnotify_mark_srcu. 
A mark gets + * detached from fsnotify_mark_connector when last reference to the mark is + * dropped. Thus having mark reference is enough to protect mark->connector + * pointer and to make sure fsnotify_mark_connector cannot disappear. Also + * because we remove mark from g_list before dropping mark reference associated + * with that, any mark found through g_list is guaranteed to have + * mark->connector set until we drop group->mark_mutex. * * LIFETIME: * Inode marks survive between when they are added to an inode and when their - * refcnt==0. + * refcnt==0. Marks are also protected by fsnotify_mark_srcu. * * The inode mark can be cleared for a number of different reasons including: * - The inode is unlinked for the last time. (fsnotify_inode_remove) @@ -61,17 +69,6 @@ * - The fsnotify_group associated with the mark is going away and all such marks * need to be cleaned up. (fsnotify_clear_marks_by_group) * - * Worst case we are given an inode and need to clean up all the marks on that - * inode. We take i_lock and walk the i_fsnotify_marks safely. For each - * mark on the list we take a reference (so the mark can't disappear under us). - * We remove that mark form the inode's list of marks and we add this mark to a - * private list anchored on the stack using i_free_list; we walk i_free_list - * and before we destroy the mark we make sure that we dont race with a - * concurrent destroy_group by getting a ref to the marks group and taking the - * groups mutex. - - * Very similarly for freeing by group, except we use free_g_list. - * * This has the very interesting property of being able to run concurrently with * any (or all) other directions. 
*/ @@ -94,94 +91,281 @@ #define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */ struct srcu_struct fsnotify_mark_srcu; +struct kmem_cache *fsnotify_mark_connector_cachep; + static DEFINE_SPINLOCK(destroy_lock); static LIST_HEAD(destroy_list); +static struct fsnotify_mark_connector *connector_destroy_list; static void fsnotify_mark_destroy_workfn(struct work_struct *work); static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn); +static void fsnotify_connector_destroy_workfn(struct work_struct *work); +static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn); + void fsnotify_get_mark(struct fsnotify_mark *mark) { + WARN_ON_ONCE(!atomic_read(&mark->refcnt)); atomic_inc(&mark->refcnt); } -void fsnotify_put_mark(struct fsnotify_mark *mark) +/* + * Get mark reference when we found the mark via lockless traversal of object + * list. Mark can be already removed from the list by now and on its way to be + * destroyed once SRCU period ends. + */ +static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) { - if (atomic_dec_and_test(&mark->refcnt)) { - if (mark->group) - fsnotify_put_group(mark->group); - mark->free_mark(mark); - } + return atomic_inc_not_zero(&mark->refcnt); } -/* Calculate mask of events for a list of marks */ -u32 fsnotify_recalc_mask(struct hlist_head *head) +static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; struct fsnotify_mark *mark; - hlist_for_each_entry(mark, head, obj_list) - new_mask |= mark->mask; - return new_mask; + assert_spin_locked(&conn->lock); + hlist_for_each_entry(mark, &conn->list, obj_list) { + if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) + new_mask |= mark->mask; + } + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) + conn->inode->i_fsnotify_mask = new_mask; + else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) + real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask; } /* - * Remove mark from inode / vfsmount list, group list, drop inode reference - * if we got one. 
- * - * Must be called with group->mark_mutex held. + * Calculate mask of events for a list of marks. The caller must make sure + * connector and connector->inode cannot disappear under us. Callers achieve + * this by holding a mark->lock or mark->group->mark_mutex for a mark on this + * list. */ -void fsnotify_detach_mark(struct fsnotify_mark *mark) +void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) +{ + if (!conn) + return; + + spin_lock(&conn->lock); + __fsnotify_recalc_mask(conn); + spin_unlock(&conn->lock); + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) + __fsnotify_update_child_dentry_flags(conn->inode); +} + +/* Free all connectors queued for freeing once SRCU period ends */ +static void fsnotify_connector_destroy_workfn(struct work_struct *work) +{ + struct fsnotify_mark_connector *conn, *free; + + spin_lock(&destroy_lock); + conn = connector_destroy_list; + connector_destroy_list = NULL; + spin_unlock(&destroy_lock); + + synchronize_srcu(&fsnotify_mark_srcu); + while (conn) { + free = conn; + conn = conn->destroy_next; + kmem_cache_free(fsnotify_mark_connector_cachep, free); + } +} + +static struct inode *fsnotify_detach_connector_from_object( + struct fsnotify_mark_connector *conn) { struct inode *inode = NULL; + + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { + inode = conn->inode; + rcu_assign_pointer(inode->i_fsnotify_marks, NULL); + inode->i_fsnotify_mask = 0; + conn->inode = NULL; + conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; + } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks, + NULL); + real_mount(conn->mnt)->mnt_fsnotify_mask = 0; + conn->mnt = NULL; + conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; + } + + return inode; +} + +static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) +{ struct fsnotify_group *group = mark->group; - BUG_ON(!mutex_is_locked(&group->mark_mutex)); + if (WARN_ON_ONCE(!group)) + return; + group->ops->free_mark(mark); + 
fsnotify_put_group(group); +} - spin_lock(&mark->lock); +void fsnotify_put_mark(struct fsnotify_mark *mark) +{ + struct fsnotify_mark_connector *conn; + struct inode *inode = NULL; + bool free_conn = false; - /* something else already called this function on this mark */ - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { - spin_unlock(&mark->lock); + /* Catch marks that were actually never attached to object */ + if (!mark->connector) { + if (atomic_dec_and_test(&mark->refcnt)) + fsnotify_final_mark_destroy(mark); return; } - mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; + /* + * We have to be careful so that traversals of obj_list under lock can + * safely grab mark reference. + */ + if (!atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) + return; - if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { - inode = mark->inode; - fsnotify_destroy_inode_mark(mark); - } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) - fsnotify_destroy_vfsmount_mark(mark); - else - BUG(); + conn = mark->connector; + hlist_del_init_rcu(&mark->obj_list); + if (hlist_empty(&conn->list)) { + inode = fsnotify_detach_connector_from_object(conn); + free_conn = true; + } else { + __fsnotify_recalc_mask(conn); + } + mark->connector = NULL; + spin_unlock(&conn->lock); + + iput(inode); + + if (free_conn) { + spin_lock(&destroy_lock); + conn->destroy_next = connector_destroy_list; + connector_destroy_list = conn; + spin_unlock(&destroy_lock); + queue_work(system_unbound_wq, &connector_reaper_work); + } /* * Note that we didn't update flags telling whether inode cares about * what's happening with children. We update these flags from * __fsnotify_parent() lazily when next event happens on one of our * children. 
*/ + spin_lock(&destroy_lock); + list_add(&mark->g_list, &destroy_list); + spin_unlock(&destroy_lock); + queue_delayed_work(system_unbound_wq, &reaper_work, + FSNOTIFY_REAPER_DELAY); +} - list_del_init(&mark->g_list); +bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) +{ + struct fsnotify_group *group; - spin_unlock(&mark->lock); + if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark)) + return false; + + if (iter_info->inode_mark) + group = iter_info->inode_mark->group; + else + group = iter_info->vfsmount_mark->group; + + /* + * Since acquisition of mark reference is an atomic op as well, we can + * be sure this inc is seen before any effect of refcount increment. + */ + atomic_inc(&group->user_waits); + + if (iter_info->inode_mark) { + /* This can fail if mark is being removed */ + if (!fsnotify_get_mark_safe(iter_info->inode_mark)) + goto out_wait; + } + if (iter_info->vfsmount_mark) { + if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) + goto out_inode; + } - if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) - iput(inode); + /* + * Now that both marks are pinned by refcount in the inode / vfsmount + * lists, we can drop SRCU lock, and safely resume the list iteration + * once userspace returns. 
+ */ + srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx); + + return true; +out_inode: + if (iter_info->inode_mark) + fsnotify_put_mark(iter_info->inode_mark); +out_wait: + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) + wake_up(&group->notification_waitq); + return false; +} + +void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) +{ + struct fsnotify_group *group = NULL; + + iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); + if (iter_info->inode_mark) { + group = iter_info->inode_mark->group; + fsnotify_put_mark(iter_info->inode_mark); + } + if (iter_info->vfsmount_mark) { + group = iter_info->vfsmount_mark->group; + fsnotify_put_mark(iter_info->vfsmount_mark); + } + /* + * We abuse notification_waitq on group shutdown for waiting for all + * marks pinned when waiting for userspace. + */ + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) + wake_up(&group->notification_waitq); +} + +/* + * Mark mark as detached, remove it from group list. Mark still stays in object + * list until its last reference is dropped. Note that we rely on mark being + * removed from group list before corresponding reference to it is dropped. In + * particular we rely on mark->connector being valid while we hold + * group->mark_mutex if we found the mark through g_list. + * + * Must be called with group->mark_mutex held. The caller must either hold + * reference to the mark or be protected by fsnotify_mark_srcu. 
+ */ +void fsnotify_detach_mark(struct fsnotify_mark *mark) +{ + struct fsnotify_group *group = mark->group; + + WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); + WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) && + atomic_read(&mark->refcnt) < 1 + + !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); + + spin_lock(&mark->lock); + /* something else already called this function on this mark */ + if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { + spin_unlock(&mark->lock); + return; + } + mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; + list_del_init(&mark->g_list); + spin_unlock(&mark->lock); atomic_dec(&group->num_marks); + + /* Drop mark reference acquired in fsnotify_add_mark_locked() */ + fsnotify_put_mark(mark); } /* - * Prepare mark for freeing and add it to the list of marks prepared for - * freeing. The actual freeing must happen after SRCU period ends and the - * caller is responsible for this. + * Free fsnotify mark. The mark is actually only marked as being freed. The + * freeing is actually happening only once last reference to the mark is + * dropped from a workqueue which first waits for srcu period end. * - * The function returns true if the mark was added to the list of marks for - * freeing. The function returns false if someone else has already called - * __fsnotify_free_mark() for the mark. + * Caller must have a reference to the mark or be protected by + * fsnotify_mark_srcu. 
*/ -static bool __fsnotify_free_mark(struct fsnotify_mark *mark) +void fsnotify_free_mark(struct fsnotify_mark *mark) { struct fsnotify_group *group = mark->group; @@ -189,7 +373,7 @@ static bool __fsnotify_free_mark(struct fsnotify_mark *mark) /* something else already called this function on this mark */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { spin_unlock(&mark->lock); - return false; + return; } mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; spin_unlock(&mark->lock); @@ -201,25 +385,6 @@ static bool __fsnotify_free_mark(struct fsnotify_mark *mark) */ if (group->ops->freeing_mark) group->ops->freeing_mark(mark, group); - - spin_lock(&destroy_lock); - list_add(&mark->g_list, &destroy_list); - spin_unlock(&destroy_lock); - - return true; -} - -/* - * Free fsnotify mark. The freeing is actually happening from a workqueue which - * first waits for srcu period end. Caller must have a reference to the mark - * or be protected by fsnotify_mark_srcu. - */ -void fsnotify_free_mark(struct fsnotify_mark *mark) -{ - if (__fsnotify_free_mark(mark)) { - queue_delayed_work(system_unbound_wq, &reaper_work, - FSNOTIFY_REAPER_DELAY); - } } void fsnotify_destroy_mark(struct fsnotify_mark *mark, @@ -231,54 +396,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark, fsnotify_free_mark(mark); } -void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock) -{ - struct fsnotify_mark *mark; - - while (1) { - /* - * We have to be careful since we can race with e.g. - * fsnotify_clear_marks_by_group() and once we drop 'lock', - * mark can get removed from the obj_list and destroyed. But - * we are holding mark reference so mark cannot be freed and - * calling fsnotify_destroy_mark() more than once is fine. - */ - spin_lock(lock); - if (hlist_empty(head)) { - spin_unlock(lock); - break; - } - mark = hlist_entry(head->first, struct fsnotify_mark, obj_list); - /* - * We don't update i_fsnotify_mask / mnt_fsnotify_mask here - * since inode / mount is going away anyway. 
So just remove - * mark from the list. - */ - hlist_del_init_rcu(&mark->obj_list); - fsnotify_get_mark(mark); - spin_unlock(lock); - fsnotify_destroy_mark(mark, mark->group); - fsnotify_put_mark(mark); - } -} - -void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask) -{ - assert_spin_locked(&mark->lock); - - mark->mask = mask; - - if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) - fsnotify_set_inode_mark_mask_locked(mark, mask); -} - -void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask) -{ - assert_spin_locked(&mark->lock); - - mark->ignored_mask = mask; -} - /* * Sorting function for lists of fsnotify marks. * @@ -315,37 +432,133 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) return -1; } -/* Add mark into proper place in given list of marks */ -int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark, - int allow_dups) +static int fsnotify_attach_connector_to_object( + struct fsnotify_mark_connector __rcu **connp, + struct inode *inode, + struct vfsmount *mnt) +{ + struct fsnotify_mark_connector *conn; + + conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL); + if (!conn) + return -ENOMEM; + spin_lock_init(&conn->lock); + INIT_HLIST_HEAD(&conn->list); + if (inode) { + conn->flags = FSNOTIFY_OBJ_TYPE_INODE; + conn->inode = igrab(inode); + } else { + conn->flags = FSNOTIFY_OBJ_TYPE_VFSMOUNT; + conn->mnt = mnt; + } + /* + * cmpxchg() provides the barrier so that readers of *connp can see + * only initialized structure + */ + if (cmpxchg(connp, NULL, conn)) { + /* Someone else created list structure for us */ + if (inode) + iput(inode); + kmem_cache_free(fsnotify_mark_connector_cachep, conn); + } + + return 0; +} + +/* + * Get mark connector, make sure it is alive and return with its lock held. + * This is for users that get connector pointer from inode or mount. 
Users that + * hold reference to a mark on the list may directly lock connector->lock as + * they are sure list cannot go away under them. + */ +static struct fsnotify_mark_connector *fsnotify_grab_connector( + struct fsnotify_mark_connector __rcu **connp) +{ + struct fsnotify_mark_connector *conn; + int idx; + + idx = srcu_read_lock(&fsnotify_mark_srcu); + conn = srcu_dereference(*connp, &fsnotify_mark_srcu); + if (!conn) + goto out; + spin_lock(&conn->lock); + if (!(conn->flags & (FSNOTIFY_OBJ_TYPE_INODE | + FSNOTIFY_OBJ_TYPE_VFSMOUNT))) { + spin_unlock(&conn->lock); + srcu_read_unlock(&fsnotify_mark_srcu, idx); + return NULL; + } +out: + srcu_read_unlock(&fsnotify_mark_srcu, idx); + return conn; +} + +/* + * Add mark into proper place in given list of marks. These marks may be used + * for the fsnotify backend to determine which event types should be delivered + * to which group and for which inodes. These marks are ordered according to + * priority, highest number first, and then by the group's location in memory. + */ +static int fsnotify_add_mark_list(struct fsnotify_mark *mark, + struct inode *inode, struct vfsmount *mnt, + int allow_dups) { struct fsnotify_mark *lmark, *last = NULL; + struct fsnotify_mark_connector *conn; + struct fsnotify_mark_connector __rcu **connp; int cmp; + int err = 0; + + if (WARN_ON(!inode && !mnt)) + return -EINVAL; + if (inode) + connp = &inode->i_fsnotify_marks; + else + connp = &real_mount(mnt)->mnt_fsnotify_marks; +restart: + spin_lock(&mark->lock); + conn = fsnotify_grab_connector(connp); + if (!conn) { + spin_unlock(&mark->lock); + err = fsnotify_attach_connector_to_object(connp, inode, mnt); + if (err) + return err; + goto restart; + } /* is mark the first mark? */ - if (hlist_empty(head)) { - hlist_add_head_rcu(&mark->obj_list, head); - return 0; + if (hlist_empty(&conn->list)) { + hlist_add_head_rcu(&mark->obj_list, &conn->list); + goto added; } /* should mark be in the middle of the current list? 
*/ - hlist_for_each_entry(lmark, head, obj_list) { + hlist_for_each_entry(lmark, &conn->list, obj_list) { last = lmark; - if ((lmark->group == mark->group) && !allow_dups) - return -EEXIST; + if ((lmark->group == mark->group) && + (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && + !allow_dups) { + err = -EEXIST; + goto out_err; + } cmp = fsnotify_compare_groups(lmark->group, mark->group); if (cmp >= 0) { hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list); - return 0; + goto added; } } BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); - return 0; +added: + mark->connector = conn; +out_err: + spin_unlock(&conn->lock); + spin_unlock(&mark->lock); + return err; } /* @@ -353,10 +566,10 @@ int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark, * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group. */ -int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, +int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode, struct vfsmount *mnt, int allow_dups) { + struct fsnotify_group *group = mark->group; int ret = 0; BUG_ON(inode && mnt); @@ -367,61 +580,42 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, * LOCKING ORDER!!!! 
* group->mark_mutex * mark->lock - * inode->i_lock + * mark->connector->lock */ spin_lock(&mark->lock); mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED; - fsnotify_get_group(group); - mark->group = group; list_add(&mark->g_list, &group->marks_list); atomic_inc(&group->num_marks); - fsnotify_get_mark(mark); /* for i_list and g_list */ - - if (inode) { - ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups); - if (ret) - goto err; - } else if (mnt) { - ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups); - if (ret) - goto err; - } else { - BUG(); - } - - /* this will pin the object if appropriate */ - fsnotify_set_mark_mask_locked(mark, mark->mask); + fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - if (inode) - __fsnotify_update_child_dentry_flags(inode); + ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups); + if (ret) + goto err; + + if (mark->mask) + fsnotify_recalc_mask(mark->connector); return ret; err: - mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; + mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE | + FSNOTIFY_MARK_FLAG_ATTACHED); list_del_init(&mark->g_list); - fsnotify_put_group(group); - mark->group = NULL; atomic_dec(&group->num_marks); - spin_unlock(&mark->lock); - - spin_lock(&destroy_lock); - list_add(&mark->g_list, &destroy_list); - spin_unlock(&destroy_lock); - queue_delayed_work(system_unbound_wq, &reaper_work, - FSNOTIFY_REAPER_DELAY); - + fsnotify_put_mark(mark); return ret; } -int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, - struct inode *inode, struct vfsmount *mnt, int allow_dups) +int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode, + struct vfsmount *mnt, int allow_dups) { int ret; + struct fsnotify_group *group = mark->group; + mutex_lock(&group->mark_mutex); - ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups); + ret = fsnotify_add_mark_locked(mark, inode, mnt, allow_dups); mutex_unlock(&group->mark_mutex); return 
ret; } @@ -430,29 +624,42 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, * Given a list of marks, find the mark associated with given group. If found * take a reference to that mark and return it, else return NULL. */ -struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head, - struct fsnotify_group *group) +struct fsnotify_mark *fsnotify_find_mark( + struct fsnotify_mark_connector __rcu **connp, + struct fsnotify_group *group) { + struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark; - hlist_for_each_entry(mark, head, obj_list) { - if (mark->group == group) { + conn = fsnotify_grab_connector(connp); + if (!conn) + return NULL; + + hlist_for_each_entry(mark, &conn->list, obj_list) { + if (mark->group == group && + (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { fsnotify_get_mark(mark); + spin_unlock(&conn->lock); return mark; } } + spin_unlock(&conn->lock); return NULL; } -/* - * clear any marks in a group in which mark->flags & flags is true - */ -void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, - unsigned int flags) +/* Clear any marks in a group with given type */ +void fsnotify_clear_marks_by_group(struct fsnotify_group *group, + unsigned int type) { struct fsnotify_mark *lmark, *mark; LIST_HEAD(to_free); + struct list_head *head = &to_free; + /* Skip selection step if we want to clear all marks. */ + if (type == FSNOTIFY_OBJ_ALL_TYPES) { + head = &group->marks_list; + goto clear; + } /* * We have to be really careful here. Anytime we drop mark_mutex, e.g. * fsnotify_clear_marks_by_inode() can come and free marks. 
Even in our @@ -464,18 +671,19 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, */ mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - if (mark->flags & flags) + if (mark->connector->flags & type) list_move(&mark->g_list, &to_free); } mutex_unlock(&group->mark_mutex); +clear: while (1) { mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); - if (list_empty(&to_free)) { + if (list_empty(head)) { mutex_unlock(&group->mark_mutex); break; } - mark = list_first_entry(&to_free, struct fsnotify_mark, g_list); + mark = list_first_entry(head, struct fsnotify_mark, g_list); fsnotify_get_mark(mark); fsnotify_detach_mark(mark); mutex_unlock(&group->mark_mutex); @@ -484,49 +692,62 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, } } -/* - * Given a group, prepare for freeing all the marks associated with that group. - * The marks are attached to the list of marks prepared for destruction, the - * caller is responsible for freeing marks in that list after SRCU period has - * ended. - */ -void fsnotify_detach_group_marks(struct fsnotify_group *group) +/* Destroy all marks attached to inode / vfsmount */ +void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp) { - struct fsnotify_mark *mark; + struct fsnotify_mark_connector *conn; + struct fsnotify_mark *mark, *old_mark = NULL; + struct inode *inode; - while (1) { - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); - if (list_empty(&group->marks_list)) { - mutex_unlock(&group->mark_mutex); - break; - } - mark = list_first_entry(&group->marks_list, - struct fsnotify_mark, g_list); + conn = fsnotify_grab_connector(connp); + if (!conn) + return; + /* + * We have to be careful since we can race with e.g. + * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the + * list can get modified. 
However we are holding mark reference and + * thus our mark cannot be removed from obj_list so we can continue + * iteration after regaining conn->lock. + */ + hlist_for_each_entry(mark, &conn->list, obj_list) { fsnotify_get_mark(mark); - fsnotify_detach_mark(mark); - mutex_unlock(&group->mark_mutex); - __fsnotify_free_mark(mark); - fsnotify_put_mark(mark); + spin_unlock(&conn->lock); + if (old_mark) + fsnotify_put_mark(old_mark); + old_mark = mark; + fsnotify_destroy_mark(mark, mark->group); + spin_lock(&conn->lock); } + /* + * Detach list from object now so that we don't pin inode until all + * mark references get dropped. It would lead to strange results such + * as delaying inode deletion or blocking unmount. + */ + inode = fsnotify_detach_connector_from_object(conn); + spin_unlock(&conn->lock); + if (old_mark) + fsnotify_put_mark(old_mark); + iput(inode); } /* * Nothing fancy, just initialize lists and locks and counters. */ void fsnotify_init_mark(struct fsnotify_mark *mark, - void (*free_mark)(struct fsnotify_mark *mark)) + struct fsnotify_group *group) { memset(mark, 0, sizeof(*mark)); spin_lock_init(&mark->lock); atomic_set(&mark->refcnt, 1); - mark->free_mark = free_mark; + fsnotify_get_group(group); + mark->group = group; } /* * Destroy all marks in destroy_list, waits for SRCU period to finish before * actually freeing marks. 
*/ -void fsnotify_mark_destroy_list(void) +static void fsnotify_mark_destroy_workfn(struct work_struct *work) { struct fsnotify_mark *mark, *next; struct list_head private_destroy_list; @@ -540,11 +761,12 @@ void fsnotify_mark_destroy_list(void) list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { list_del_init(&mark->g_list); - fsnotify_put_mark(mark); + fsnotify_final_mark_destroy(mark); } } -static void fsnotify_mark_destroy_workfn(struct work_struct *work) +/* Wait for all marks queued for destruction to be actually destroyed */ +void fsnotify_wait_marks_destroyed(void) { - fsnotify_mark_destroy_list(); + flush_delayed_work(&reaper_work); } diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c deleted file mode 100644 index a8fcab68faef..000000000000 --- a/fs/notify/vfsmount_mark.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mount.h> -#include <linux/mutex.h> -#include <linux/spinlock.h> - -#include <linux/atomic.h> - -#include <linux/fsnotify_backend.h> -#include "fsnotify.h" - -void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_VFSMOUNT); -} - -/* - * Recalculate the mnt->mnt_fsnotify_mask, or the mask of all FS_* event types - * any notifier is interested in hearing for this mount point - */ -void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt) -{ - struct mount *m = real_mount(mnt); - - spin_lock(&mnt->mnt_root->d_lock); - m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); - spin_unlock(&mnt->mnt_root->d_lock); -} - -void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark) -{ - struct vfsmount *mnt = mark->mnt; - struct mount *m = real_mount(mnt); - - BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&mnt->mnt_root->d_lock); - - hlist_del_init_rcu(&mark->obj_list); - mark->mnt = NULL; - - m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); - spin_unlock(&mnt->mnt_root->d_lock); -} - -/* - * given a group and vfsmount, find the mark associated with that combination. - * if found take a reference to that mark and return it, else return NULL - */ -struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt) -{ - struct mount *m = real_mount(mnt); - struct fsnotify_mark *mark; - - spin_lock(&mnt->mnt_root->d_lock); - mark = fsnotify_find_mark(&m->mnt_fsnotify_marks, group); - spin_unlock(&mnt->mnt_root->d_lock); - - return mark; -} - -/* - * Attach an initialized mark to a given group and vfsmount. - * These marks may be used for the fsnotify backend to determine which - * event types should be delivered to which groups. 
- */ -int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct vfsmount *mnt, - int allow_dups) -{ - struct mount *m = real_mount(mnt); - int ret; - - mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; - - BUG_ON(!mutex_is_locked(&group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&mnt->mnt_root->d_lock); - mark->mnt = mnt; - ret = fsnotify_add_mark_list(&m->mnt_fsnotify_marks, mark, allow_dups); - m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); - spin_unlock(&mnt->mnt_root->d_lock); - - return ret; -} diff --git a/include/linux/fs.h b/include/linux/fs.h index 30e5c14bd743..5d62d2c47939 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -546,6 +546,8 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 +struct fsnotify_mark_connector; + /* * Keep mostly read-only and often accessed (especially for * the RCU path lookup and 'stat' data) fields at the beginning @@ -645,7 +647,7 @@ struct inode { #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; /* all events this inode cares about */ - struct hlist_head i_fsnotify_marks; + struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif #if IS_ENABLED(CONFIG_FS_ENCRYPTION) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index e6e689b5569e..c6c69318752b 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -80,6 +80,7 @@ struct fsnotify_event; struct fsnotify_mark; struct fsnotify_event_private_data; struct fsnotify_fname; +struct fsnotify_iter_info; /* * Each group much define these ops. 
The fsnotify infrastructure will call @@ -98,10 +99,13 @@ struct fsnotify_ops { struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie); + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_event *event); + /* called on final put+free to free memory */ + void (*free_mark)(struct fsnotify_mark *mark); }; /* @@ -163,6 +167,8 @@ struct fsnotify_group { struct fsnotify_event *overflow_event; /* Event we queue when the * notification list is too * full */ + atomic_t user_waits; /* Number of tasks waiting for user + * response */ /* groups can define private fields here or use the void *private */ union { @@ -195,6 +201,30 @@ struct fsnotify_group { #define FSNOTIFY_EVENT_INODE 2 /* + * Inode / vfsmount point to this structure which tracks all marks attached to + * the inode / vfsmount. The reference to inode / vfsmount is held by this + * structure. We destroy this structure when there are no more marks attached + * to it. The structure is protected by fsnotify_mark_srcu. 
+ */ +struct fsnotify_mark_connector { + spinlock_t lock; +#define FSNOTIFY_OBJ_TYPE_INODE 0x01 +#define FSNOTIFY_OBJ_TYPE_VFSMOUNT 0x02 +#define FSNOTIFY_OBJ_ALL_TYPES (FSNOTIFY_OBJ_TYPE_INODE | \ + FSNOTIFY_OBJ_TYPE_VFSMOUNT) + unsigned int flags; /* Type of object [lock] */ + union { /* Object pointer [lock] */ + struct inode *inode; + struct vfsmount *mnt; + }; + union { + struct hlist_head list; + /* Used listing heads to free after srcu period expires */ + struct fsnotify_mark_connector *destroy_next; + }; +}; + +/* * A mark is simply an object attached to an in core inode which allows an * fsnotify listener to indicate they are either no longer interested in events * of a type matching mask or only interested in those events. @@ -223,22 +253,16 @@ struct fsnotify_mark { struct list_head g_list; /* Protects inode / mnt pointers, flags, masks */ spinlock_t lock; - /* List of marks for inode / vfsmount [obj_lock] */ + /* List of marks for inode / vfsmount [connector->lock, mark ref] */ struct hlist_node obj_list; - union { /* Object pointer [mark->lock, group->mark_mutex] */ - struct inode *inode; /* inode this mark is associated with */ - struct vfsmount *mnt; /* vfsmount this mark is associated with */ - }; + /* Head of list of marks for an object [mark ref] */ + struct fsnotify_mark_connector *connector; /* Events types to ignore [mark->lock, group->mark_mutex] */ __u32 ignored_mask; -#define FSNOTIFY_MARK_FLAG_INODE 0x01 -#define FSNOTIFY_MARK_FLAG_VFSMOUNT 0x02 -#define FSNOTIFY_MARK_FLAG_OBJECT_PINNED 0x04 -#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x08 -#define FSNOTIFY_MARK_FLAG_ALIVE 0x10 -#define FSNOTIFY_MARK_FLAG_ATTACHED 0x20 +#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x01 +#define FSNOTIFY_MARK_FLAG_ALIVE 0x02 +#define FSNOTIFY_MARK_FLAG_ATTACHED 0x04 unsigned int flags; /* flags [mark->lock] */ - void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */ }; #ifdef CONFIG_FSNOTIFY @@ -315,23 +339,18 @@ extern struct 
fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group /* functions used to manipulate the marks attached to inodes */ -/* run all marks associated with a vfsmount and update mnt->mnt_fsnotify_mask */ -extern void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt); -/* run all marks associated with an inode and update inode->i_fsnotify_mask */ -extern void fsnotify_recalc_inode_mask(struct inode *inode); -extern void fsnotify_init_mark(struct fsnotify_mark *mark, void (*free_mark)(struct fsnotify_mark *mark)); -/* find (and take a reference) to a mark associated with group and inode */ -extern struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, struct inode *inode); -/* find (and take a reference) to a mark associated with group and vfsmount */ -extern struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt); -/* set the ignored_mask of a mark */ -extern void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask); -/* set the mask of a mark (might pin the object into memory */ -extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask); -/* attach the mark to both the group and the inode */ -extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, - struct inode *inode, struct vfsmount *mnt, int allow_dups); -extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct fsnotify_group *group, +/* Calculate mask of events for a list of marks */ +extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn); +extern void fsnotify_init_mark(struct fsnotify_mark *mark, + struct fsnotify_group *group); +/* Find mark belonging to given group in the list of marks */ +extern struct fsnotify_mark *fsnotify_find_mark( + struct fsnotify_mark_connector __rcu **connp, + struct fsnotify_group *group); +/* attach the mark to the inode or vfsmount */ +extern int fsnotify_add_mark(struct fsnotify_mark *mark, 
struct inode *inode, + struct vfsmount *mnt, int allow_dups); +extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode, struct vfsmount *mnt, int allow_dups); /* given a group and a mark, flag mark to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, @@ -340,15 +359,23 @@ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, extern void fsnotify_detach_mark(struct fsnotify_mark *mark); /* free mark */ extern void fsnotify_free_mark(struct fsnotify_mark *mark); +/* run all the marks in a group, and clear all of the marks attached to given object type */ +extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type); /* run all the marks in a group, and clear all of the vfsmount marks */ -extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group); +static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) +{ + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); +} /* run all the marks in a group, and clear all of the inode marks */ -extern void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group); -/* run all the marks in a group, and clear all of the marks where mark->flags & flags is true*/ -extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags); +static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) +{ + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); +} extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_unmount_inodes(struct super_block *sb); +extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); +extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); /* put here because inotify does some weird stuff when destroying watches */ extern void 
fsnotify_init_event(struct fsnotify_event *event, diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index b16a5bdcea0d..52f368b6561e 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -103,15 +103,15 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa goto out; } - fsnotify_init_mark(&audit_mark->mark, audit_fsnotify_free_mark); + fsnotify_init_mark(&audit_mark->mark, audit_fsnotify_group); audit_mark->mark.mask = AUDIT_FS_EVENTS; audit_mark->path = pathname; audit_update_mark(audit_mark, dentry->d_inode); audit_mark->rule = krule; - ret = fsnotify_add_mark(&audit_mark->mark, audit_fsnotify_group, inode, NULL, true); + ret = fsnotify_add_mark(&audit_mark->mark, inode, NULL, true); if (ret < 0) { - audit_fsnotify_mark_free(audit_mark); + fsnotify_put_mark(&audit_mark->mark); audit_mark = ERR_PTR(ret); } out: @@ -168,7 +168,8 @@ static int audit_mark_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *dname, u32 cookie) + const unsigned char *dname, u32 cookie, + struct fsnotify_iter_info *iter_info) { struct audit_fsnotify_mark *audit_mark; const struct inode *inode = NULL; @@ -201,6 +202,7 @@ static int audit_mark_handle_event(struct fsnotify_group *group, static const struct fsnotify_ops audit_mark_fsnotify_ops = { .handle_event = audit_mark_handle_event, + .free_mark = audit_fsnotify_free_mark, }; static int __init audit_fsnotify_init(void) diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 5cfd1ea18de0..011d46e5f73f 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -155,7 +155,7 @@ static struct audit_chunk *alloc_chunk(int count) INIT_LIST_HEAD(&chunk->owners[i].list); chunk->owners[i].index = i; } - fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch); + fsnotify_init_mark(&chunk->mark, audit_tree_group); chunk->mark.mask = FS_IN_IGNORED; 
return chunk; } @@ -164,33 +164,54 @@ enum {HASH_SIZE = 128}; static struct list_head chunk_hash_heads[HASH_SIZE]; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock); -static inline struct list_head *chunk_hash(const struct inode *inode) +/* Function to return search key in our hash from inode. */ +static unsigned long inode_to_key(const struct inode *inode) { - unsigned long n = (unsigned long)inode / L1_CACHE_BYTES; + return (unsigned long)inode; +} + +/* + * Function to return search key in our hash from chunk. Key 0 is special and + * should never be present in the hash. + */ +static unsigned long chunk_to_key(struct audit_chunk *chunk) +{ + /* + * We have a reference to the mark so it should be attached to a + * connector. + */ + if (WARN_ON_ONCE(!chunk->mark.connector)) + return 0; + return (unsigned long)chunk->mark.connector->inode; +} + +static inline struct list_head *chunk_hash(unsigned long key) +{ + unsigned long n = key / L1_CACHE_BYTES; return chunk_hash_heads + n % HASH_SIZE; } /* hash_lock & entry->lock is held by caller */ static void insert_hash(struct audit_chunk *chunk) { - struct fsnotify_mark *entry = &chunk->mark; + unsigned long key = chunk_to_key(chunk); struct list_head *list; - if (!entry->inode) + if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED)) return; - list = chunk_hash(entry->inode); + list = chunk_hash(key); list_add_rcu(&chunk->hash, list); } /* called under rcu_read_lock */ struct audit_chunk *audit_tree_lookup(const struct inode *inode) { - struct list_head *list = chunk_hash(inode); + unsigned long key = inode_to_key(inode); + struct list_head *list = chunk_hash(key); struct audit_chunk *p; list_for_each_entry_rcu(p, list, hash) { - /* mark.inode may have gone NULL, but who cares? 
*/ - if (p->mark.inode == inode) { + if (chunk_to_key(p) == key) { atomic_long_inc(&p->refs); return p; } @@ -234,11 +255,15 @@ static void untag_chunk(struct node *p) mutex_lock(&entry->group->mark_mutex); spin_lock(&entry->lock); - if (chunk->dead || !entry->inode) { + /* + * mark_mutex protects mark from getting detached and thus also from + * mark->connector->inode getting NULL. + */ + if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { spin_unlock(&entry->lock); mutex_unlock(&entry->group->mark_mutex); if (new) - free_chunk(new); + fsnotify_put_mark(&new->mark); goto out; } @@ -262,7 +287,7 @@ static void untag_chunk(struct node *p) if (!new) goto Fallback; - if (fsnotify_add_mark_locked(&new->mark, entry->group, entry->inode, + if (fsnotify_add_mark_locked(&new->mark, entry->connector->inode, NULL, 1)) { fsnotify_put_mark(&new->mark); goto Fallback; @@ -328,7 +353,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) return -ENOMEM; entry = &chunk->mark; - if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) { + if (fsnotify_add_mark(entry, inode, NULL, 0)) { fsnotify_put_mark(entry); return -ENOSPC; } @@ -367,7 +392,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) struct node *p; int n; - old_entry = fsnotify_find_inode_mark(audit_tree_group, inode); + old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks, + audit_tree_group); if (!old_entry) return create_chunk(inode, tree); @@ -394,17 +420,21 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) mutex_lock(&old_entry->group->mark_mutex); spin_lock(&old_entry->lock); - if (!old_entry->inode) { + /* + * mark_mutex protects mark from getting detached and thus also from + * mark->connector->inode getting NULL. 
+ */ + if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { /* old_entry is being shot, lets just lie */ spin_unlock(&old_entry->lock); mutex_unlock(&old_entry->group->mark_mutex); fsnotify_put_mark(old_entry); - free_chunk(chunk); + fsnotify_put_mark(&chunk->mark); return -ENOENT; } - if (fsnotify_add_mark_locked(chunk_entry, old_entry->group, - old_entry->inode, NULL, 1)) { + if (fsnotify_add_mark_locked(chunk_entry, + old_entry->connector->inode, NULL, 1)) { spin_unlock(&old_entry->lock); mutex_unlock(&old_entry->group->mark_mutex); fsnotify_put_mark(chunk_entry); @@ -589,7 +619,8 @@ int audit_remove_tree_rule(struct audit_krule *rule) static int compare_root(struct vfsmount *mnt, void *arg) { - return d_backing_inode(mnt->mnt_root) == arg; + return inode_to_key(d_backing_inode(mnt->mnt_root)) == + (unsigned long)arg; } void audit_trim_trees(void) @@ -624,9 +655,10 @@ void audit_trim_trees(void) list_for_each_entry(node, &tree->chunks, list) { struct audit_chunk *chunk = find_chunk(node); /* this could be NULL if the watch is dying else where... 
*/ - struct inode *inode = chunk->mark.inode; node->index |= 1U<<31; - if (iterate_mounts(compare_root, inode, root_mnt)) + if (iterate_mounts(compare_root, + (void *)chunk_to_key(chunk), + root_mnt)) node->index &= ~(1U<<31); } spin_unlock(&hash_lock); @@ -959,7 +991,8 @@ static int audit_tree_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { return 0; } @@ -980,6 +1013,7 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify static const struct fsnotify_ops audit_tree_ops = { .handle_event = audit_tree_handle_event, .freeing_mark = audit_tree_freeing_mark, + .free_mark = audit_tree_destroy_watch, }; static int __init audit_tree_init(void) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index e0656bd63036..62d686d96581 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -103,7 +103,7 @@ static inline struct audit_parent *audit_find_parent(struct inode *inode) struct audit_parent *parent = NULL; struct fsnotify_mark *entry; - entry = fsnotify_find_inode_mark(audit_watch_group, inode); + entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_watch_group); if (entry) parent = container_of(entry, struct audit_parent, mark); @@ -158,9 +158,9 @@ static struct audit_parent *audit_init_parent(struct path *path) INIT_LIST_HEAD(&parent->watches); - fsnotify_init_mark(&parent->mark, audit_watch_free_mark); + fsnotify_init_mark(&parent->mark, audit_watch_group); parent->mark.mask = AUDIT_FS_WATCH; - ret = fsnotify_add_mark(&parent->mark, audit_watch_group, inode, NULL, 0); + ret = fsnotify_add_mark(&parent->mark, inode, NULL, 0); if (ret < 0) { audit_free_parent(parent); return ERR_PTR(ret); @@ -473,7 +473,8 @@ static int audit_watch_handle_event(struct fsnotify_group *group, struct 
fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *dname, u32 cookie) + const unsigned char *dname, u32 cookie, + struct fsnotify_iter_info *iter_info) { const struct inode *inode; struct audit_parent *parent; @@ -507,6 +508,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group, static const struct fsnotify_ops audit_watch_fsnotify_ops = { .handle_event = audit_watch_handle_event, + .free_mark = audit_watch_free_mark, }; static int __init audit_watch_init(void) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index b2dcbe637b7c..bb724baa7ac9 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -73,6 +73,7 @@ #include <linux/ctype.h> #include <linux/string.h> #include <linux/uaccess.h> +#include <linux/fsnotify_backend.h> #include <uapi/linux/limits.h> #include "audit.h" @@ -1596,7 +1597,7 @@ static inline void handle_one(const struct inode *inode) struct audit_tree_refs *p; struct audit_chunk *chunk; int count; - if (likely(hlist_empty(&inode->i_fsnotify_marks))) + if (likely(!inode->i_fsnotify_marks)) return; context = current->audit_context; p = context->trees; @@ -1639,7 +1640,7 @@ retry: seq = read_seqbegin(&rename_lock); for(;;) { struct inode *inode = d_backing_inode(d); - if (inode && unlikely(!hlist_empty(&inode->i_fsnotify_marks))) { + if (inode && unlikely(inode->i_fsnotify_marks)) { struct audit_chunk *chunk; chunk = audit_tree_lookup(inode); if (chunk) { |