diff options
-rw-r--r-- | fs/Kconfig | 10 | ||||
-rw-r--r-- | fs/inode.c | 2 | ||||
-rw-r--r-- | fs/locks.c | 138 | ||||
-rw-r--r-- | fs/namespace.c | 10 | ||||
-rw-r--r-- | include/linux/fs.h | 78 | ||||
-rw-r--r-- | include/trace/events/filelock.h | 77 |
6 files changed, 232 insertions, 83 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 6ce72d8d1ee1..2bb1ef86c411 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -73,6 +73,16 @@ config FILE_LOCKING for filesystems like NFS and for the flock() system call. Disabling this option saves about 11k. +config MANDATORY_FILE_LOCKING + bool "Enable Mandatory file locking" + depends on FILE_LOCKING + default y + help + This option enables files appropriately marked files on appropriely + mounted filesystems to support mandatory locking. + + To the best of my knowledge this is dead code that no one cares about. + source "fs/notify/Kconfig" source "fs/quota/Kconfig" diff --git a/fs/inode.c b/fs/inode.c index 5bb85a064ce7..4230f66b7410 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -225,7 +225,7 @@ void __destroy_inode(struct inode *inode) inode_detach_wb(inode); security_inode_free(inode); fsnotify_inode_delete(inode); - locks_free_lock_context(inode->i_flctx); + locks_free_lock_context(inode); if (!inode->i_nlink) { WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0); atomic_long_dec(&inode->i_sb->s_remove_count); diff --git a/fs/locks.c b/fs/locks.c index 0d2b3267e2a3..a91f4ab00a90 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -119,7 +119,6 @@ #include <linux/fdtable.h> #include <linux/fs.h> #include <linux/init.h> -#include <linux/module.h> #include <linux/security.h> #include <linux/slab.h> #include <linux/syscalls.h> @@ -230,16 +229,44 @@ locks_get_lock_context(struct inode *inode, int type) ctx = smp_load_acquire(&inode->i_flctx); } out: + trace_locks_get_lock_context(inode, type, ctx); return ctx; } +static void +locks_dump_ctx_list(struct list_head *list, char *list_type) +{ + struct file_lock *fl; + + list_for_each_entry(fl, list, fl_list) { + pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid); + } +} + +static void +locks_check_ctx_lists(struct inode *inode) +{ + struct file_lock_context *ctx = inode->i_flctx; + + if (unlikely(!list_empty(&ctx->flc_flock) || + !list_empty(&ctx->flc_posix) || + !list_empty(&ctx->flc_lease))) { + pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n", + MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), + inode->i_ino); + locks_dump_ctx_list(&ctx->flc_flock, "FLOCK"); + locks_dump_ctx_list(&ctx->flc_posix, "POSIX"); + locks_dump_ctx_list(&ctx->flc_lease, "LEASE"); + } +} + void -locks_free_lock_context(struct file_lock_context *ctx) +locks_free_lock_context(struct inode *inode) { - if (ctx) { - WARN_ON_ONCE(!list_empty(&ctx->flc_flock)); - WARN_ON_ONCE(!list_empty(&ctx->flc_posix)); - WARN_ON_ONCE(!list_empty(&ctx->flc_lease)); + struct file_lock_context *ctx = inode->i_flctx; + + if (unlikely(ctx)) { + locks_check_ctx_lists(inode); kmem_cache_free(flctx_cache, ctx); } } @@ -934,7 +961,8 @@ out: return error; } -static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) +static int posix_lock_inode(struct inode *inode, struct file_lock *request, + struct file_lock *conflock) { struct file_lock *fl, *tmp; struct file_lock *new_fl = NULL; @@ -1142,6 +1170,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str if (new_fl2) locks_free_lock(new_fl2); locks_dispose_list(&dispose); + trace_posix_lock_inode(inode, request, error); + return error; } @@ -1162,7 +1192,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str int posix_lock_file(struct file *filp, struct file_lock *fl, struct file_lock *conflock) { - return __posix_lock_file(file_inode(filp), fl, conflock); + return posix_lock_inode(file_inode(filp), fl, conflock); } EXPORT_SYMBOL(posix_lock_file); @@ -1178,7 +1208,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) int error; might_sleep (); for (;;) { - error = __posix_lock_file(inode, fl, NULL); + error = posix_lock_inode(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1191,6 +1221,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) return error; } +#ifdef CONFIG_MANDATORY_FILE_LOCKING /** * locks_mandatory_locked - Check for an active lock * @file: the file to check @@ -1260,7 +1291,7 @@ int locks_mandatory_area(int read_write, struct inode *inode, if (filp) { fl.fl_owner = filp; fl.fl_flags &= ~FL_SLEEP; - error = __posix_lock_file(inode, &fl, NULL); + error = posix_lock_inode(inode, &fl, NULL); if (!error) break; } @@ -1268,7 +1299,7 @@ int locks_mandatory_area(int read_write, struct inode *inode, if (sleep) fl.fl_flags |= FL_SLEEP; fl.fl_owner = current->files; - error = __posix_lock_file(inode, &fl, NULL); + error = posix_lock_inode(inode, &fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); @@ -1289,6 +1320,7 @@ int locks_mandatory_area(int read_write, struct inode *inode, } EXPORT_SYMBOL(locks_mandatory_area); +#endif /* CONFIG_MANDATORY_FILE_LOCKING */ static void lease_clear_pending(struct file_lock *fl, int arg) { @@ -1503,12 +1535,10 @@ void lease_get_mtime(struct inode *inode, struct timespec *time) ctx = smp_load_acquire(&inode->i_flctx); if (ctx && !list_empty_careful(&ctx->flc_lease)) { spin_lock(&ctx->flc_lock); - if (!list_empty(&ctx->flc_lease)) { - fl = list_first_entry(&ctx->flc_lease, - struct file_lock, fl_list); - if (fl->fl_type == F_WRLCK) - has_lease = true; - } + fl = list_first_entry_or_null(&ctx->flc_lease, + struct file_lock, fl_list); + if (fl && (fl->fl_type == F_WRLCK)) + has_lease = true; spin_unlock(&ctx->flc_lock); } @@ -2165,6 +2195,8 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, if (file_lock == NULL) return -ENOLCK; + inode = file_inode(filp); + /* * This might block, so we do it before checking the inode. */ @@ -2172,8 +2204,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, if (copy_from_user(&flock, l, sizeof(flock))) goto out; - inode = file_inode(filp); - /* Don't allow mandatory locks on files that may be memory mapped * and shared. */ @@ -2182,7 +2212,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, goto out; } -again: error = flock_to_posix_lock(filp, file_lock, &flock); if (error) goto out; @@ -2221,23 +2250,29 @@ again: error = do_lock_file_wait(filp, cmd, file_lock); /* - * Attempt to detect a close/fcntl race and recover by - * releasing the lock that was just acquired. - */ - /* - * we need that spin_lock here - it prevents reordering between - * update of i_flctx->flc_posix and check for it done in close(). - * rcu_read_lock() wouldn't do. + * Attempt to detect a close/fcntl race and recover by releasing the + * lock that was just acquired. There is no need to do that when we're + * unlocking though, or for OFD locks. */ - spin_lock(¤t->files->file_lock); - f = fcheck(fd); - spin_unlock(¤t->files->file_lock); - if (!error && f != filp && flock.l_type != F_UNLCK) { - flock.l_type = F_UNLCK; - goto again; + if (!error && file_lock->fl_type != F_UNLCK && + !(file_lock->fl_flags & FL_OFDLCK)) { + /* + * We need that spin_lock here - it prevents reordering between + * update of i_flctx->flc_posix and check for it done in + * close(). rcu_read_lock() wouldn't do. + */ + spin_lock(¤t->files->file_lock); + f = fcheck(fd); + spin_unlock(¤t->files->file_lock); + if (f != filp) { + file_lock->fl_type = F_UNLCK; + error = do_lock_file_wait(filp, cmd, file_lock); + WARN_ON_ONCE(error); + error = -EBADF; + } } - out: + trace_fcntl_setlk(inode, file_lock, error); locks_free_lock(file_lock); return error; } @@ -2322,7 +2357,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, goto out; } -again: error = flock64_to_posix_lock(filp, file_lock, &flock); if (error) goto out; @@ -2361,17 +2395,27 @@ again: error = do_lock_file_wait(filp, cmd, file_lock); /* - * Attempt to detect a close/fcntl race and recover by - * releasing the lock that was just acquired. + * Attempt to detect a close/fcntl race and recover by releasing the + * lock that was just acquired. There is no need to do that when we're + * unlocking though, or for OFD locks. */ - spin_lock(¤t->files->file_lock); - f = fcheck(fd); - spin_unlock(¤t->files->file_lock); - if (!error && f != filp && flock.l_type != F_UNLCK) { - flock.l_type = F_UNLCK; - goto again; + if (!error && file_lock->fl_type != F_UNLCK && + !(file_lock->fl_flags & FL_OFDLCK)) { + /* + * We need that spin_lock here - it prevents reordering between + * update of i_flctx->flc_posix and check for it done in + * close(). rcu_read_lock() wouldn't do. + */ + spin_lock(¤t->files->file_lock); + f = fcheck(fd); + spin_unlock(¤t->files->file_lock); + if (f != filp) { + file_lock->fl_type = F_UNLCK; + error = do_lock_file_wait(filp, cmd, file_lock); + WARN_ON_ONCE(error); + error = -EBADF; + } } - out: locks_free_lock(file_lock); return error; @@ -2385,6 +2429,7 @@ out: */ void locks_remove_posix(struct file *filp, fl_owner_t owner) { + int error; struct file_lock lock; struct file_lock_context *ctx; @@ -2407,10 +2452,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) lock.fl_ops = NULL; lock.fl_lmops = NULL; - vfs_lock_file(filp, F_SETLK, &lock, NULL); + error = vfs_lock_file(filp, F_SETLK, &lock, NULL); if (lock.fl_ops && lock.fl_ops->fl_release_private) lock.fl_ops->fl_release_private(&lock); + trace_locks_remove_posix(file_inode(filp), &lock, error); } EXPORT_SYMBOL(locks_remove_posix); @@ -2706,7 +2752,7 @@ static int __init proc_locks_init(void) proc_create("locks", 0, NULL, &proc_locks_operations); return 0; } -module_init(proc_locks_init); +fs_initcall(proc_locks_init); #endif static int __init filelock_init(void) diff --git a/fs/namespace.c b/fs/namespace.c index 0570729c87fd..4d2c8f64b7bf 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1584,6 +1584,14 @@ static inline bool may_mount(void) return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN); } +static inline bool may_mandlock(void) +{ +#ifndef CONFIG_MANDATORY_FILE_LOCKING + return false; +#endif + return capable(CAP_SYS_ADMIN); +} + /* * Now umount can handle mount points as well as block devices. * This is important for filesystems which use unnamed block devices. @@ -2677,6 +2685,8 @@ long do_mount(const char *dev_name, const char __user *dir_name, type_page, flags, data_page); if (!retval && !may_mount()) retval = -EPERM; + if (!retval && (flags & MS_MANDLOCK) && !may_mandlock()) + retval = -EPERM; if (retval) goto dput_out; diff --git a/include/linux/fs.h b/include/linux/fs.h index ef3cd36689f6..566f8e078ffc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1043,7 +1043,7 @@ extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); extern int fcntl_getlease(struct file *filp); /* fs/locks.c */ -void locks_free_lock_context(struct file_lock_context *ctx); +void locks_free_lock_context(struct inode *inode); void locks_free_lock(struct file_lock *fl); extern void locks_init_lock(struct file_lock *); extern struct file_lock * locks_alloc_lock(void); @@ -1104,7 +1104,7 @@ static inline int fcntl_getlease(struct file *filp) } static inline void -locks_free_lock_context(struct file_lock_context *ctx) +locks_free_lock_context(struct inode *inode) { } @@ -2030,7 +2030,7 @@ extern struct kobject *fs_kobj; #define FLOCK_VERIFY_READ 1 #define FLOCK_VERIFY_WRITE 2 -#ifdef CONFIG_FILE_LOCKING +#ifdef CONFIG_MANDATORY_FILE_LOCKING extern int locks_mandatory_locked(struct file *); extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); @@ -2075,6 +2075,45 @@ static inline int locks_verify_truncate(struct inode *inode, return 0; } +#else /* !CONFIG_MANDATORY_FILE_LOCKING */ + +static inline int locks_mandatory_locked(struct file *file) +{ + return 0; +} + +static inline int locks_mandatory_area(int rw, struct inode *inode, + struct file *filp, loff_t offset, + size_t count) +{ + return 0; +} + +static inline int __mandatory_lock(struct inode *inode) +{ + return 0; +} + +static inline int mandatory_lock(struct inode *inode) +{ + return 0; +} + +static inline int locks_verify_locked(struct file *file) +{ + return 0; +} + +static inline int locks_verify_truncate(struct inode *inode, struct file *filp, + size_t size) +{ + return 0; +} + +#endif /* CONFIG_MANDATORY_FILE_LOCKING */ + + +#ifdef CONFIG_FILE_LOCKING static inline int break_lease(struct inode *inode, unsigned int mode) { /* @@ -2136,39 +2175,6 @@ static inline int break_layout(struct inode *inode, bool wait) } #else /* !CONFIG_FILE_LOCKING */ -static inline int locks_mandatory_locked(struct file *file) -{ - return 0; -} - -static inline int locks_mandatory_area(int rw, struct inode *inode, - struct file *filp, loff_t offset, - size_t count) -{ - return 0; -} - -static inline int __mandatory_lock(struct inode *inode) -{ - return 0; -} - -static inline int mandatory_lock(struct inode *inode) -{ - return 0; -} - -static inline int locks_verify_locked(struct file *file) -{ - return 0; -} - -static inline int locks_verify_truncate(struct inode *inode, struct file *filp, - size_t size) -{ - return 0; -} - static inline int break_lease(struct inode *inode, unsigned int mode) { return 0; diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index c72f2dc01d0b..63a7680347cb 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -34,6 +34,83 @@ { F_WRLCK, "F_WRLCK" }, \ { F_UNLCK, "F_UNLCK" }) +TRACE_EVENT(locks_get_lock_context, + TP_PROTO(struct inode *inode, int type, struct file_lock_context *ctx), + + TP_ARGS(inode, type, ctx), + + TP_STRUCT__entry( + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(unsigned char, type) + __field(struct file_lock_context *, ctx) + ), + + TP_fast_assign( + __entry->s_dev = inode->i_sb->s_dev; + __entry->i_ino = inode->i_ino; + __entry->type = type; + __entry->ctx = ctx; + ), + + TP_printk("dev=0x%x:0x%x ino=0x%lx type=%s ctx=%p", + MAJOR(__entry->s_dev), MINOR(__entry->s_dev), + __entry->i_ino, show_fl_type(__entry->type), __entry->ctx) +); + +DECLARE_EVENT_CLASS(filelock_lock, + TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), + + TP_ARGS(inode, fl, ret), + + TP_STRUCT__entry( + __field(struct file_lock *, fl) + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(struct file_lock *, fl_next) + __field(fl_owner_t, fl_owner) + __field(unsigned int, fl_pid) + __field(unsigned int, fl_flags) + __field(unsigned char, fl_type) + __field(loff_t, fl_start) + __field(loff_t, fl_end) + __field(int, ret) + ), + + TP_fast_assign( + __entry->fl = fl ? fl : NULL; + __entry->s_dev = inode->i_sb->s_dev; + __entry->i_ino = inode->i_ino; + __entry->fl_next = fl ? fl->fl_next : NULL; + __entry->fl_owner = fl ? fl->fl_owner : NULL; + __entry->fl_pid = fl ? fl->fl_pid : 0; + __entry->fl_flags = fl ? fl->fl_flags : 0; + __entry->fl_type = fl ? fl->fl_type : 0; + __entry->fl_start = fl ? fl->fl_start : 0; + __entry->fl_end = fl ? fl->fl_end : 0; + __entry->ret = ret; + ), + + TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d", + __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev), + __entry->i_ino, __entry->fl_next, __entry->fl_owner, + __entry->fl_pid, show_fl_flags(__entry->fl_flags), + show_fl_type(__entry->fl_type), + __entry->fl_start, __entry->fl_end, __entry->ret) +); + +DEFINE_EVENT(filelock_lock, posix_lock_inode, + TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), + TP_ARGS(inode, fl, ret)); + +DEFINE_EVENT(filelock_lock, fcntl_setlk, + TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), + TP_ARGS(inode, fl, ret)); + +DEFINE_EVENT(filelock_lock, locks_remove_posix, + TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), + TP_ARGS(inode, fl, ret)); + DECLARE_EVENT_CLASS(filelock_lease, TP_PROTO(struct inode *inode, struct file_lock *fl), |