diff options
Diffstat (limited to 'fs/locks.c')
-rw-r--r-- | fs/locks.c | 344 |
1 files changed, 216 insertions, 128 deletions
diff --git a/fs/locks.c b/fs/locks.c index 2ecb4db8c840..f0b24d98f36b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -11,11 +11,11 @@ * * Miscellaneous edits, and a total rewrite of posix_lock_file() code. * Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994 - * + * * Converted file_lock_table to a linked list from an array, which eliminates * the limits on how many active file locks are open. * Chad Page (pageone@netcom.com), November 27, 1994 - * + * * Removed dependency on file descriptors. dup()'ed file descriptors now * get the same locks as the original file descriptors, and a close() on * any file descriptor removes ALL the locks on the file for the current @@ -41,7 +41,7 @@ * with a file pointer (filp). As a result they can be shared by a parent * process and its children after a fork(). They are removed when the last * file descriptor referring to the file pointer is closed (unless explicitly - * unlocked). + * unlocked). * * FL_FLOCK locks never deadlock, an existing lock is always removed before * upgrading from shared to exclusive (or vice versa). When this happens @@ -50,7 +50,7 @@ * Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995 * * Removed some race conditions in flock_lock_file(), marked other possible - * races. Just grep for FIXME to see them. + * races. Just grep for FIXME to see them. * Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996. * * Addressed Dmitry's concerns. Deadlock checking no longer recursive. @@ -112,6 +112,46 @@ * Leases and LOCK_MAND * Matthew Wilcox <willy@debian.org>, June, 2000. * Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000. + * + * Locking conflicts and dependencies: + * If multiple threads attempt to lock the same byte (or flock the same file) + * only one can be granted the lock, and other must wait their turn. + * The first lock has been "applied" or "granted", the others are "waiting" + * and are "blocked" by the "applied" lock.. + * + * Waiting and applied locks are all kept in trees whose properties are: + * + * - the root of a tree may be an applied or waiting lock. + * - every other node in the tree is a waiting lock that + * conflicts with every ancestor of that node. + * + * Every such tree begins life as a waiting singleton which obviously + * satisfies the above properties. + * + * The only ways we modify trees preserve these properties: + * + * 1. We may add a new leaf node, but only after first verifying that it + * conflicts with all of its ancestors. + * 2. We may remove the root of a tree, creating a new singleton + * tree from the root and N new trees rooted in the immediate + * children. + * 3. If the root of a tree is not currently an applied lock, we may + * apply it (if possible). + * 4. We may upgrade the root of the tree (either extend its range, + * or upgrade its entire range from read to write). + * + * When an applied lock is modified in a way that reduces or downgrades any + * part of its range, we remove all its children (2 above). This particularly + * happens when a lock is unlocked. + * + * For each of those child trees we "wake up" the thread which is + * waiting for the lock so it can continue handling as follows: if the + * root of the tree applies, we do so (3). If it doesn't, it must + * conflict with some applied lock. We remove (wake up) all of its children + * (2), and add it is a new leaf to the tree rooted in the applied + * lock (1). We then repeat the process recursively with those + * children. + * */ #include <linux/capability.h> @@ -189,9 +229,9 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); * This lock protects the blocked_hash. Generally, if you're accessing it, you * want to be holding this lock. * - * In addition, it also protects the fl->fl_block list, and the fl->fl_next - * pointer for file_lock structures that are acting as lock requests (in - * contrast to those that are acting as records of acquired locks). + * In addition, it also protects the fl->fl_blocked_requests list, and the + * fl->fl_blocker pointer for file_lock structures that are acting as lock + * requests (in contrast to those that are acting as records of acquired locks). * * Note that when we acquire this lock in order to change the above fields, * we often hold the flc_lock as well. In certain cases, when reading the fields @@ -293,7 +333,8 @@ static void locks_init_lock_heads(struct file_lock *fl) { INIT_HLIST_NODE(&fl->fl_link); INIT_LIST_HEAD(&fl->fl_list); - INIT_LIST_HEAD(&fl->fl_block); + INIT_LIST_HEAD(&fl->fl_blocked_requests); + INIT_LIST_HEAD(&fl->fl_blocked_member); init_waitqueue_head(&fl->fl_wait); } @@ -332,7 +373,8 @@ void locks_free_lock(struct file_lock *fl) { BUG_ON(waitqueue_active(&fl->fl_wait)); BUG_ON(!list_empty(&fl->fl_list)); - BUG_ON(!list_empty(&fl->fl_block)); + BUG_ON(!list_empty(&fl->fl_blocked_requests)); + BUG_ON(!list_empty(&fl->fl_blocked_member)); BUG_ON(!hlist_unhashed(&fl->fl_link)); locks_release_private(fl); @@ -357,7 +399,6 @@ void locks_init_lock(struct file_lock *fl) memset(fl, 0, sizeof(struct file_lock)); locks_init_lock_heads(fl); } - EXPORT_SYMBOL(locks_init_lock); /* @@ -397,9 +438,26 @@ void locks_copy_lock(struct file_lock *new, struct file_lock *fl) fl->fl_ops->fl_copy_lock(new, fl); } } - EXPORT_SYMBOL(locks_copy_lock); +static void locks_move_blocks(struct file_lock *new, struct file_lock *fl) +{ + struct file_lock *f; + + /* + * As ctx->flc_lock is held, new requests cannot be added to + * ->fl_blocked_requests, so we don't need a lock to check if it + * is empty. + */ + if (list_empty(&fl->fl_blocked_requests)) + return; + spin_lock(&blocked_lock_lock); + list_splice_init(&fl->fl_blocked_requests, &new->fl_blocked_requests); + list_for_each_entry(f, &fl->fl_blocked_requests, fl_blocked_member) + f->fl_blocker = new; + spin_unlock(&blocked_lock_lock); +} + static inline int flock_translate_cmd(int cmd) { if (cmd & LOCK_MAND) return cmd & (LOCK_MAND | LOCK_RW); @@ -416,17 +474,20 @@ static inline int flock_translate_cmd(int cmd) { /* Fill in a file_lock structure with an appropriate FLOCK lock. */ static struct file_lock * -flock_make_lock(struct file *filp, unsigned int cmd) +flock_make_lock(struct file *filp, unsigned int cmd, struct file_lock *fl) { - struct file_lock *fl; int type = flock_translate_cmd(cmd); if (type < 0) return ERR_PTR(type); - - fl = locks_alloc_lock(); - if (fl == NULL) - return ERR_PTR(-ENOMEM); + + if (fl == NULL) { + fl = locks_alloc_lock(); + if (fl == NULL) + return ERR_PTR(-ENOMEM); + } else { + locks_init_lock(fl); + } fl->fl_file = filp; fl->fl_owner = filp; @@ -434,7 +495,7 @@ flock_make_lock(struct file *filp, unsigned int cmd) fl->fl_flags = FL_FLOCK; fl->fl_type = type; fl->fl_end = OFFSET_MAX; - + return fl; } @@ -666,16 +727,58 @@ static void locks_delete_global_blocked(struct file_lock *waiter) static void __locks_delete_block(struct file_lock *waiter) { locks_delete_global_blocked(waiter); - list_del_init(&waiter->fl_block); - waiter->fl_next = NULL; + list_del_init(&waiter->fl_blocked_member); + waiter->fl_blocker = NULL; } -static void locks_delete_block(struct file_lock *waiter) +static void __locks_wake_up_blocks(struct file_lock *blocker) { + while (!list_empty(&blocker->fl_blocked_requests)) { + struct file_lock *waiter; + + waiter = list_first_entry(&blocker->fl_blocked_requests, + struct file_lock, fl_blocked_member); + __locks_delete_block(waiter); + if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) + waiter->fl_lmops->lm_notify(waiter); + else + wake_up(&waiter->fl_wait); + } +} + +/** + * locks_delete_lock - stop waiting for a file lock + * @waiter: the lock which was waiting + * + * lockd/nfsd need to disconnect the lock while working on it. + */ +int locks_delete_block(struct file_lock *waiter) +{ + int status = -ENOENT; + + /* + * If fl_blocker is NULL, it won't be set again as this thread + * "owns" the lock and is the only one that might try to claim + * the lock. So it is safe to test fl_blocker locklessly. + * Also if fl_blocker is NULL, this waiter is not listed on + * fl_blocked_requests for some lock, so no other request can + * be added to the list of fl_blocked_requests for this + * request. So if fl_blocker is NULL, it is safe to + * locklessly check if fl_blocked_requests is empty. If both + * of these checks succeed, there is no need to take the lock. + */ + if (waiter->fl_blocker == NULL && + list_empty(&waiter->fl_blocked_requests)) + return status; spin_lock(&blocked_lock_lock); + if (waiter->fl_blocker) + status = 0; + __locks_wake_up_blocks(waiter); __locks_delete_block(waiter); spin_unlock(&blocked_lock_lock); + return status; } +EXPORT_SYMBOL(locks_delete_block); /* Insert waiter into blocker's block list. * We use a circular list so that processes can be easily woken up in @@ -683,26 +786,49 @@ static void locks_delete_block(struct file_lock *waiter) * it seems like the reasonable thing to do. * * Must be called with both the flc_lock and blocked_lock_lock held. The - * fl_block list itself is protected by the blocked_lock_lock, but by ensuring - * that the flc_lock is also held on insertions we can avoid taking the - * blocked_lock_lock in some cases when we see that the fl_block list is empty. + * fl_blocked_requests list itself is protected by the blocked_lock_lock, + * but by ensuring that the flc_lock is also held on insertions we can avoid + * taking the blocked_lock_lock in some cases when we see that the + * fl_blocked_requests list is empty. + * + * Rather than just adding to the list, we check for conflicts with any existing + * waiters, and add beneath any waiter that blocks the new waiter. + * Thus wakeups don't happen until needed. */ static void __locks_insert_block(struct file_lock *blocker, - struct file_lock *waiter) + struct file_lock *waiter, + bool conflict(struct file_lock *, + struct file_lock *)) { - BUG_ON(!list_empty(&waiter->fl_block)); - waiter->fl_next = blocker; - list_add_tail(&waiter->fl_block, &blocker->fl_block); + struct file_lock *fl; + BUG_ON(!list_empty(&waiter->fl_blocked_member)); + +new_blocker: + list_for_each_entry(fl, &blocker->fl_blocked_requests, fl_blocked_member) + if (conflict(fl, waiter)) { + blocker = fl; + goto new_blocker; + } + waiter->fl_blocker = blocker; + list_add_tail(&waiter->fl_blocked_member, &blocker->fl_blocked_requests); if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) locks_insert_global_blocked(waiter); + + /* The requests in waiter->fl_blocked are known to conflict with + * waiter, but might not conflict with blocker, or the requests + * and lock which block it. So they all need to be woken. + */ + __locks_wake_up_blocks(waiter); } /* Must be called with flc_lock held. */ static void locks_insert_block(struct file_lock *blocker, - struct file_lock *waiter) + struct file_lock *waiter, + bool conflict(struct file_lock *, + struct file_lock *)) { spin_lock(&blocked_lock_lock); - __locks_insert_block(blocker, waiter); + __locks_insert_block(blocker, waiter, conflict); spin_unlock(&blocked_lock_lock); } @@ -716,25 +842,15 @@ static void locks_wake_up_blocks(struct file_lock *blocker) /* * Avoid taking global lock if list is empty. This is safe since new * blocked requests are only added to the list under the flc_lock, and - * the flc_lock is always held here. Note that removal from the fl_block - * list does not require the flc_lock, so we must recheck list_empty() - * after acquiring the blocked_lock_lock. + * the flc_lock is always held here. Note that removal from the + * fl_blocked_requests list does not require the flc_lock, so we must + * recheck list_empty() after acquiring the blocked_lock_lock. */ - if (list_empty(&blocker->fl_block)) + if (list_empty(&blocker->fl_blocked_requests)) return; spin_lock(&blocked_lock_lock); - while (!list_empty(&blocker->fl_block)) { - struct file_lock *waiter; - - waiter = list_first_entry(&blocker->fl_block, - struct file_lock, fl_block); - __locks_delete_block(waiter); - if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) - waiter->fl_lmops->lm_notify(waiter); - else - wake_up(&waiter->fl_wait); - } + __locks_wake_up_blocks(blocker); spin_unlock(&blocked_lock_lock); } @@ -766,47 +882,50 @@ locks_delete_lock_ctx(struct file_lock *fl, struct list_head *dispose) /* Determine if lock sys_fl blocks lock caller_fl. Common functionality * checks for shared/exclusive status of overlapping locks. */ -static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +static bool locks_conflict(struct file_lock *caller_fl, + struct file_lock *sys_fl) { if (sys_fl->fl_type == F_WRLCK) - return 1; + return true; if (caller_fl->fl_type == F_WRLCK) - return 1; - return 0; + return true; + return false; } /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific * checking before calling the locks_conflict(). */ -static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +static bool posix_locks_conflict(struct file_lock *caller_fl, + struct file_lock *sys_fl) { /* POSIX locks owned by the same process do not conflict with * each other. */ if (posix_same_owner(caller_fl, sys_fl)) - return (0); + return false; /* Check whether they overlap */ if (!locks_overlap(caller_fl, sys_fl)) - return 0; + return false; - return (locks_conflict(caller_fl, sys_fl)); + return locks_conflict(caller_fl, sys_fl); } /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific * checking before calling the locks_conflict(). */ -static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +static bool flock_locks_conflict(struct file_lock *caller_fl, + struct file_lock *sys_fl) { /* FLOCK locks referring to the same filp do not conflict with * each other. */ if (caller_fl->fl_file == sys_fl->fl_file) - return (0); + return false; if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) - return 0; + return false; - return (locks_conflict(caller_fl, sys_fl)); + return locks_conflict(caller_fl, sys_fl); } void @@ -877,8 +996,11 @@ static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) struct file_lock *fl; hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) { - if (posix_same_owner(fl, block_fl)) - return fl->fl_next; + if (posix_same_owner(fl, block_fl)) { + while (fl->fl_blocker) + fl = fl->fl_blocker; + return fl; + } } return NULL; } @@ -965,12 +1087,13 @@ find_conflict: if (!(request->fl_flags & FL_SLEEP)) goto out; error = FILE_LOCK_DEFERRED; - locks_insert_block(fl, request); + locks_insert_block(fl, request, flock_locks_conflict); goto out; } if (request->fl_flags & FL_ACCESS) goto out; locks_copy_lock(new_fl, request); + locks_move_blocks(new_fl, request); locks_insert_lock_ctx(new_fl, &ctx->flc_flock); new_fl = NULL; error = 0; @@ -1039,12 +1162,13 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, spin_lock(&blocked_lock_lock); if (likely(!posix_locks_deadlock(request, fl))) { error = FILE_LOCK_DEFERRED; - __locks_insert_block(fl, request); + __locks_insert_block(fl, request, + posix_locks_conflict); } spin_unlock(&blocked_lock_lock); goto out; - } - } + } + } /* If we're just looking for a conflict, we're done. */ error = 0; @@ -1164,6 +1288,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, goto out; } locks_copy_lock(new_fl, request); + locks_move_blocks(new_fl, request); locks_insert_lock_ctx(new_fl, &fl->fl_list); fl = new_fl; new_fl = NULL; @@ -1237,13 +1362,11 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) error = posix_lock_inode(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); - if (!error) - continue; - - locks_delete_block(fl); - break; + error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + if (error) + break; } + locks_delete_block(fl); return error; } @@ -1324,7 +1447,7 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start, error = posix_lock_inode(inode, &fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); + error = wait_event_interruptible(fl.fl_wait, !fl.fl_blocker); if (!error) { /* * If we've been sleeping someone might have @@ -1334,13 +1457,12 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start, continue; } - locks_delete_block(&fl); break; } + locks_delete_block(&fl); return error; } - EXPORT_SYMBOL(locks_mandatory_area); #endif /* CONFIG_MANDATORY_FILE_LOCKING */ @@ -1511,14 +1633,14 @@ restart: break_time -= jiffies; if (break_time == 0) break_time++; - locks_insert_block(fl, new_fl); + locks_insert_block(fl, new_fl, leases_conflict); trace_break_lease_block(inode, new_fl); spin_unlock(&ctx->flc_lock); percpu_up_read_preempt_enable(&file_rwsem); locks_dispose_list(&dispose); error = wait_event_interruptible_timeout(new_fl->fl_wait, - !new_fl->fl_next, break_time); + !new_fl->fl_blocker, break_time); percpu_down_read_preempt_disable(&file_rwsem); spin_lock(&ctx->flc_lock); @@ -1542,7 +1664,6 @@ out: locks_free_lock(new_fl); return error; } - EXPORT_SYMBOL(__break_lease); /** @@ -1573,7 +1694,6 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time) if (has_lease) *time = current_time(inode); } - EXPORT_SYMBOL(lease_get_mtime); /** @@ -1628,8 +1748,8 @@ int fcntl_getlease(struct file *filp) /** * check_conflicting_open - see if the given dentry points to a file that has - * an existing open that would conflict with the - * desired lease. + * an existing open that would conflict with the + * desired lease. * @dentry: dentry to check * @arg: type of lease that we're trying to acquire * @flags: current lock flags @@ -1646,7 +1766,7 @@ check_conflicting_open(const struct dentry *dentry, const long arg, int flags) if (flags & FL_LAYOUT) return 0; - if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) + if ((arg == F_RDLCK) && inode_is_open_for_write(inode)) return -EAGAIN; if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || @@ -1853,7 +1973,7 @@ EXPORT_SYMBOL(generic_setlease); * @arg: type of lease to obtain * @lease: file_lock to use when adding a lease * @priv: private info for lm_setup when adding a lease (may be - * NULL if lm_setup doesn't require it) + * NULL if lm_setup doesn't require it) * * Call this to establish a lease on the file. The "lease" argument is not * used for F_UNLCK requests and may be NULL. For commands that set or alter @@ -1931,13 +2051,11 @@ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); - if (!error) - continue; - - locks_delete_block(fl); - break; + error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + if (error) + break; } + locks_delete_block(fl); return error; } @@ -2001,7 +2119,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) !(f.file->f_mode & (FMODE_READ|FMODE_WRITE))) goto out_putf; - lock = flock_make_lock(f.file, cmd); + lock = flock_make_lock(f.file, cmd, NULL); if (IS_ERR(lock)) { error = PTR_ERR(lock); goto out_putf; @@ -2143,7 +2261,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock) error = vfs_test_lock(filp, fl); if (error) goto out; - + flock->l_type = fl->fl_type; if (fl->fl_type != F_UNLCK) { error = posix_lock_to_flock(flock, fl); @@ -2210,13 +2328,11 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd, error = vfs_lock_file(filp, cmd, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); - if (!error) - continue; - - locks_delete_block(fl); - break; + error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + if (error) + break; } + locks_delete_block(fl); return error; } @@ -2476,6 +2592,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) if (!ctx || list_empty(&ctx->flc_posix)) return; + locks_init_lock(&lock); lock.fl_type = F_UNLCK; lock.fl_flags = FL_POSIX | FL_CLOSE; lock.fl_start = 0; @@ -2492,26 +2609,21 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) lock.fl_ops->fl_release_private(&lock); trace_locks_remove_posix(inode, &lock, error); } - EXPORT_SYMBOL(locks_remove_posix); /* The i_flctx must be valid when calling into here */ static void locks_remove_flock(struct file *filp, struct file_lock_context *flctx) { - struct file_lock fl = { - .fl_owner = filp, - .fl_pid = current->tgid, - .fl_file = filp, - .fl_flags = FL_FLOCK | FL_CLOSE, - .fl_type = F_UNLCK, - .fl_end = OFFSET_MAX, - }; + struct file_lock fl; struct inode *inode = locks_inode(filp); if (list_empty(&flctx->flc_flock)) return; + flock_make_lock(filp, LOCK_UN, &fl); + fl.fl_flags |= FL_CLOSE; + if (filp->f_op->flock) filp->f_op->flock(filp, F_SETLKW, &fl); else @@ -2570,27 +2682,6 @@ void locks_remove_file(struct file *filp) } /** - * posix_unblock_lock - stop waiting for a file lock - * @waiter: the lock which was waiting - * - * lockd needs to block waiting for locks. - */ -int -posix_unblock_lock(struct file_lock *waiter) -{ - int status = 0; - - spin_lock(&blocked_lock_lock); - if (waiter->fl_next) - __locks_delete_block(waiter); - else - status = -ENOENT; - spin_unlock(&blocked_lock_lock); - return status; -} -EXPORT_SYMBOL(posix_unblock_lock); - -/** * vfs_cancel_lock - file byte range unblock lock * @filp: The file to apply the unblock to * @fl: The lock to be unblocked @@ -2603,7 +2694,6 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) return filp->f_op->lock(filp, F_CANCELLK, fl); return 0; } - EXPORT_SYMBOL_GPL(vfs_cancel_lock); #ifdef CONFIG_PROC_FS @@ -2707,7 +2797,7 @@ static int locks_show(struct seq_file *f, void *v) lock_get_status(f, fl, iter->li_pos, ""); - list_for_each_entry(bfl, &fl->fl_block, fl_block) + list_for_each_entry(bfl, &fl->fl_blocked_requests, fl_blocked_member) lock_get_status(f, bfl, iter->li_pos, " ->"); return 0; @@ -2803,7 +2893,6 @@ static int __init filelock_init(void) filelock_cache = kmem_cache_create("file_lock_cache", sizeof(struct file_lock), 0, SLAB_PANIC, NULL); - for_each_possible_cpu(i) { struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i); @@ -2813,5 +2902,4 @@ static int __init filelock_init(void) return 0; } - core_initcall(filelock_init); |