From 89aa593010135660991d05c92528c2c9163d5900 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 8 Sep 2017 15:23:18 +0800 Subject: ceph: keep auth cap when inode has flocks or posix locks file locks are tracked by inode's auth mds. dropping auth caps is equivalent to releasing all file locks. Signed-off-by: "Yan, Zheng" Acked-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 1 + fs/ceph/locks.c | 62 ++++++++++++++++++++++++++++++++++++++++++++-------- fs/ceph/mds_client.c | 5 +++++ fs/ceph/super.h | 1 + 4 files changed, 60 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index f2550a076edc..6301bf299b7c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -493,6 +493,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_wb_ref = 0; ci->i_wrbuffer_ref = 0; ci->i_wrbuffer_ref_head = 0; + atomic_set(&ci->i_filelock_ref, 0); ci->i_shared_gen = 0; ci->i_rdcache_gen = 0; ci->i_rdcache_revoking = 0; diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index e7cce412f2cf..316d550b9603 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -30,19 +30,46 @@ void __init ceph_flock_init(void) get_random_bytes(&lock_secret, sizeof(lock_secret)); } +static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) +{ + struct inode *inode = file_inode(src->fl_file); + atomic_inc(&ceph_inode(inode)->i_filelock_ref); +} + +static void ceph_fl_release_lock(struct file_lock *fl) +{ + struct inode *inode = file_inode(fl->fl_file); + atomic_dec(&ceph_inode(inode)->i_filelock_ref); +} + +static const struct file_lock_operations ceph_fl_lock_ops = { + .fl_copy_lock = ceph_fl_copy_lock, + .fl_release_private = ceph_fl_release_lock, +}; + /** * Implement fcntl and flock locking functions. */ -static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, +static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, int cmd, u8 wait, struct file_lock *fl) { - struct inode *inode = file_inode(file); struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_mds_request *req; int err; u64 length = 0; u64 owner; + if (operation == CEPH_MDS_OP_SETFILELOCK) { + /* + * increasing i_filelock_ref closes race window between + * handling request reply and adding file_lock struct to + * inode. Otherwise, auth caps may get trimmed in the + * window. Caller function will decrease the counter. + */ + fl->fl_ops = &ceph_fl_lock_ops; + atomic_inc(&ceph_inode(inode)->i_filelock_ref); + } + if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK) wait = 0; @@ -180,10 +207,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, */ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) { - u8 lock_cmd; + struct inode *inode = file_inode(file); int err; - u8 wait = 0; u16 op = CEPH_MDS_OP_SETFILELOCK; + u8 lock_cmd; + u8 wait = 0; if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; @@ -199,6 +227,17 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) else if (IS_SETLKW(cmd)) wait = 1; + if (op == CEPH_MDS_OP_SETFILELOCK) { + /* + * increasing i_filelock_ref closes race window between + * handling request reply and adding file_lock struct to + * inode. Otherwise, i_auth_cap may get trimmed in the + * window. Caller function will decrease the counter. + */ + fl->fl_ops = &ceph_fl_lock_ops; + atomic_inc(&ceph_inode(inode)->i_filelock_ref); + } + if (F_RDLCK == fl->fl_type) lock_cmd = CEPH_LOCK_SHARED; else if (F_WRLCK == fl->fl_type) @@ -206,7 +245,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) else lock_cmd = CEPH_LOCK_UNLOCK; - err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); + err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl); if (!err) { if (op != CEPH_MDS_OP_GETFILELOCK) { dout("mds locked, locking locally"); @@ -215,7 +254,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) /* undo! This should only happen if * the kernel detects local * deadlock. */ - ceph_lock_message(CEPH_LOCK_FCNTL, op, file, + ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, CEPH_LOCK_UNLOCK, 0, fl); dout("got %d on posix_lock_file, undid lock", err); @@ -227,8 +266,9 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) int ceph_flock(struct file *file, int cmd, struct file_lock *fl) { - u8 lock_cmd; + struct inode *inode = file_inode(file); int err; + u8 lock_cmd; u8 wait = 0; if (!(fl->fl_flags & FL_FLOCK)) @@ -239,6 +279,10 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) dout("ceph_flock, fl_file: %p", fl->fl_file); + /* see comment in ceph_lock */ + fl->fl_ops = &ceph_fl_lock_ops; + atomic_inc(&ceph_inode(inode)->i_filelock_ref); + if (IS_SETLKW(cmd)) wait = 1; @@ -250,13 +294,13 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) lock_cmd = CEPH_LOCK_UNLOCK; err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, - file, lock_cmd, wait, fl); + inode, lock_cmd, wait, fl); if (!err) { err = locks_lock_file_wait(file, fl); if (err) { ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, - file, CEPH_LOCK_UNLOCK, 0, fl); + inode, CEPH_LOCK_UNLOCK, 0, fl); dout("got %d on locks_lock_file_wait, undid lock", err); } } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 0687ab3c3267..c8a811db387a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1462,6 +1462,11 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) goto out; if ((used | wanted) & CEPH_CAP_ANY_WR) goto out; + /* Note: it's possible that i_filelock_ref becomes non-zero + * after dropping auth caps. It doesn't hurt because reply + * of lock mds request will re-add auth caps. */ + if (atomic_read(&ci->i_filelock_ref) > 0) + goto out; } /* The inode has cached pages, but it's no longer used. * we can safely drop it */ diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3e27a28aa44a..100596c49353 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -352,6 +352,7 @@ struct ceph_inode_info { int i_pin_ref; int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref; int i_wrbuffer_ref, i_wrbuffer_ref_head; + atomic_t i_filelock_ref; u32 i_shared_gen; /* increment each time we get FILE_SHARED */ u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ -- cgit v1.2.3 From c6db84723363790160a89dee4554ad2f0687a0c5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 11 Sep 2017 09:58:56 +0800 Subject: ceph: make lock_to_ceph_filelock() static Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/locks.c | 62 ++++++++++++++++++++++++++++----------------------------- fs/ceph/super.h | 1 - 2 files changed, 31 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 316d550b9603..2927f3bc2fc9 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -332,6 +332,37 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) *flock_count, *fcntl_count); } +/* + * Given a pointer to a lock, convert it to a ceph filelock + */ +static int lock_to_ceph_filelock(struct file_lock *lock, + struct ceph_filelock *cephlock) +{ + int err = 0; + cephlock->start = cpu_to_le64(lock->fl_start); + cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); + cephlock->client = cpu_to_le64(0); + cephlock->pid = cpu_to_le64((u64)lock->fl_pid); + cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); + + switch (lock->fl_type) { + case F_RDLCK: + cephlock->type = CEPH_LOCK_SHARED; + break; + case F_WRLCK: + cephlock->type = CEPH_LOCK_EXCL; + break; + case F_UNLCK: + cephlock->type = CEPH_LOCK_UNLOCK; + break; + default: + dout("Have unknown lock type %d", lock->fl_type); + err = -EINVAL; + } + + return err; +} + /** * Encode the flock and fcntl locks for the given inode into the ceph_filelock * array. Must be called with inode->i_lock already held. @@ -416,34 +447,3 @@ int ceph_locks_to_pagelist(struct ceph_filelock *flocks, out_fail: return err; } - -/* - * Given a pointer to a lock, convert it to a ceph filelock - */ -int lock_to_ceph_filelock(struct file_lock *lock, - struct ceph_filelock *cephlock) -{ - int err = 0; - cephlock->start = cpu_to_le64(lock->fl_start); - cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); - cephlock->client = cpu_to_le64(0); - cephlock->pid = cpu_to_le64((u64)lock->fl_pid); - cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); - - switch (lock->fl_type) { - case F_RDLCK: - cephlock->type = CEPH_LOCK_SHARED; - break; - case F_WRLCK: - cephlock->type = CEPH_LOCK_EXCL; - break; - case F_UNLCK: - cephlock->type = CEPH_LOCK_UNLOCK; - break; - default: - dout("Have unknown lock type %d", lock->fl_type); - err = -EINVAL; - } - - return err; -} diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 100596c49353..0b2c801f4bbb 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1012,7 +1012,6 @@ extern int ceph_encode_locks_to_buffer(struct inode *inode, extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks, struct ceph_pagelist *pagelist, int num_fcntl_locks, int num_flock_locks); -extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); /* debugfs.c */ extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); -- cgit v1.2.3 From 4deb14a2593dfade102dd94a803a63cf620cfd56 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 11 Sep 2017 10:36:28 +0800 Subject: ceph: optimize flock encoding during reconnect Don't malloc if there is no flock. Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/locks.c | 17 ++++++++++------- fs/ceph/mds_client.c | 34 ++++++++++++++++++++-------------- 2 files changed, 30 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 2927f3bc2fc9..aaea82076849 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -431,19 +431,22 @@ int ceph_locks_to_pagelist(struct ceph_filelock *flocks, if (err) goto out_fail; - err = ceph_pagelist_append(pagelist, flocks, - num_fcntl_locks * sizeof(*flocks)); - if (err) - goto out_fail; + if (num_fcntl_locks > 0) { + err = ceph_pagelist_append(pagelist, flocks, + num_fcntl_locks * sizeof(*flocks)); + if (err) + goto out_fail; + } nlocks = cpu_to_le32(num_flock_locks); err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); if (err) goto out_fail; - err = ceph_pagelist_append(pagelist, - &flocks[num_fcntl_locks], - num_flock_locks * sizeof(*flocks)); + if (num_flock_locks > 0) { + err = ceph_pagelist_append(pagelist, &flocks[num_fcntl_locks], + num_flock_locks * sizeof(*flocks)); + } out_fail: return err; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c8a811db387a..295cf5e42ea9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2899,26 +2899,32 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, if (recon_state->msg_version >= 2) { int num_fcntl_locks, num_flock_locks; - struct ceph_filelock *flocks; + struct ceph_filelock *flocks = NULL; size_t struct_len, total_len = 0; u8 struct_v = 0; encode_again: ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); - flocks = kmalloc((num_fcntl_locks+num_flock_locks) * - sizeof(struct ceph_filelock), GFP_NOFS); - if (!flocks) { - err = -ENOMEM; - goto out_free; - } - err = ceph_encode_locks_to_buffer(inode, flocks, - num_fcntl_locks, - num_flock_locks); - if (err) { + if (num_fcntl_locks + num_flock_locks > 0) { + flocks = kmalloc((num_fcntl_locks + num_flock_locks) * + sizeof(struct ceph_filelock), GFP_NOFS); + if (!flocks) { + err = -ENOMEM; + goto out_free; + } + err = ceph_encode_locks_to_buffer(inode, flocks, + num_fcntl_locks, + num_flock_locks); + if (err) { + kfree(flocks); + flocks = NULL; + if (err == -ENOSPC) + goto encode_again; + goto out_free; + } + } else { kfree(flocks); - if (err == -ENOSPC) - goto encode_again; - goto out_free; + flocks = NULL; } if (recon_state->msg_version >= 3) { -- cgit v1.2.3 From b3f8d68f38a879daed1eab66c0e19bc293096d34 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 11 Sep 2017 10:58:55 +0800 Subject: ceph: handle 'session get evicted while there are file locks' When session get evicted, all file locks associated with the session get released remotely by mds. File locks tracked by kernel become stale. In this situation, set an error flag on inode. The flag makes further file locks return -EIO. Another option to handle this situation is cleanup file locks tracked kernel. I do not choose it because it is inconvenient to notify user program about the error. Signed-off-by: "Yan, Zheng" Acked-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/locks.c | 52 ++++++++++++++++++++++++++++++++++++++++------------ fs/ceph/mds_client.c | 21 ++++++++++++++++----- fs/ceph/super.h | 2 ++ 3 files changed, 58 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index aaea82076849..9e66f69ee8a5 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -39,7 +39,13 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) static void ceph_fl_release_lock(struct file_lock *fl) { struct inode *inode = file_inode(fl->fl_file); - atomic_dec(&ceph_inode(inode)->i_filelock_ref); + struct ceph_inode_info *ci = ceph_inode(inode); + if (atomic_dec_and_test(&ci->i_filelock_ref)) { + /* clear error when all locks are released */ + spin_lock(&ci->i_ceph_lock); + ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK; + spin_unlock(&ci->i_ceph_lock); + } } static const struct file_lock_operations ceph_fl_lock_ops = { @@ -208,10 +214,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, int ceph_lock(struct file *file, int cmd, struct file_lock *fl) { struct inode *inode = file_inode(file); - int err; + struct ceph_inode_info *ci = ceph_inode(inode); + int err = 0; u16 op = CEPH_MDS_OP_SETFILELOCK; - u8 lock_cmd; u8 wait = 0; + u8 lock_cmd; if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; @@ -227,7 +234,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) else if (IS_SETLKW(cmd)) wait = 1; - if (op == CEPH_MDS_OP_SETFILELOCK) { + spin_lock(&ci->i_ceph_lock); + if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { + err = -EIO; + } else if (op == CEPH_MDS_OP_SETFILELOCK) { /* * increasing i_filelock_ref closes race window between * handling request reply and adding file_lock struct to @@ -235,7 +245,13 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) * window. Caller function will decrease the counter. */ fl->fl_ops = &ceph_fl_lock_ops; - atomic_inc(&ceph_inode(inode)->i_filelock_ref); + atomic_inc(&ci->i_filelock_ref); + } + spin_unlock(&ci->i_ceph_lock); + if (err < 0) { + if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type) + posix_lock_file(file, fl, NULL); + return err; } if (F_RDLCK == fl->fl_type) @@ -247,10 +263,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl); if (!err) { - if (op != CEPH_MDS_OP_GETFILELOCK) { + if (op == CEPH_MDS_OP_SETFILELOCK) { dout("mds locked, locking locally"); err = posix_lock_file(file, fl, NULL); - if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { + if (err) { /* undo! This should only happen if * the kernel detects local * deadlock. */ @@ -267,9 +283,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) int ceph_flock(struct file *file, int cmd, struct file_lock *fl) { struct inode *inode = file_inode(file); - int err; - u8 lock_cmd; + struct ceph_inode_info *ci = ceph_inode(inode); + int err = 0; u8 wait = 0; + u8 lock_cmd; if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; @@ -279,9 +296,20 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) dout("ceph_flock, fl_file: %p", fl->fl_file); - /* see comment in ceph_lock */ - fl->fl_ops = &ceph_fl_lock_ops; - atomic_inc(&ceph_inode(inode)->i_filelock_ref); + spin_lock(&ci->i_ceph_lock); + if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { + err = -EIO; + } else { + /* see comment in ceph_lock */ + fl->fl_ops = &ceph_fl_lock_ops; + atomic_inc(&ci->i_filelock_ref); + } + spin_unlock(&ci->i_ceph_lock); + if (err < 0) { + if (F_UNLCK == fl->fl_type) + locks_lock_file_wait(file, fl); + return err; + } if (IS_SETLKW(cmd)) wait = 1; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 295cf5e42ea9..8c8361262ade 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1215,6 +1215,13 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, } spin_unlock(&mdsc->cap_dirty_lock); + if (atomic_read(&ci->i_filelock_ref) > 0) { + /* make further file lock syscall return -EIO */ + ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK; + pr_warn_ratelimited(" dropping file locks for %p %lld\n", + inode, ceph_ino(inode)); + } + if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) { list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); ci->i_prealloc_cap_flush = NULL; @@ -2832,7 +2839,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_mds_cap_reconnect v2; struct ceph_mds_cap_reconnect_v1 v1; } rec; - struct ceph_inode_info *ci; + struct ceph_inode_info *ci = cap->ci; struct ceph_reconnect_state *recon_state = arg; struct ceph_pagelist *pagelist = recon_state->pagelist; char *path; @@ -2841,8 +2848,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, u64 snap_follows; struct dentry *dentry; - ci = cap->ci; - dout(" adding %p ino %llx.%llx cap %p %lld %s\n", inode, ceph_vinop(inode), cap, cap->cap_id, ceph_cap_string(cap->issued)); @@ -2875,7 +2880,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, rec.v2.issued = cpu_to_le32(cap->issued); rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); rec.v2.pathbase = cpu_to_le64(pathbase); - rec.v2.flock_len = 0; + rec.v2.flock_len = + (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1; } else { rec.v1.cap_id = cpu_to_le64(cap->cap_id); rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); @@ -2904,7 +2910,12 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, u8 struct_v = 0; encode_again: - ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); + if (rec.v2.flock_len) { + ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); + } else { + num_fcntl_locks = 0; + num_flock_locks = 0; + } if (num_fcntl_locks + num_flock_locks > 0) { flocks = kmalloc((num_fcntl_locks + num_flock_locks) * sizeof(struct ceph_filelock), GFP_NOFS); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 0b2c801f4bbb..2beeec07fa76 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -488,6 +488,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, #define CEPH_I_KICK_FLUSH (1 << 9) /* kick flushing caps */ #define CEPH_I_FLUSH_SNAPS (1 << 10) /* need flush snapss */ #define CEPH_I_ERROR_WRITE (1 << 11) /* have seen write errors */ +#define CEPH_I_ERROR_FILELOCK (1 << 12) /* have seen file lock errors */ + /* * We set the ERROR_WRITE bit when we start seeing write errors on an inode -- cgit v1.2.3 From 7271efa79f8bc01694d1a9fce597088a97b3b160 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Sat, 7 Oct 2017 16:02:21 +0200 Subject: ceph: fix bool initialization/comparison Bool initializations should use true and false. Bool tests don't need comparisons. Signed-off-by: Thomas Meyer Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ff5d32cf9578..05ae1e472547 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1712,7 +1712,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, /* if we are unmounting, flush any unused caps immediately. */ if (mdsc->stopping) - is_delayed = 1; + is_delayed = true; spin_lock(&ci->i_ceph_lock); @@ -3189,8 +3189,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, int dirty = le32_to_cpu(m->dirty); int cleaned = 0; bool drop = false; - bool wake_ci = 0; - bool wake_mdsc = 0; + bool wake_ci = false; + bool wake_mdsc = false; list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) { if (cf->tid == flush_tid) -- cgit v1.2.3 From 933ad2c9c8bbb1623c2d3c5753ad340152e15d9d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 10 Oct 2017 17:06:25 +0800 Subject: ceph: disable cached readdir after dropping positive dentry Ideally CEPH_CAP_FILE_SHARED should have been revoked before postive dentry get dropped. But if something goes wrong, later cached readdir may dereference the dropped dentry. Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 6301bf299b7c..16d8b9dac649 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1186,6 +1186,7 @@ retry_lookup: ceph_snap(d_inode(dn)) != tvino.snap)) { dout(" dn %p points to wrong inode %p\n", dn, d_inode(dn)); + ceph_dir_clear_ordered(dir); d_delete(dn); dput(dn); goto retry_lookup; @@ -1323,6 +1324,7 @@ retry_lookup: dout(" %p links to %p %llx.%llx, not %llx.%llx\n", dn, d_inode(dn), ceph_vinop(d_inode(dn)), ceph_vinop(in)); + ceph_dir_clear_ordered(dir); d_invalidate(dn); have_lease = false; } @@ -1574,6 +1576,7 @@ retry_lookup: ceph_snap(d_inode(dn)) != tvino.snap)) { dout(" dn %p points to wrong inode %p\n", dn, d_inode(dn)); + __ceph_dir_clear_ordered(ci); d_delete(dn); dput(dn); goto retry_lookup; @@ -1598,7 +1601,9 @@ retry_lookup: &req->r_caps_reservation); if (ret < 0) { pr_err("fill_inode badness on %p\n", in); - if (d_really_is_negative(dn)) + if (d_really_is_positive(dn)) + __ceph_dir_clear_ordered(ci); + else iput(in); d_drop(dn); err = ret; -- cgit v1.2.3 From 76bd6ec4988065d39983ba8e93bb738313f68050 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 Oct 2017 10:32:50 +0200 Subject: ceph: -EINVAL on decoding failure in ceph_mdsc_handle_fsmap() Don't set ->mdsmap_err to -ENOENT unconditionally, and drop unneeded return statement while at it. Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 8c8361262ade..b76506be4228 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3879,14 +3879,14 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) goto err_out; } return; + bad: pr_err("error decoding fsmap\n"); err_out: mutex_lock(&mdsc->mutex); - mdsc->mdsmap_err = -ENOENT; + mdsc->mdsmap_err = err; __wake_requests(mdsc, &mdsc->waiting_for_map); mutex_unlock(&mdsc->mutex); - return; } /* -- cgit v1.2.3 From bb0581f01c38ff525295fc6128bc3a49202dabae Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 18 Oct 2017 12:34:25 +0100 Subject: ceph: remove unused and redundant variable dropping Variable dropping is set but never read and hence is redundant and can be removed. Cleans up clang warning: fs/ceph/caps.c:1170:2: warning: Value stored to 'dropping' is never read Signed-off-by: Colin Ian King Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 05ae1e472547..a14b2c974c9e 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1160,7 +1160,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, struct ceph_inode_info *ci = cap->ci; struct inode *inode = &ci->vfs_inode; struct cap_msg_args arg; - int held, revoking, dropping; + int held, revoking; int wake = 0; int delayed = 0; int ret; @@ -1168,7 +1168,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, held = cap->issued | cap->implemented; revoking = cap->implemented & ~cap->issued; retain &= ~revoking; - dropping = cap->issued & ~retain; dout("__send_cap %p cap %p session %p %s -> %s (revoking %s)\n", inode, cap, cap->session, -- cgit v1.2.3 From c8a96a31cb04c7664626ab6ada7f66c98c09efbd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 19 Oct 2017 08:53:58 -0400 Subject: ceph: clean up spinlocking and list handling around cleanup_cap_releases() Functions that release a lock taken in a parent frame are notoriously hard to follow. Split cleanup_cap_releases into two functions, one to detach the cap releases from the session (which should be called with the spinlock held), and another to dispose of those caps. Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b76506be4228..53cde84e698a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1039,22 +1039,23 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, * session caps */ -/* caller holds s_cap_lock, we drop it */ -static void cleanup_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) - __releases(session->s_cap_lock) +static void detach_cap_releases(struct ceph_mds_session *session, + struct list_head *target) { - LIST_HEAD(tmp_list); - list_splice_init(&session->s_cap_releases, &tmp_list); + lockdep_assert_held(&session->s_cap_lock); + + list_splice_init(&session->s_cap_releases, target); session->s_num_cap_releases = 0; - spin_unlock(&session->s_cap_lock); + dout("dispose_cap_releases mds%d\n", session->s_mds); +} - dout("cleanup_cap_releases mds%d\n", session->s_mds); - while (!list_empty(&tmp_list)) { +static void dispose_cap_releases(struct ceph_mds_client *mdsc, + struct list_head *dispose) +{ + while (!list_empty(dispose)) { struct ceph_cap *cap; /* zero out the in-progress message */ - cap = list_first_entry(&tmp_list, - struct ceph_cap, session_caps); + cap = list_first_entry(dispose, struct ceph_cap, session_caps); list_del(&cap->session_caps); ceph_put_cap(mdsc, cap); } @@ -1251,6 +1252,8 @@ static void remove_session_caps(struct ceph_mds_session *session) { struct ceph_fs_client *fsc = session->s_mdsc->fsc; struct super_block *sb = fsc->sb; + LIST_HEAD(dispose); + dout("remove_session_caps on %p\n", session); iterate_session_caps(session, remove_session_caps_cb, fsc); @@ -1285,10 +1288,12 @@ static void remove_session_caps(struct ceph_mds_session *session) } // drop cap expires and unlock s_cap_lock - cleanup_cap_releases(session->s_mdsc, session); + detach_cap_releases(session, &dispose); BUG_ON(session->s_nr_caps > 0); BUG_ON(!list_empty(&session->s_cap_flushing)); + spin_unlock(&session->s_cap_lock); + dispose_cap_releases(session->s_mdsc, &dispose); } /* @@ -3015,6 +3020,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int s_nr_caps; struct ceph_pagelist *pagelist; struct ceph_reconnect_state recon_state; + LIST_HEAD(dispose); pr_info("mds%d reconnect start\n", mds); @@ -3048,7 +3054,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, */ session->s_cap_reconnect = 1; /* drop old cap expires; we're about to reestablish that state */ - cleanup_cap_releases(mdsc, session); + detach_cap_releases(session, &dispose); + spin_unlock(&session->s_cap_lock); + dispose_cap_releases(mdsc, &dispose); /* trim unused caps to reduce MDS's cache rejoin time */ if (mdsc->fsc->sb->s_root) -- cgit v1.2.3 From 080a330e1d9142b9d958a40dcef3ae5ae5d8820a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Oct 2017 10:58:40 -0400 Subject: ceph: present consistent fsid, regardless of arch endianness Since its inception, ceph has presented the fsid as an opaque value without any sort of endianness conversion. This means that the value presented is different on architectures of different endianness. While the value that should be stuffed into f_fsid is poorly-defined, I think it would be best to strive for consistency here between architectures, and clients (we need to present this properly to the userland client as well). Change ceph_statfs to convert the opaque words to host-endian before doing the xor. On an upgrade, a big-endian box may see a different fsid than it did before, but little-endian arches should see no change with this patch. Signed-off-by: Jeff Layton Reviewed-by: Sage Weil Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index e4082afedcb1..fe9fbb3f13f7 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -84,8 +84,9 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_ffree = -1; buf->f_namelen = NAME_MAX; - /* leave fsid little-endian, regardless of host endianness */ - fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); + /* Must convert the fsid, for consistent values across arches */ + fsid = le64_to_cpu(*(__le64 *)(&monmap->fsid)) ^ + le64_to_cpu(*((__le64 *)&monmap->fsid + 1)); buf->f_fsid.val[0] = fsid & 0xffffffff; buf->f_fsid.val[1] = fsid >> 32; -- cgit v1.2.3 From 81302565178cfa2d419f5e9743add02997c6b2dc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Oct 2017 10:58:30 -0400 Subject: ceph: remove the bump of i_version Eventually, we'll want to wire cephfs up to use the change attribute that the cluster tracks instead, but for now this is unneeded. Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 16d8b9dac649..ab81652198c4 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -787,7 +787,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page, /* update inode */ ci->i_version = le64_to_cpu(info->version); - inode->i_version++; inode->i_rdev = le32_to_cpu(info->rdev); inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; -- cgit v1.2.3 From ec1dff25b0a012711e10290039fbc8e1c5dd69fb Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 31 Oct 2017 15:51:14 -0400 Subject: ceph: silence sparse endianness warning in encode_caps_cb sparse warns: fs/ceph/mds_client.c:2887:34: warning: incorrect type in assignment (different base types) fs/ceph/mds_client.c:2887:34: expected restricted __le32 [assigned] [usertype] flock_len fs/ceph/mds_client.c:2887:34: got int At this point, it's just being used as a flag. It gets overwritten later if the rest of the encoding succeeds. Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 53cde84e698a..ab69dcb70e8a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2885,8 +2885,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, rec.v2.issued = cpu_to_le32(cap->issued); rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); rec.v2.pathbase = cpu_to_le64(pathbase); - rec.v2.flock_len = - (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1; + rec.v2.flock_len = (__force __le32) + ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); } else { rec.v1.cap_id = cpu_to_le64(cap->cap_id); rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); -- cgit v1.2.3