From 89aa593010135660991d05c92528c2c9163d5900 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Fri, 8 Sep 2017 15:23:18 +0800
Subject: ceph: keep auth cap when inode has flocks or posix locks

file locks are tracked by inode's auth mds. dropping auth caps
is equivalent to releasing all file locks.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/inode.c      |  1 +
 fs/ceph/locks.c      | 62 ++++++++++++++++++++++++++++++++++++++++++++--------
 fs/ceph/mds_client.c |  5 +++++
 fs/ceph/super.h      |  1 +
 4 files changed, 60 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index f2550a076edc..6301bf299b7c 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -493,6 +493,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	ci->i_wb_ref = 0;
 	ci->i_wrbuffer_ref = 0;
 	ci->i_wrbuffer_ref_head = 0;
+	atomic_set(&ci->i_filelock_ref, 0);
 	ci->i_shared_gen = 0;
 	ci->i_rdcache_gen = 0;
 	ci->i_rdcache_revoking = 0;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index e7cce412f2cf..316d550b9603 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -30,19 +30,46 @@ void __init ceph_flock_init(void)
 	get_random_bytes(&lock_secret, sizeof(lock_secret));
 }
 
+static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
+{
+	struct inode *inode = file_inode(src->fl_file);
+	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+}
+
+static void ceph_fl_release_lock(struct file_lock *fl)
+{
+	struct inode *inode = file_inode(fl->fl_file);
+	atomic_dec(&ceph_inode(inode)->i_filelock_ref);
+}
+
+static const struct file_lock_operations ceph_fl_lock_ops = {
+	.fl_copy_lock = ceph_fl_copy_lock,
+	.fl_release_private = ceph_fl_release_lock,
+};
+
 /**
  * Implement fcntl and flock locking functions.
  */
-static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
+static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
 			     int cmd, u8 wait, struct file_lock *fl)
 {
-	struct inode *inode = file_inode(file);
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_mds_request *req;
 	int err;
 	u64 length = 0;
 	u64 owner;
 
+	if (operation == CEPH_MDS_OP_SETFILELOCK) {
+		/*
+		 * increasing i_filelock_ref closes race window between
+		 * handling request reply and adding file_lock struct to
+		 * inode. Otherwise, auth caps may get trimmed in the
+		 * window. Caller function will decrease the counter.
+		 */
+		fl->fl_ops = &ceph_fl_lock_ops;
+		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+	}
+
 	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
 		wait = 0;
 
@@ -180,10 +207,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
  */
 int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 {
-	u8 lock_cmd;
+	struct inode *inode = file_inode(file);
 	int err;
-	u8 wait = 0;
 	u16 op = CEPH_MDS_OP_SETFILELOCK;
+	u8 lock_cmd;
+	u8 wait = 0;
 
 	if (!(fl->fl_flags & FL_POSIX))
 		return -ENOLCK;
@@ -199,6 +227,17 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 	else if (IS_SETLKW(cmd))
 		wait = 1;
 
+	if (op == CEPH_MDS_OP_SETFILELOCK) {
+		/*
+		 * increasing i_filelock_ref closes race window between
+		 * handling request reply and adding file_lock struct to
+		 * inode. Otherwise, i_auth_cap may get trimmed in the
+		 * window. Caller function will decrease the counter.
+		 */
+		fl->fl_ops = &ceph_fl_lock_ops;
+		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+	}
+
 	if (F_RDLCK == fl->fl_type)
 		lock_cmd = CEPH_LOCK_SHARED;
 	else if (F_WRLCK == fl->fl_type)
@@ -206,7 +245,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 	else
 		lock_cmd = CEPH_LOCK_UNLOCK;
 
-	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
+	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
 	if (!err) {
 		if (op != CEPH_MDS_OP_GETFILELOCK) {
 			dout("mds locked, locking locally");
@@ -215,7 +254,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 				/* undo! This should only happen if
 				 * the kernel detects local
 				 * deadlock. */
-				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+				ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
 						  CEPH_LOCK_UNLOCK, 0, fl);
 				dout("got %d on posix_lock_file, undid lock",
 				     err);
@@ -227,8 +266,9 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 
 int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 {
-	u8 lock_cmd;
+	struct inode *inode = file_inode(file);
 	int err;
+	u8 lock_cmd;
 	u8 wait = 0;
 
 	if (!(fl->fl_flags & FL_FLOCK))
@@ -239,6 +279,10 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 
 	dout("ceph_flock, fl_file: %p", fl->fl_file);
 
+	/* see comment in ceph_lock */
+	fl->fl_ops = &ceph_fl_lock_ops;
+	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+
 	if (IS_SETLKW(cmd))
 		wait = 1;
 
@@ -250,13 +294,13 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 		lock_cmd = CEPH_LOCK_UNLOCK;
 
 	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
-				file, lock_cmd, wait, fl);
+				inode, lock_cmd, wait, fl);
 	if (!err) {
 		err = locks_lock_file_wait(file, fl);
 		if (err) {
 			ceph_lock_message(CEPH_LOCK_FLOCK,
 					  CEPH_MDS_OP_SETFILELOCK,
-					  file, CEPH_LOCK_UNLOCK, 0, fl);
+					  inode, CEPH_LOCK_UNLOCK, 0, fl);
 			dout("got %d on locks_lock_file_wait, undid lock", err);
 		}
 	}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0687ab3c3267..c8a811db387a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1462,6 +1462,11 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
 			goto out;
 		if ((used | wanted) & CEPH_CAP_ANY_WR)
 			goto out;
+		/* Note: it's possible that i_filelock_ref becomes non-zero
+		 * after dropping auth caps. It doesn't hurt because reply
+		 * of lock mds request will re-add auth caps. */
+		if (atomic_read(&ci->i_filelock_ref) > 0)
+			goto out;
 	}
 	/* The inode has cached pages, but it's no longer used.
 	 * we can safely drop it */
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3e27a28aa44a..100596c49353 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -352,6 +352,7 @@ struct ceph_inode_info {
 	int i_pin_ref;
 	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
 	int i_wrbuffer_ref, i_wrbuffer_ref_head;
+	atomic_t i_filelock_ref;
 	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
 	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
 	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
-- 
cgit v1.2.3


From c6db84723363790160a89dee4554ad2f0687a0c5 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Mon, 11 Sep 2017 09:58:56 +0800
Subject: ceph: make lock_to_ceph_filelock() static

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/locks.c | 62 ++++++++++++++++++++++++++++-----------------------------
 fs/ceph/super.h |  1 -
 2 files changed, 31 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 316d550b9603..2927f3bc2fc9 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -332,6 +332,37 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
 	     *flock_count, *fcntl_count);
 }
 
+/*
+ * Given a pointer to a lock, convert it to a ceph filelock
+ */
+static int lock_to_ceph_filelock(struct file_lock *lock,
+				 struct ceph_filelock *cephlock)
+{
+	int err = 0;
+	cephlock->start = cpu_to_le64(lock->fl_start);
+	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
+	cephlock->client = cpu_to_le64(0);
+	cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
+	cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
+
+	switch (lock->fl_type) {
+	case F_RDLCK:
+		cephlock->type = CEPH_LOCK_SHARED;
+		break;
+	case F_WRLCK:
+		cephlock->type = CEPH_LOCK_EXCL;
+		break;
+	case F_UNLCK:
+		cephlock->type = CEPH_LOCK_UNLOCK;
+		break;
+	default:
+		dout("Have unknown lock type %d", lock->fl_type);
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
 /**
  * Encode the flock and fcntl locks for the given inode into the ceph_filelock
  * array. Must be called with inode->i_lock already held.
@@ -416,34 +447,3 @@ int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
 out_fail:
 	return err;
 }
-
-/*
- * Given a pointer to a lock, convert it to a ceph filelock
- */
-int lock_to_ceph_filelock(struct file_lock *lock,
-			  struct ceph_filelock *cephlock)
-{
-	int err = 0;
-	cephlock->start = cpu_to_le64(lock->fl_start);
-	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
-	cephlock->client = cpu_to_le64(0);
-	cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
-	cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
-
-	switch (lock->fl_type) {
-	case F_RDLCK:
-		cephlock->type = CEPH_LOCK_SHARED;
-		break;
-	case F_WRLCK:
-		cephlock->type = CEPH_LOCK_EXCL;
-		break;
-	case F_UNLCK:
-		cephlock->type = CEPH_LOCK_UNLOCK;
-		break;
-	default:
-		dout("Have unknown lock type %d", lock->fl_type);
-		err = -EINVAL;
-	}
-
-	return err;
-}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 100596c49353..0b2c801f4bbb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1012,7 +1012,6 @@ extern int ceph_encode_locks_to_buffer(struct inode *inode,
 extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
 				  struct ceph_pagelist *pagelist,
 				  int num_fcntl_locks, int num_flock_locks);
-extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
 
 /* debugfs.c */
 extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
-- 
cgit v1.2.3


From 4deb14a2593dfade102dd94a803a63cf620cfd56 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Mon, 11 Sep 2017 10:36:28 +0800
Subject: ceph: optimize flock encoding during reconnect

Don't malloc if there is no flock.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/locks.c      | 17 ++++++++++-------
 fs/ceph/mds_client.c | 34 ++++++++++++++++++++--------------
 2 files changed, 30 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 2927f3bc2fc9..aaea82076849 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -431,19 +431,22 @@ int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
 	if (err)
 		goto out_fail;
 
-	err = ceph_pagelist_append(pagelist, flocks,
-				   num_fcntl_locks * sizeof(*flocks));
-	if (err)
-		goto out_fail;
+	if (num_fcntl_locks > 0) {
+		err = ceph_pagelist_append(pagelist, flocks,
+					   num_fcntl_locks * sizeof(*flocks));
+		if (err)
+			goto out_fail;
+	}
 
 	nlocks = cpu_to_le32(num_flock_locks);
 	err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
 	if (err)
 		goto out_fail;
 
-	err = ceph_pagelist_append(pagelist,
-				   &flocks[num_fcntl_locks],
-				   num_flock_locks * sizeof(*flocks));
+	if (num_flock_locks > 0) {
+		err = ceph_pagelist_append(pagelist, &flocks[num_fcntl_locks],
+					   num_flock_locks * sizeof(*flocks));
+	}
 out_fail:
 	return err;
 }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c8a811db387a..295cf5e42ea9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2899,26 +2899,32 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 
 	if (recon_state->msg_version >= 2) {
 		int num_fcntl_locks, num_flock_locks;
-		struct ceph_filelock *flocks;
+		struct ceph_filelock *flocks = NULL;
 		size_t struct_len, total_len = 0;
 		u8 struct_v = 0;
 
 encode_again:
 		ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
-		flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
-				 sizeof(struct ceph_filelock), GFP_NOFS);
-		if (!flocks) {
-			err = -ENOMEM;
-			goto out_free;
-		}
-		err = ceph_encode_locks_to_buffer(inode, flocks,
-						  num_fcntl_locks,
-						  num_flock_locks);
-		if (err) {
+		if (num_fcntl_locks + num_flock_locks > 0) {
+			flocks = kmalloc((num_fcntl_locks + num_flock_locks) *
+					 sizeof(struct ceph_filelock), GFP_NOFS);
+			if (!flocks) {
+				err = -ENOMEM;
+				goto out_free;
+			}
+			err = ceph_encode_locks_to_buffer(inode, flocks,
+							  num_fcntl_locks,
+							  num_flock_locks);
+			if (err) {
+				kfree(flocks);
+				flocks = NULL;
+				if (err == -ENOSPC)
+					goto encode_again;
+				goto out_free;
+			}
+		} else {
 			kfree(flocks);
-			if (err == -ENOSPC)
-				goto encode_again;
-			goto out_free;
+			flocks = NULL;
 		}
 
 		if (recon_state->msg_version >= 3) {
-- 
cgit v1.2.3


From b3f8d68f38a879daed1eab66c0e19bc293096d34 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Mon, 11 Sep 2017 10:58:55 +0800
Subject: ceph: handle 'session get evicted while there are file locks'

When session get evicted, all file locks associated with the session
get released remotely by mds. File locks tracked by kernel become
stale. In this situation, set an error flag on inode. The flag makes
further file locks return -EIO.

Another option to handle this situation is cleanup file locks tracked
kernel. I do not choose it because it is inconvenient to notify user
program about the error.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/locks.c      | 52 ++++++++++++++++++++++++++++++++++++++++------------
 fs/ceph/mds_client.c | 21 ++++++++++++++++-----
 fs/ceph/super.h      |  2 ++
 3 files changed, 58 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index aaea82076849..9e66f69ee8a5 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -39,7 +39,13 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 static void ceph_fl_release_lock(struct file_lock *fl)
 {
 	struct inode *inode = file_inode(fl->fl_file);
-	atomic_dec(&ceph_inode(inode)->i_filelock_ref);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
+		/* clear error when all locks are released */
+		spin_lock(&ci->i_ceph_lock);
+		ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
+		spin_unlock(&ci->i_ceph_lock);
+	}
 }
 
 static const struct file_lock_operations ceph_fl_lock_ops = {
@@ -208,10 +214,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
 int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = file_inode(file);
-	int err;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int err = 0;
 	u16 op = CEPH_MDS_OP_SETFILELOCK;
-	u8 lock_cmd;
 	u8 wait = 0;
+	u8 lock_cmd;
 
 	if (!(fl->fl_flags & FL_POSIX))
 		return -ENOLCK;
@@ -227,7 +234,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 	else if (IS_SETLKW(cmd))
 		wait = 1;
 
-	if (op == CEPH_MDS_OP_SETFILELOCK) {
+	spin_lock(&ci->i_ceph_lock);
+	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
+		err = -EIO;
+	} else if (op == CEPH_MDS_OP_SETFILELOCK) {
 		/*
 		 * increasing i_filelock_ref closes race window between
 		 * handling request reply and adding file_lock struct to
@@ -235,7 +245,13 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 		 * window. Caller function will decrease the counter.
 		 */
 		fl->fl_ops = &ceph_fl_lock_ops;
-		atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+		atomic_inc(&ci->i_filelock_ref);
+	}
+	spin_unlock(&ci->i_ceph_lock);
+	if (err < 0) {
+		if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type)
+			posix_lock_file(file, fl, NULL);
+		return err;
 	}
 
 	if (F_RDLCK == fl->fl_type)
@@ -247,10 +263,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 
 	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
 	if (!err) {
-		if (op != CEPH_MDS_OP_GETFILELOCK) {
+		if (op == CEPH_MDS_OP_SETFILELOCK) {
 			dout("mds locked, locking locally");
 			err = posix_lock_file(file, fl, NULL);
-			if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
+			if (err) {
 				/* undo! This should only happen if
 				 * the kernel detects local
 				 * deadlock. */
@@ -267,9 +283,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = file_inode(file);
-	int err;
-	u8 lock_cmd;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int err = 0;
 	u8 wait = 0;
+	u8 lock_cmd;
 
 	if (!(fl->fl_flags & FL_FLOCK))
 		return -ENOLCK;
@@ -279,9 +296,20 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 
 	dout("ceph_flock, fl_file: %p", fl->fl_file);
 
-	/* see comment in ceph_lock */
-	fl->fl_ops = &ceph_fl_lock_ops;
-	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+	spin_lock(&ci->i_ceph_lock);
+	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
+		err = -EIO;
+	} else {
+		/* see comment in ceph_lock */
+		fl->fl_ops = &ceph_fl_lock_ops;
+		atomic_inc(&ci->i_filelock_ref);
+	}
+	spin_unlock(&ci->i_ceph_lock);
+	if (err < 0) {
+		if (F_UNLCK == fl->fl_type)
+			locks_lock_file_wait(file, fl);
+		return err;
+	}
 
 	if (IS_SETLKW(cmd))
 		wait = 1;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 295cf5e42ea9..8c8361262ade 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1215,6 +1215,13 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		}
 		spin_unlock(&mdsc->cap_dirty_lock);
 
+		if (atomic_read(&ci->i_filelock_ref) > 0) {
+			/* make further file lock syscall return -EIO */
+			ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
+			pr_warn_ratelimited(" dropping file locks for %p %lld\n",
+					    inode, ceph_ino(inode));
+		}
+
 		if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
 			list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
 			ci->i_prealloc_cap_flush = NULL;
@@ -2832,7 +2839,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		struct ceph_mds_cap_reconnect v2;
 		struct ceph_mds_cap_reconnect_v1 v1;
 	} rec;
-	struct ceph_inode_info *ci;
+	struct ceph_inode_info *ci = cap->ci;
 	struct ceph_reconnect_state *recon_state = arg;
 	struct ceph_pagelist *pagelist = recon_state->pagelist;
 	char *path;
@@ -2841,8 +2848,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	u64 snap_follows;
 	struct dentry *dentry;
 
-	ci = cap->ci;
-
 	dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
 	     inode, ceph_vinop(inode), cap, cap->cap_id,
 	     ceph_cap_string(cap->issued));
@@ -2875,7 +2880,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		rec.v2.issued = cpu_to_le32(cap->issued);
 		rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
 		rec.v2.pathbase = cpu_to_le64(pathbase);
-		rec.v2.flock_len = 0;
+		rec.v2.flock_len =
+			(ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1;
 	} else {
 		rec.v1.cap_id = cpu_to_le64(cap->cap_id);
 		rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
@@ -2904,7 +2910,12 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		u8 struct_v = 0;
 
 encode_again:
-		ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
+		if (rec.v2.flock_len) {
+			ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
+		} else {
+			num_fcntl_locks = 0;
+			num_flock_locks = 0;
+		}
 		if (num_fcntl_locks + num_flock_locks > 0) {
 			flocks = kmalloc((num_fcntl_locks + num_flock_locks) *
 					 sizeof(struct ceph_filelock), GFP_NOFS);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 0b2c801f4bbb..2beeec07fa76 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -488,6 +488,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 #define CEPH_I_KICK_FLUSH	(1 << 9)  /* kick flushing caps */
 #define CEPH_I_FLUSH_SNAPS	(1 << 10) /* need flush snapss */
 #define CEPH_I_ERROR_WRITE	(1 << 11) /* have seen write errors */
+#define CEPH_I_ERROR_FILELOCK	(1 << 12) /* have seen file lock errors */
+
 
 /*
  * We set the ERROR_WRITE bit when we start seeing write errors on an inode
-- 
cgit v1.2.3


From 7271efa79f8bc01694d1a9fce597088a97b3b160 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Sat, 7 Oct 2017 16:02:21 +0200
Subject: ceph: fix bool initialization/comparison

Bool initializations should use true and false. Bool tests don't need
comparisons.

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/caps.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ff5d32cf9578..05ae1e472547 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1712,7 +1712,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 
 	/* if we are unmounting, flush any unused caps immediately. */
 	if (mdsc->stopping)
-		is_delayed = 1;
+		is_delayed = true;
 
 	spin_lock(&ci->i_ceph_lock);
 
@@ -3189,8 +3189,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
 	int dirty = le32_to_cpu(m->dirty);
 	int cleaned = 0;
 	bool drop = false;
-	bool wake_ci = 0;
-	bool wake_mdsc = 0;
+	bool wake_ci = false;
+	bool wake_mdsc = false;
 
 	list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
 		if (cf->tid == flush_tid)
-- 
cgit v1.2.3


From 933ad2c9c8bbb1623c2d3c5753ad340152e15d9d Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Tue, 10 Oct 2017 17:06:25 +0800
Subject: ceph: disable cached readdir after dropping positive dentry

Ideally CEPH_CAP_FILE_SHARED should have been revoked before
postive dentry get dropped. But if something goes wrong, later
cached readdir may dereference the dropped dentry.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/inode.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 6301bf299b7c..16d8b9dac649 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1186,6 +1186,7 @@ retry_lookup:
 				    ceph_snap(d_inode(dn)) != tvino.snap)) {
 				dout(" dn %p points to wrong inode %p\n",
 				     dn, d_inode(dn));
+				ceph_dir_clear_ordered(dir);
 				d_delete(dn);
 				dput(dn);
 				goto retry_lookup;
@@ -1323,6 +1324,7 @@ retry_lookup:
 			dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
 			     dn, d_inode(dn), ceph_vinop(d_inode(dn)),
 			     ceph_vinop(in));
+			ceph_dir_clear_ordered(dir);
 			d_invalidate(dn);
 			have_lease = false;
 		}
@@ -1574,6 +1576,7 @@ retry_lookup:
 			    ceph_snap(d_inode(dn)) != tvino.snap)) {
 			dout(" dn %p points to wrong inode %p\n",
 			     dn, d_inode(dn));
+			__ceph_dir_clear_ordered(ci);
 			d_delete(dn);
 			dput(dn);
 			goto retry_lookup;
@@ -1598,7 +1601,9 @@ retry_lookup:
 				 &req->r_caps_reservation);
 		if (ret < 0) {
 			pr_err("fill_inode badness on %p\n", in);
-			if (d_really_is_negative(dn))
+			if (d_really_is_positive(dn))
+				__ceph_dir_clear_ordered(ci);
+			else
 				iput(in);
 			d_drop(dn);
 			err = ret;
-- 
cgit v1.2.3


From 76bd6ec4988065d39983ba8e93bb738313f68050 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 16 Oct 2017 10:32:50 +0200
Subject: ceph: -EINVAL on decoding failure in ceph_mdsc_handle_fsmap()

Don't set ->mdsmap_err to -ENOENT unconditionally, and drop unneeded
return statement while at it.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 8c8361262ade..b76506be4228 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3879,14 +3879,14 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 		goto err_out;
 	}
 	return;
+
 bad:
 	pr_err("error decoding fsmap\n");
 err_out:
 	mutex_lock(&mdsc->mutex);
-	mdsc->mdsmap_err = -ENOENT;
+	mdsc->mdsmap_err = err;
 	__wake_requests(mdsc, &mdsc->waiting_for_map);
 	mutex_unlock(&mdsc->mutex);
-	return;
 }
 
 /*
-- 
cgit v1.2.3


From bb0581f01c38ff525295fc6128bc3a49202dabae Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 18 Oct 2017 12:34:25 +0100
Subject: ceph: remove unused and redundant variable dropping

Variable dropping is set but never read and hence is redundant
and can be removed. Cleans up clang warning:

  fs/ceph/caps.c:1170:2: warning: Value stored to 'dropping' is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/caps.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 05ae1e472547..a14b2c974c9e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1160,7 +1160,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	struct ceph_inode_info *ci = cap->ci;
 	struct inode *inode = &ci->vfs_inode;
 	struct cap_msg_args arg;
-	int held, revoking, dropping;
+	int held, revoking;
 	int wake = 0;
 	int delayed = 0;
 	int ret;
@@ -1168,7 +1168,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	held = cap->issued | cap->implemented;
 	revoking = cap->implemented & ~cap->issued;
 	retain &= ~revoking;
-	dropping = cap->issued & ~retain;
 
 	dout("__send_cap %p cap %p session %p %s -> %s (revoking %s)\n",
 	     inode, cap, cap->session,
-- 
cgit v1.2.3


From c8a96a31cb04c7664626ab6ada7f66c98c09efbd Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 19 Oct 2017 08:53:58 -0400
Subject: ceph: clean up spinlocking and list handling around
 cleanup_cap_releases()

Functions that release a lock taken in a parent frame are notoriously
hard to follow. Split cleanup_cap_releases into two functions, one to
detach the cap releases from the session (which should be called with
the spinlock held), and another to dispose of those caps.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b76506be4228..53cde84e698a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1039,22 +1039,23 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
  * session caps
  */
 
-/* caller holds s_cap_lock, we drop it */
-static void cleanup_cap_releases(struct ceph_mds_client *mdsc,
-				 struct ceph_mds_session *session)
-	__releases(session->s_cap_lock)
+static void detach_cap_releases(struct ceph_mds_session *session,
+				struct list_head *target)
 {
-	LIST_HEAD(tmp_list);
-	list_splice_init(&session->s_cap_releases, &tmp_list);
+	lockdep_assert_held(&session->s_cap_lock);
+
+	list_splice_init(&session->s_cap_releases, target);
 	session->s_num_cap_releases = 0;
-	spin_unlock(&session->s_cap_lock);
+	dout("dispose_cap_releases mds%d\n", session->s_mds);
+}
 
-	dout("cleanup_cap_releases mds%d\n", session->s_mds);
-	while (!list_empty(&tmp_list)) {
+static void dispose_cap_releases(struct ceph_mds_client *mdsc,
+				 struct list_head *dispose)
+{
+	while (!list_empty(dispose)) {
 		struct ceph_cap *cap;
 		/* zero out the in-progress message */
-		cap = list_first_entry(&tmp_list,
-					struct ceph_cap, session_caps);
+		cap = list_first_entry(dispose, struct ceph_cap, session_caps);
 		list_del(&cap->session_caps);
 		ceph_put_cap(mdsc, cap);
 	}
@@ -1251,6 +1252,8 @@ static void remove_session_caps(struct ceph_mds_session *session)
 {
 	struct ceph_fs_client *fsc = session->s_mdsc->fsc;
 	struct super_block *sb = fsc->sb;
+	LIST_HEAD(dispose);
+
 	dout("remove_session_caps on %p\n", session);
 	iterate_session_caps(session, remove_session_caps_cb, fsc);
 
@@ -1285,10 +1288,12 @@ static void remove_session_caps(struct ceph_mds_session *session)
 	}
 
 	// drop cap expires and unlock s_cap_lock
-	cleanup_cap_releases(session->s_mdsc, session);
+	detach_cap_releases(session, &dispose);
 
 	BUG_ON(session->s_nr_caps > 0);
 	BUG_ON(!list_empty(&session->s_cap_flushing));
+	spin_unlock(&session->s_cap_lock);
+	dispose_cap_releases(session->s_mdsc, &dispose);
 }
 
 /*
@@ -3015,6 +3020,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	int s_nr_caps;
 	struct ceph_pagelist *pagelist;
 	struct ceph_reconnect_state recon_state;
+	LIST_HEAD(dispose);
 
 	pr_info("mds%d reconnect start\n", mds);
 
@@ -3048,7 +3054,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	 */
 	session->s_cap_reconnect = 1;
 	/* drop old cap expires; we're about to reestablish that state */
-	cleanup_cap_releases(mdsc, session);
+	detach_cap_releases(session, &dispose);
+	spin_unlock(&session->s_cap_lock);
+	dispose_cap_releases(mdsc, &dispose);
 
 	/* trim unused caps to reduce MDS's cache rejoin time */
 	if (mdsc->fsc->sb->s_root)
-- 
cgit v1.2.3


From 080a330e1d9142b9d958a40dcef3ae5ae5d8820a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 23 Oct 2017 10:58:40 -0400
Subject: ceph: present consistent fsid, regardless of arch endianness

Since its inception, ceph has presented the fsid as an opaque value
without any sort of endianness conversion. This means that the value
presented is different on architectures of different endianness.

While the value that should be stuffed into f_fsid is poorly-defined,
I think it would be best to strive for consistency here between
architectures, and clients (we need to present this properly to the
userland client as well).

Change ceph_statfs to convert the opaque words to host-endian before
doing the xor. On an upgrade, a big-endian box may see a different fsid
than it did before, but little-endian arches should see no change with
this patch.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Sage Weil <sage@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/super.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index e4082afedcb1..fe9fbb3f13f7 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -84,8 +84,9 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_ffree = -1;
 	buf->f_namelen = NAME_MAX;
 
-	/* leave fsid little-endian, regardless of host endianness */
-	fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
+	/* Must convert the fsid, for consistent values across arches */
+	fsid = le64_to_cpu(*(__le64 *)(&monmap->fsid)) ^
+	       le64_to_cpu(*((__le64 *)&monmap->fsid + 1));
 	buf->f_fsid.val[0] = fsid & 0xffffffff;
 	buf->f_fsid.val[1] = fsid >> 32;
 
-- 
cgit v1.2.3


From 81302565178cfa2d419f5e9743add02997c6b2dc Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 30 Oct 2017 10:58:30 -0400
Subject: ceph: remove the bump of i_version

Eventually, we'll want to wire cephfs up to use the change attribute
that the cluster tracks instead, but for now this is unneeded.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/inode.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 16d8b9dac649..ab81652198c4 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -787,7 +787,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
 
 	/* update inode */
 	ci->i_version = le64_to_cpu(info->version);
-	inode->i_version++;
 	inode->i_rdev = le32_to_cpu(info->rdev);
 	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
 
-- 
cgit v1.2.3


From ec1dff25b0a012711e10290039fbc8e1c5dd69fb Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 31 Oct 2017 15:51:14 -0400
Subject: ceph: silence sparse endianness warning in encode_caps_cb

sparse warns:

  fs/ceph/mds_client.c:2887:34: warning: incorrect type in assignment (different base types)
  fs/ceph/mds_client.c:2887:34:    expected restricted __le32 [assigned] [usertype] flock_len
  fs/ceph/mds_client.c:2887:34:    got int

At this point, it's just being used as a flag. It gets
overwritten later if the rest of the encoding succeeds.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 53cde84e698a..ab69dcb70e8a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2885,8 +2885,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		rec.v2.issued = cpu_to_le32(cap->issued);
 		rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
 		rec.v2.pathbase = cpu_to_le64(pathbase);
-		rec.v2.flock_len =
-			(ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1;
+		rec.v2.flock_len = (__force __le32)
+			((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
 	} else {
 		rec.v1.cap_id = cpu_to_le64(cap->cap_id);
 		rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
-- 
cgit v1.2.3