From cf07d2ea43e5c22149435ee9002cb737eac20eca Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Sun, 28 Feb 2010 23:20:19 -0500
Subject: nfsd4: simplify references to nfsd4 lease time

Instead of accessing the lease time directly, some users call
nfs4_lease_time(), and some a macro, NFSD_LEASE_TIME, defined as
nfs4_lease_time().  Neither layer of indirection serves any purpose.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c |  2 +-
 fs/nfsd/nfs4state.c    | 22 ++++++++--------------
 fs/nfsd/nfs4xdr.c      |  2 +-
 fs/nfsd/nfsctl.c       |  5 +----
 fs/nfsd/nfsd.h         |  5 ++---
 5 files changed, 13 insertions(+), 23 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 4bc22c763de7..ed12ad40828b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -455,7 +455,7 @@ static struct rpc_program cb_program = {
 
 static int max_cb_time(void)
 {
-	return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ;
+	return max(nfsd4_lease/10, (time_t)1) * HZ;
 }
 
 /* Reference counting, callback cleanup, etc., all look racy as heck.
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3a20c09353ed..cc9164a34bec 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -44,7 +44,7 @@
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
 /* Globals */
-static time_t lease_time = 90;     /* default lease time */
+time_t nfsd4_lease = 90;     /* default lease time */
 static time_t user_lease_time = 90;
 static time_t boot_time;
 static u32 current_ownerid = 1;
@@ -2560,9 +2560,9 @@ nfs4_laundromat(void)
 	struct nfs4_stateowner *sop;
 	struct nfs4_delegation *dp;
 	struct list_head *pos, *next, reaplist;
-	time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
-	time_t t, clientid_val = NFSD_LEASE_TIME;
-	time_t u, test_val = NFSD_LEASE_TIME;
+	time_t cutoff = get_seconds() - nfsd4_lease;
+	time_t t, clientid_val = nfsd4_lease;
+	time_t u, test_val = nfsd4_lease;
 
 	nfs4_lock_state();
 
@@ -2602,7 +2602,7 @@ nfs4_laundromat(void)
 		list_del_init(&dp->dl_recall_lru);
 		unhash_delegation(dp);
 	}
-	test_val = NFSD_LEASE_TIME;
+	test_val = nfsd4_lease;
 	list_for_each_safe(pos, next, &close_lru) {
 		sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
 		if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
@@ -2672,7 +2672,7 @@ EXPIRED_STATEID(stateid_t *stateid)
 {
 	if (time_before((unsigned long)boot_time,
 			((unsigned long)stateid->si_boot)) &&
-	    time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
+	    time_before((unsigned long)(stateid->si_boot + nfsd4_lease), get_seconds())) {
 		dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
 			STATEID_VAL(stateid));
 		return 1;
@@ -3976,7 +3976,7 @@ nfsd4_load_reboot_recovery_data(void)
 unsigned long
 get_nfs4_grace_period(void)
 {
-	return max(user_lease_time, lease_time) * HZ;
+	return max(user_lease_time, nfsd4_lease) * HZ;
 }
 
 /*
@@ -4009,7 +4009,7 @@ __nfs4_state_start(void)
 
 	boot_time = get_seconds();
 	grace_time = get_nfs4_grace_period();
-	lease_time = user_lease_time;
+	nfsd4_lease = user_lease_time;
 	locks_start_grace(&nfsd4_manager);
 	printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
 	       grace_time/HZ);
@@ -4036,12 +4036,6 @@ nfs4_state_start(void)
 	return 0;
 }
 
-time_t
-nfs4_lease_time(void)
-{
-	return lease_time;
-}
-
 static void
 __nfs4_state_shutdown(void)
 {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c458fb11c957..f61bd736152b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1899,7 +1899,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 	if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
 		if ((buflen -= 4) < 0)
 			goto out_resource;
-		WRITE32(NFSD_LEASE_TIME);
+		WRITE32(nfsd4_lease);
 	}
 	if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
 		if ((buflen -= 4) < 0)
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 0f0e77f2012f..8bff6741b1e6 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1203,8 +1203,6 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 }
 
 #ifdef CONFIG_NFSD_V4
-extern time_t nfs4_leasetime(void);
-
 static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
 {
 	/* if size > 10 seconds, call
@@ -1224,8 +1222,7 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
 		nfs4_reset_lease(lease);
 	}
 
-	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n",
-							nfs4_lease_time());
+	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", nfsd4_lease);
 }
 
 /**
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index e942a1aaac92..b463093af255 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -82,7 +82,6 @@ int nfs4_state_init(void);
 void nfsd4_free_slabs(void);
 int nfs4_state_start(void);
 void nfs4_state_shutdown(void);
-time_t nfs4_lease_time(void);
 void nfs4_reset_lease(time_t leasetime);
 int nfs4_reset_recoverydir(char *recdir);
 #else
@@ -90,7 +89,6 @@ static inline int nfs4_state_init(void) { return 0; }
 static inline void nfsd4_free_slabs(void) { }
 static inline int nfs4_state_start(void) { return 0; }
 static inline void nfs4_state_shutdown(void) { }
-static inline time_t nfs4_lease_time(void) { return 0; }
 static inline void nfs4_reset_lease(time_t leasetime) { }
 static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
 #endif
@@ -229,6 +227,8 @@ extern struct timeval	nfssvc_boot;
 
 #ifdef CONFIG_NFSD_V4
 
+extern time_t nfsd4_lease;
+
 /* before processing a COMPOUND operation, we have to check that there
  * is enough space in the buffer for XDR encode to succeed.  otherwise,
  * we might process an operation with side effects, and be unable to
@@ -247,7 +247,6 @@ extern struct timeval	nfssvc_boot;
 #define	COMPOUND_SLACK_SPACE		140    /* OP_GETFH */
 #define COMPOUND_ERR_SLACK_SPACE	12     /* OP_SETATTR */
 
-#define NFSD_LEASE_TIME                 (nfs4_lease_time())
 #define NFSD_LAUNDROMAT_MINTIMEOUT      10   /* seconds */
 
 /*
-- 
cgit v1.2.3


From e46b498c84163e86e2627c30bca298c968664f65 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 1 Mar 2010 19:21:21 -0500
Subject: nfsd4: simplify lease/grace interaction

The original code here assumed we'd allow the user to change the lease
any time, but only allow the change to take effect on restart.  Since
then we modified the code to allow setting the lease on when the server
is down.  Update the rest of the code to reflect that fact, clarify
variable names, and add document.

Also, the code insisted that the grace period always be the longer of
the old and new lease periods, but that's overly conservative--as long
as it lasts at least the old lease period, old clients should still know
to recover in time.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 32 +++++++++++---------------------
 1 file changed, 11 insertions(+), 21 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index cc9164a34bec..eb8d124ec14d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -45,7 +45,7 @@
 
 /* Globals */
 time_t nfsd4_lease = 90;     /* default lease time */
-static time_t user_lease_time = 90;
+static time_t nfsd4_grace = 90;
 static time_t boot_time;
 static u32 current_ownerid = 1;
 static u32 current_fileid = 1;
@@ -2551,6 +2551,12 @@ nfsd4_end_grace(void)
 	dprintk("NFSD: end of grace period\n");
 	nfsd4_recdir_purge_old();
 	locks_end_grace(&nfsd4_manager);
+	/*
+	 * Now that every NFSv4 client has had the chance to recover and
+	 * to see the (possibly new, possibly shorter) lease time, we
+	 * can safely set the next grace time to the current lease time:
+	 */
+	nfsd4_grace = nfsd4_lease;
 }
 
 static time_t
@@ -3973,12 +3979,6 @@ nfsd4_load_reboot_recovery_data(void)
 		printk("NFSD: Failure reading reboot recovery data\n");
 }
 
-unsigned long
-get_nfs4_grace_period(void)
-{
-	return max(user_lease_time, nfsd4_lease) * HZ;
-}
-
 /*
  * Since the lifetime of a delegation isn't limited to that of an open, a
  * client may quite reasonably hang on to a delegation as long as it has
@@ -4005,18 +4005,14 @@ set_max_delegations(void)
 static int
 __nfs4_state_start(void)
 {
-	unsigned long grace_time;
-
 	boot_time = get_seconds();
-	grace_time = get_nfs4_grace_period();
-	nfsd4_lease = user_lease_time;
 	locks_start_grace(&nfsd4_manager);
 	printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
-	       grace_time/HZ);
+	       nfsd4_grace);
 	laundry_wq = create_singlethread_workqueue("nfsd4");
 	if (laundry_wq == NULL)
 		return -ENOMEM;
-	queue_delayed_work(laundry_wq, &laundromat_work, grace_time);
+	queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ);
 	set_max_delegations();
 	return set_callback_cred();
 }
@@ -4123,17 +4119,11 @@ nfs4_recoverydir(void)
 /*
  * Called when leasetime is changed.
  *
- * The only way the protocol gives us to handle on-the-fly lease changes is to
- * simulate a reboot.  Instead of doing that, we just wait till the next time
- * we start to register any changes in lease time.  If the administrator
- * really wants to change the lease time *now*, they can go ahead and bring
- * nfsd down and then back up again after changing the lease time.
- *
- * user_lease_time is protected by nfsd_mutex since it's only really accessed
+ * nfsd4_lease is protected by nfsd_mutex since it's only really accessed
  * when nfsd is starting
  */
 void
 nfs4_reset_lease(time_t leasetime)
 {
-	user_lease_time = leasetime;
+	nfsd4_lease = leasetime;
 }
-- 
cgit v1.2.3


From f958a1320ff7a1e0e861d3c90de6da12a88839dc Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 1 Mar 2010 19:43:02 -0500
Subject: nfsd4: remove unnecessary lease-setting function

This is another layer of indirection that doesn't really buy us
anything.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 12 ------------
 fs/nfsd/nfsctl.c    |  2 +-
 2 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index eb8d124ec14d..4471046e19e0 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4115,15 +4115,3 @@ nfs4_recoverydir(void)
 {
 	return user_recovery_dirname;
 }
-
-/*
- * Called when leasetime is changed.
- *
- * nfsd4_lease is protected by nfsd_mutex since it's only really accessed
- * when nfsd is starting
- */
-void
-nfs4_reset_lease(time_t leasetime)
-{
-	nfsd4_lease = leasetime;
-}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 8bff6741b1e6..6738e9d8a83d 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1219,7 +1219,7 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
 			return rv;
 		if (lease < 10 || lease > 3600)
 			return -EINVAL;
-		nfs4_reset_lease(lease);
+		nfsd4_lease = lease;
 	}
 
 	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", nfsd4_lease);
-- 
cgit v1.2.3


From f013574014816c7a557b3c52233f3620463f0b9b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 1 Mar 2010 19:32:36 -0500
Subject: nfsd4: reshuffle lease-setting code to allow reuse

We'll soon allow setting the grace period, so we'll want to share this
code.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfsctl.c | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 6738e9d8a83d..9c73caccffff 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1203,26 +1203,36 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 }
 
 #ifdef CONFIG_NFSD_V4
-static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
+static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
 {
 	/* if size > 10 seconds, call
 	 * nfs4_reset_lease() then write out the new lease (seconds) as reply
 	 */
 	char *mesg = buf;
-	int rv, lease;
+	int rv, i;
 
 	if (size > 0) {
 		if (nfsd_serv)
 			return -EBUSY;
-		rv = get_int(&mesg, &lease);
+		rv = get_int(&mesg, &i);
 		if (rv)
 			return rv;
-		if (lease < 10 || lease > 3600)
+		if (i < 10 || i > 3600)
 			return -EINVAL;
-		nfsd4_lease = lease;
+		*time = i;
 	}
 
-	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", nfsd4_lease);
+	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
+}
+
+static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
+{
+	ssize_t rv;
+
+	mutex_lock(&nfsd_mutex);
+	rv = __nfsd4_write_time(file, buf, size, time);
+	mutex_unlock(&nfsd_mutex);
+	return rv;
 }
 
 /**
@@ -1248,12 +1258,7 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
  */
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
 {
-	ssize_t rv;
-
-	mutex_lock(&nfsd_mutex);
-	rv = __write_leasetime(file, buf, size);
-	mutex_unlock(&nfsd_mutex);
-	return rv;
+	return nfsd4_write_time(file, buf, size, &nfsd4_lease);
 }
 
 extern char *nfs4_recoverydir(void);
-- 
cgit v1.2.3


From efc4bb4fdd09c11f5558446e584a494c6feb43c7 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 2 Mar 2010 11:04:06 -0500
Subject: nfsd4: allow setting grace period time

Allow explicit configuration of the grace period time as well as the
lease period time.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c |  2 +-
 fs/nfsd/nfsctl.c    | 19 +++++++++++++++++++
 fs/nfsd/nfsd.h      |  1 +
 3 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4471046e19e0..6edfe23694e6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -45,7 +45,7 @@
 
 /* Globals */
 time_t nfsd4_lease = 90;     /* default lease time */
-static time_t nfsd4_grace = 90;
+time_t nfsd4_grace = 90;
 static time_t boot_time;
 static u32 current_ownerid = 1;
 static u32 current_fileid = 1;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 9c73caccffff..7ab70ff212d8 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -45,6 +45,7 @@ enum {
 	 */
 #ifdef CONFIG_NFSD_V4
 	NFSD_Leasetime,
+	NFSD_Gracetime,
 	NFSD_RecoveryDir,
 #endif
 };
@@ -69,6 +70,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size);
 static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
 #ifdef CONFIG_NFSD_V4
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
+static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
 static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
 #endif
 
@@ -90,6 +92,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 	[NFSD_MaxBlkSize] = write_maxblksize,
 #ifdef CONFIG_NFSD_V4
 	[NFSD_Leasetime] = write_leasetime,
+	[NFSD_Gracetime] = write_gracetime,
 	[NFSD_RecoveryDir] = write_recoverydir,
 #endif
 };
@@ -1261,6 +1264,21 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
 	return nfsd4_write_time(file, buf, size, &nfsd4_lease);
 }
 
+/**
+ * write_gracetime - Set or report current NFSv4 grace period time
+ *
+ * As above, but sets the time of the NFSv4 grace period.
+ *
+ * Note this should never be set to less than the *previous*
+ * lease-period time, but we don't try to enforce this.  (In the common
+ * case (a new boot), we don't know what the previous lease time was
+ * anyway.)
+ */
+static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
+{
+	return nfsd4_write_time(file, buf, size, &nfsd4_grace);
+}
+
 extern char *nfs4_recoverydir(void);
 
 static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
@@ -1352,6 +1370,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
 		[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
 #ifdef CONFIG_NFSD_V4
 		[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
+		[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
 #endif
 		/* last one */ {""}
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index b463093af255..72377761270e 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -228,6 +228,7 @@ extern struct timeval	nfssvc_boot;
 #ifdef CONFIG_NFSD_V4
 
 extern time_t nfsd4_lease;
+extern time_t nfsd4_grace;
 
 /* before processing a COMPOUND operation, we have to check that there
  * is enough space in the buffer for XDR encode to succeed.  otherwise,
-- 
cgit v1.2.3


From e7b184f199fd3c80b618ec8244cbda70857d2779 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 2 Mar 2010 11:18:40 -0500
Subject: nfsd4: document lease/grace-period limits

The current documentation here is out of date, and not quite right.

(Future work: some user documentation would be useful.)

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfsctl.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7ab70ff212d8..413cb8ef951b 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1208,9 +1208,6 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 #ifdef CONFIG_NFSD_V4
 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
 {
-	/* if size > 10 seconds, call
-	 * nfs4_reset_lease() then write out the new lease (seconds) as reply
-	 */
 	char *mesg = buf;
 	int rv, i;
 
@@ -1220,6 +1217,18 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, tim
 		rv = get_int(&mesg, &i);
 		if (rv)
 			return rv;
+		/*
+		 * Some sanity checking.  We don't have a reason for
+		 * these particular numbers, but problems with the
+		 * extremes are:
+		 *	- Too short: the briefest network outage may
+		 *	  cause clients to lose all their locks.  Also,
+		 *	  the frequent polling may be wasteful.
+		 *	- Too long: do you really want reboot recovery
+		 *	  to take more than an hour?  Or to make other
+		 *	  clients wait an hour before being able to
+		 *	  revoke a dead client's locks?
+		 */
 		if (i < 10 || i > 3600)
 			return -EINVAL;
 		*time = i;
-- 
cgit v1.2.3


From 61f8603d93fa0b0e2f73be7a4f048696417a24a3 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 3 Feb 2010 17:31:31 +1100
Subject: nfsd: factor out hash functions for export caches.

Both the _lookup and the _update functions for these two caches
independently calculate the hash of the key.
So factor out that code for improved reuse.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/export.c | 40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index a0c4016413f1..65ddc5b8eb33 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -258,10 +258,9 @@ static struct cache_detail svc_expkey_cache = {
 	.alloc		= expkey_alloc,
 };
 
-static struct svc_expkey *
-svc_expkey_lookup(struct svc_expkey *item)
+static int
+svc_expkey_hash(struct svc_expkey *item)
 {
-	struct cache_head *ch;
 	int hash = item->ek_fsidtype;
 	char * cp = (char*)item->ek_fsid;
 	int len = key_len(item->ek_fsidtype);
@@ -269,6 +268,14 @@ svc_expkey_lookup(struct svc_expkey *item)
 	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
 	hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
 	hash &= EXPKEY_HASHMASK;
+	return hash;
+}
+
+static struct svc_expkey *
+svc_expkey_lookup(struct svc_expkey *item)
+{
+	struct cache_head *ch;
+	int hash = svc_expkey_hash(item);
 
 	ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h,
 				 hash);
@@ -282,13 +289,7 @@ static struct svc_expkey *
 svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
 {
 	struct cache_head *ch;
-	int hash = new->ek_fsidtype;
-	char * cp = (char*)new->ek_fsid;
-	int len = key_len(new->ek_fsidtype);
-
-	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
-	hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS);
-	hash &= EXPKEY_HASHMASK;
+	int hash = svc_expkey_hash(new);
 
 	ch = sunrpc_cache_update(&svc_expkey_cache, &new->h,
 				 &old->h, hash);
@@ -737,14 +738,22 @@ struct cache_detail svc_export_cache = {
 	.alloc		= svc_export_alloc,
 };
 
-static struct svc_export *
-svc_export_lookup(struct svc_export *exp)
+static int
+svc_export_hash(struct svc_export *exp)
 {
-	struct cache_head *ch;
 	int hash;
+
 	hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS);
 	hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS);
 	hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS);
+	return hash;
+}
+
+static struct svc_export *
+svc_export_lookup(struct svc_export *exp)
+{
+	struct cache_head *ch;
+	int hash = svc_export_hash(exp);
 
 	ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h,
 				 hash);
@@ -758,10 +767,7 @@ static struct svc_export *
 svc_export_update(struct svc_export *new, struct svc_export *old)
 {
 	struct cache_head *ch;
-	int hash;
-	hash = hash_ptr(old->ex_client, EXPORT_HASHBITS);
-	hash ^= hash_ptr(old->ex_path.dentry, EXPORT_HASHBITS);
-	hash ^= hash_ptr(old->ex_path.mnt, EXPORT_HASHBITS);
+	int hash = svc_export_hash(old);
 
 	ch = sunrpc_cache_update(&svc_export_cache, &new->h,
 				 &old->h,
-- 
cgit v1.2.3


From 91885258e8343bb65c08f668d7e6c16563eb4284 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 19 Mar 2010 08:06:28 -0400
Subject: nfsd: don't break lease while servicing a COMMIT

This is the second attempt to fix the problem whereby a COMMIT call
causes a lease break and triggers a possible deadlock.

The problem is that nfsd attempts to break a lease on a COMMIT call.
This triggers a delegation recall if the lease is held for a delegation.
If the client is the one holding the delegation and it's the same one on
which it's issuing the COMMIT, then it can't return that delegation
until the COMMIT is complete. But, nfsd won't complete the COMMIT until
the delegation is returned. The client and server are essentially
deadlocked until the state is marked bad (due to the client not
responding on the callback channel).

The first patch attempted to deal with this by eliminating the open of
the file altogether and simply had nfsd_commit pass a NULL file pointer
to the vfs_fsync_range. That would conflict with some work in progress
by Christoph Hellwig to clean up the fsync interface, so this patch
takes a different approach.

This declares a new NFSD_MAY_NOT_BREAK_LEASE access flag that indicates
to nfsd_open that it should not break any leases when opening the file,
and has nfsd_commit set that flag on the nfsd_open call.

For now, this patch leaves nfsd_commit opening the file with write
access since I'm not clear on what sort of access would be more
appropriate.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Cc: stable@kernel.org
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/vfs.c | 8 +++++---
 fs/nfsd/vfs.h | 1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a11b0e8678ee..c2dcb4c2b1fc 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -723,7 +723,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 	struct inode	*inode;
 	int		flags = O_RDONLY|O_LARGEFILE;
 	__be32		err;
-	int		host_err;
+	int		host_err = 0;
 
 	validate_process_creds();
 
@@ -760,7 +760,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 	 * Check to see if there are any leases on this file.
 	 * This may block while leases are broken.
 	 */
-	host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
+	if (!(access & NFSD_MAY_NOT_BREAK_LEASE))
+		host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
 	if (host_err == -EWOULDBLOCK)
 		host_err = -ETIMEDOUT;
 	if (host_err) /* NOMEM or WOULDBLOCK */
@@ -1168,7 +1169,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			goto out;
 	}
 
-	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+	err = nfsd_open(rqstp, fhp, S_IFREG,
+			NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
 	if (err)
 		goto out;
 	if (EX_ISSYNC(fhp->fh_export)) {
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 4b1de0a9ea75..217a62c2a357 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -20,6 +20,7 @@
 #define NFSD_MAY_OWNER_OVERRIDE	64
 #define NFSD_MAY_LOCAL_ACCESS	128 /* IRIX doing local access check on device special file*/
 #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
+#define NFSD_MAY_NOT_BREAK_LEASE 512
 
 #define NFSD_MAY_CREATE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE)
 #define NFSD_MAY_REMOVE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
-- 
cgit v1.2.3


From 227f98d98d2ed7929f41426adc21f57b927354a6 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 18 Feb 2010 08:27:24 -0800
Subject: nfsd4: preallocate nfs4_rpc_args

Instead of allocating this small structure, just include it in the
delegation.

The nfsd4_callback structure isn't really necessary yet, but we plan to
add to it all the information necessary to perform a callback.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c | 18 +++---------------
 fs/nfsd/state.h        | 10 ++++++++++
 2 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index ed12ad40828b..b99c3f0f1d35 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -78,11 +78,6 @@ enum nfs_cb_opnum4 {
 					cb_sequence_dec_sz +            \
 					op_dec_sz)
 
-struct nfs4_rpc_args {
-	void				*args_op;
-	struct nfsd4_cb_sequence	args_seq;
-};
-
 /*
 * Generic encode routines from fs/nfs/nfs4xdr.c
 */
@@ -676,7 +671,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
 		break;
 	default:
 		/* success, or error we can't handle */
-		goto done;
+		return;
 	}
 	if (dp->dl_retries--) {
 		rpc_delay(task, 2*HZ);
@@ -687,8 +682,6 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
 		atomic_set(&clp->cl_cb_conn.cb_set, 0);
 		warn_no_callback_path(clp, task->tk_status);
 	}
-done:
-	kfree(task->tk_msg.rpc_argp);
 }
 
 static void nfsd4_cb_recall_release(void *calldata)
@@ -714,24 +707,19 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
 {
 	struct nfs4_client *clp = dp->dl_client;
 	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
-	struct nfs4_rpc_args *args;
+	struct nfs4_rpc_args *args = &dp->dl_recall.cb_args;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
 		.rpc_cred = callback_cred
 	};
-	int status = -ENOMEM;
+	int status;
 
-	args = kzalloc(sizeof(*args), GFP_KERNEL);
-	if (!args)
-		goto out;
 	args->args_op = dp;
 	msg.rpc_argp = args;
 	dp->dl_retries = 1;
 	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
 				&nfsd4_cb_recall_ops, dp);
-out:
 	if (status) {
-		kfree(args);
 		put_nfs4_client(clp);
 		nfs4_put_delegation(dp);
 	}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index fefeae27f25e..b85437982a8d 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -70,6 +70,15 @@ struct nfsd4_cb_sequence {
 	struct nfs4_client	*cbs_clp;
 };
 
+struct nfs4_rpc_args {
+	void				*args_op;
+	struct nfsd4_cb_sequence	args_seq;
+};
+
+struct nfsd4_callback {
+	struct nfs4_rpc_args cb_args;
+};
+
 struct nfs4_delegation {
 	struct list_head	dl_perfile;
 	struct list_head	dl_perclnt;
@@ -86,6 +95,7 @@ struct nfs4_delegation {
 	stateid_t		dl_stateid;
 	struct knfsd_fh		dl_fh;
 	int			dl_retries;
+	struct nfsd4_callback	dl_recall;
 };
 
 /* client delegation callback info */
-- 
cgit v1.2.3


From 147efd0dd702ce2f1ab44449bd70369405ef68fd Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Sun, 21 Feb 2010 17:41:19 -0800
Subject: nfsd4: shutdown callbacks on expiry

Once we've expired the client, there's no further purpose to the
callbacks; go ahead and shut down the callback client rather than
waiting for the last reference to go.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index efef7f2442d5..9ce58318ca8c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -697,9 +697,6 @@ shutdown_callback_client(struct nfs4_client *clp)
 static inline void
 free_client(struct nfs4_client *clp)
 {
-	shutdown_callback_client(clp);
-	if (clp->cl_cb_xprt)
-		svc_xprt_put(clp->cl_cb_xprt);
 	if (clp->cl_cred.cr_group_info)
 		put_group_info(clp->cl_cred.cr_group_info);
 	kfree(clp->cl_principal);
@@ -752,6 +749,9 @@ expire_client(struct nfs4_client *clp)
 				 se_perclnt);
 		release_session(ses);
 	}
+	shutdown_callback_client(clp);
+	if (clp->cl_cb_xprt)
+		svc_xprt_put(clp->cl_cb_xprt);
 	put_nfs4_client(clp);
 }
 
-- 
cgit v1.2.3


From 3df796dbe97a98a6a25e6b7b88e9d326e261f371 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Sun, 21 Feb 2010 17:51:53 -0800
Subject: nfsd4: remove dprintk

I haven't found this useful.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9ce58318ca8c..5d86df1d1881 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -718,9 +718,6 @@ expire_client(struct nfs4_client *clp)
 	struct nfs4_delegation *dp;
 	struct list_head reaplist;
 
-	dprintk("NFSD: expire_client cl_count %d\n",
-	                    atomic_read(&clp->cl_count));
-
 	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&recall_lock);
 	while (!list_empty(&clp->cl_delegations)) {
-- 
cgit v1.2.3


From 9045b4b9f7f340f43de0cf687b5b52f6feaaa984 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Sun, 21 Feb 2010 17:53:04 -0800
Subject: nfsd4: remove probe task's reference on client

Any null probe rpc will be synchronously destroyed by the
rpc_shutdown_client() in expire_client(), so the rpc task cannot outlast
the nfs4 client.  Therefore there's no need for that task to hold a
reference on the client.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index b99c3f0f1d35..91eb2ea9ef0a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -509,7 +509,6 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
 		warn_no_callback_path(clp, task->tk_status);
 	else
 		atomic_set(&clp->cl_cb_conn.cb_set, 1);
-	put_nfs4_client(clp);
 }
 
 static const struct rpc_call_ops nfsd4_cb_probe_ops = {
@@ -542,10 +541,8 @@ void do_probe_callback(struct nfs4_client *clp)
 	status = rpc_call_async(cb->cb_client, &msg,
 				RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
 				&nfsd4_cb_probe_ops, (void *)clp);
-	if (status) {
+	if (status)
 		warn_no_callback_path(clp, status);
-		put_nfs4_client(clp);
-	}
 }
 
 /*
@@ -563,10 +560,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 		warn_no_callback_path(clp, status);
 		return;
 	}
-
-	/* the task holds a reference to the nfs4_client struct */
-	atomic_inc(&clp->cl_count);
-
 	do_probe_callback(clp);
 }
 
-- 
cgit v1.2.3


From 408b79bcc32d7221a4975771ab6bff3d3173d530 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 15 Apr 2010 15:11:09 -0400
Subject: nfsd4: consistent session flag setting

We should clear these flags on any new create_session, not just on the
first one.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 5d86df1d1881..5051ade30dfb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1323,12 +1323,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 		cs_slot->sl_seqid++; /* from 0 to 1 */
 		move_to_confirmed(unconf);
 
-		/*
-		 * We do not support RDMA or persistent sessions
-		 */
-		cr_ses->flags &= ~SESSION4_PERSIST;
-		cr_ses->flags &= ~SESSION4_RDMA;
-
 		if (cr_ses->flags & SESSION4_BACK_CHAN) {
 			unconf->cl_cb_xprt = rqstp->rq_xprt;
 			svc_xprt_get(unconf->cl_cb_xprt);
@@ -1348,6 +1342,12 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 		goto out;
 	}
 
+	/*
+	 * We do not support RDMA or persistent sessions
+	 */
+	cr_ses->flags &= ~SESSION4_PERSIST;
+	cr_ses->flags &= ~SESSION4_RDMA;
+
 	status = alloc_init_session(rqstp, conf, cr_ses);
 	if (status)
 		goto out;
-- 
cgit v1.2.3


From 3c4ab2aaa90826060b1e8d4036f9bb8325f8759e Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 19 Apr 2010 15:12:51 -0400
Subject: nfsd4: indentation cleanup

Looks like a put-and-paste mistake.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/xdr4.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index efa337739534..c28958ec216c 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -513,9 +513,8 @@ extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
 extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
 		struct nfsd4_sequence *seq);
 extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-struct nfsd4_exchange_id *);
-		extern __be32 nfsd4_create_session(struct svc_rqst *,
+		struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
+extern __be32 nfsd4_create_session(struct svc_rqst *,
 		struct nfsd4_compound_state *,
 		struct nfsd4_create_session *);
 extern __be32 nfsd4_sequence(struct svc_rqst *,
-- 
cgit v1.2.3


From b5a1a81e5c25fb6bb3fdc1812ba69ff6ab638fcf Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 3 Mar 2010 14:52:55 -0500
Subject: nfsd4: don't sleep in lease-break callback

The NFSv4 server's fl_break callback can sleep (dropping the BKL), in
order to allocate a new rpc task to send a recall to the client.

As far as I can tell this doesn't cause any races in the current code,
but the analysis is difficult.  Also, the sleep here may complicate the
move away from the BKL.

So, just schedule some work to do the job for us instead.  The work will
later also prove useful for restarting a call after the callback
information is changed.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/nfsd/nfs4state.c    | 34 +++++++++++++++----------------
 fs/nfsd/state.h        |  5 +++++
 3 files changed, 73 insertions(+), 20 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 91eb2ea9ef0a..e078c747f49d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -32,6 +32,7 @@
  */
 
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svc_xprt.h>
 #include "nfsd.h"
 #include "state.h"
 
@@ -692,11 +693,41 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
 	.rpc_release = nfsd4_cb_recall_release,
 };
 
+static struct workqueue_struct *callback_wq;
+
+int nfsd4_create_callback_queue(void)
+{
+	callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
+	if (!callback_wq)
+		return -ENOMEM;
+	return 0;
+}
+
+void nfsd4_destroy_callback_queue(void)
+{
+	destroy_workqueue(callback_wq);
+}
+
+void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt
+*new)
+{
+	struct rpc_clnt *old = clp->cl_cb_conn.cb_client;
+
+	clp->cl_cb_conn.cb_client = new;
+	/*
+	 * After this, any work that saw the old value of cb_client will
+	 * be gone:
+	 */
+	flush_workqueue(callback_wq);
+	/* So we can safely shut it down: */
+	if (old)
+		rpc_shutdown_client(old);
+}
+
 /*
  * called with dp->dl_count inc'ed.
  */
-void
-nfsd4_cb_recall(struct nfs4_delegation *dp)
+static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
 {
 	struct nfs4_client *clp = dp->dl_client;
 	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
@@ -707,6 +738,9 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
 	};
 	int status;
 
+	if (clnt == NULL)
+		return; /* Client is shutting down; give up. */
+
 	args->args_op = dp;
 	msg.rpc_argp = args;
 	dp->dl_retries = 1;
@@ -717,3 +751,19 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
 		nfs4_put_delegation(dp);
 	}
 }
+
+void nfsd4_do_callback_rpc(struct work_struct *w)
+{
+	/* XXX: for now, just send off delegation recall. */
+	/* In future, generalize to handle any sort of callback. */
+	struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work);
+	struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall);
+
+	_nfsd4_cb_recall(dp);
+}
+
+
+void nfsd4_cb_recall(struct nfs4_delegation *dp)
+{
+	queue_work(callback_wq, &dp->dl_recall.cb_work);
+}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 5051ade30dfb..adc51d10d435 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -198,6 +198,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	atomic_set(&dp->dl_count, 1);
 	list_add(&dp->dl_perfile, &fp->fi_delegations);
 	list_add(&dp->dl_perclnt, &clp->cl_delegations);
+	INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
 	return dp;
 }
 
@@ -679,21 +680,6 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
 	return clp;
 }
 
-static void
-shutdown_callback_client(struct nfs4_client *clp)
-{
-	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
-
-	if (clnt) {
-		/*
-		 * Callback threads take a reference on the client, so there
-		 * should be no outstanding callbacks at this point.
-		 */
-		clp->cl_cb_conn.cb_client = NULL;
-		rpc_shutdown_client(clnt);
-	}
-}
-
 static inline void
 free_client(struct nfs4_client *clp)
 {
@@ -746,7 +732,7 @@ expire_client(struct nfs4_client *clp)
 				 se_perclnt);
 		release_session(ses);
 	}
-	shutdown_callback_client(clp);
+	nfsd4_set_callback_client(clp, NULL);
 	if (clp->cl_cb_xprt)
 		svc_xprt_put(clp->cl_cb_xprt);
 	put_nfs4_client(clp);
@@ -1392,7 +1378,7 @@ nfsd4_destroy_session(struct svc_rqst *r,
 	spin_unlock(&sessionid_lock);
 
 	/* wait for callbacks */
-	shutdown_callback_client(ses->se_client);
+	nfsd4_set_callback_client(ses->se_client, NULL);
 	nfsd4_put_session(ses);
 	status = nfs_ok;
 out:
@@ -4004,16 +3990,27 @@ set_max_delegations(void)
 static int
 __nfs4_state_start(void)
 {
+	int ret;
+
 	boot_time = get_seconds();
 	locks_start_grace(&nfsd4_manager);
 	printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
 	       nfsd4_grace);
+	ret = set_callback_cred();
+	if (ret)
+		return -ENOMEM;
 	laundry_wq = create_singlethread_workqueue("nfsd4");
 	if (laundry_wq == NULL)
 		return -ENOMEM;
+	ret = nfsd4_create_callback_queue();
+	if (ret)
+		goto out_free_laundry;
 	queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ);
 	set_max_delegations();
-	return set_callback_cred();
+	return 0;
+out_free_laundry:
+	destroy_workqueue(laundry_wq);
+	return ret;
 }
 
 int
@@ -4075,6 +4072,7 @@ nfs4_state_shutdown(void)
 	nfs4_lock_state();
 	nfs4_release_reclaim();
 	__nfs4_state_shutdown();
+	nfsd4_destroy_callback_queue();
 	nfs4_unlock_state();
 }
 
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index b85437982a8d..c4c92aea8f39 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -77,6 +77,7 @@ struct nfs4_rpc_args {
 
 struct nfsd4_callback {
 	struct nfs4_rpc_args cb_args;
+	struct work_struct cb_work;
 };
 
 struct nfs4_delegation {
@@ -391,7 +392,11 @@ extern void put_nfs4_client(struct nfs4_client *clp);
 extern void nfs4_free_stateowner(struct kref *kref);
 extern int set_callback_cred(void);
 extern void nfsd4_probe_callback(struct nfs4_client *clp);
+extern void nfsd4_do_callback_rpc(struct work_struct *);
 extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
+extern int nfsd4_create_callback_queue(void);
+extern void nfsd4_destroy_callback_queue(void);
+extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *);
 extern void nfs4_put_delegation(struct nfs4_delegation *dp);
 extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
 extern void nfsd4_init_recdir(char *recdir_name);
-- 
cgit v1.2.3


From b12a05cbdfdf7e4d8cbe8fa78e995f971420086b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 4 Mar 2010 11:32:59 -0500
Subject: nfsd4: cl_count is unused

Now that the shutdown sequence guarantees callbacks are shut down before
the client is destroyed, we no longer have a use for cl_count.

We'll probably reinstate a reference count on the client some day, but
it will be held by users other than callbacks.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c |  6 +-----
 fs/nfsd/nfs4state.c    | 11 +----------
 fs/nfsd/state.h        |  2 --
 3 files changed, 2 insertions(+), 17 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index e078c747f49d..5856fc8adb70 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -681,10 +681,8 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
 static void nfsd4_cb_recall_release(void *calldata)
 {
 	struct nfs4_delegation *dp = calldata;
-	struct nfs4_client *clp = dp->dl_client;
 
 	nfs4_put_delegation(dp);
-	put_nfs4_client(clp);
 }
 
 static const struct rpc_call_ops nfsd4_cb_recall_ops = {
@@ -746,10 +744,8 @@ static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
 	dp->dl_retries = 1;
 	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
 				&nfsd4_cb_recall_ops, dp);
-	if (status) {
-		put_nfs4_client(clp);
+	if (status)
 		nfs4_put_delegation(dp);
-	}
 }
 
 void nfsd4_do_callback_rpc(struct work_struct *w)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index adc51d10d435..cf650cbb814b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -690,13 +690,6 @@ free_client(struct nfs4_client *clp)
 	kfree(clp);
 }
 
-void
-put_nfs4_client(struct nfs4_client *clp)
-{
-	if (atomic_dec_and_test(&clp->cl_count))
-		free_client(clp);
-}
-
 static void
 expire_client(struct nfs4_client *clp)
 {
@@ -735,7 +728,7 @@ expire_client(struct nfs4_client *clp)
 	nfsd4_set_callback_client(clp, NULL);
 	if (clp->cl_cb_xprt)
 		svc_xprt_put(clp->cl_cb_xprt);
-	put_nfs4_client(clp);
+	free_client(clp);
 }
 
 static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
@@ -821,7 +814,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 	}
 
 	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
-	atomic_set(&clp->cl_count, 1);
 	atomic_set(&clp->cl_cb_conn.cb_set, 0);
 	INIT_LIST_HEAD(&clp->cl_idhash);
 	INIT_LIST_HEAD(&clp->cl_strhash);
@@ -2010,7 +2002,6 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
 	 * lock) we know the server hasn't removed the lease yet, we know
 	 * it's safe to take a reference: */
 	atomic_inc(&dp->dl_count);
-	atomic_inc(&dp->dl_client->cl_count);
 
 	spin_lock(&recall_lock);
 	list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index c4c92aea8f39..cef20abf330c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -224,7 +224,6 @@ struct nfs4_client {
 	clientid_t		cl_clientid;	/* generated by server */
 	nfs4_verifier		cl_confirm;	/* generated by server */
 	struct nfs4_cb_conn	cl_cb_conn;     /* callback info */
-	atomic_t		cl_count;	/* ref count */
 	u32			cl_firststate;	/* recovery dir creation */
 
 	/* for nfs41 */
@@ -388,7 +387,6 @@ extern void nfs4_lock_state(void);
 extern void nfs4_unlock_state(void);
 extern int nfs4_in_grace(void);
 extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
-extern void put_nfs4_client(struct nfs4_client *clp);
 extern void nfs4_free_stateowner(struct kref *kref);
 extern int set_callback_cred(void);
 extern void nfsd4_probe_callback(struct nfs4_client *clp);
-- 
cgit v1.2.3


From 2bf23875f55af6038a5d1c164a52cec4c24609ba Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 8 Mar 2010 12:37:27 -0500
Subject: nfsd4: rearrange cb data structures

Mainly I just want to separate the arguments used for setting up the tcp
client from the rest.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c | 25 ++++++++++++-------------
 fs/nfsd/nfs4state.c    | 20 ++++++++++----------
 fs/nfsd/state.h        | 11 ++++++-----
 3 files changed, 28 insertions(+), 28 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 5856fc8adb70..d6c46a9de422 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -455,7 +455,7 @@ static int max_cb_time(void)
 }
 
 /* Reference counting, callback cleanup, etc., all look racy as heck.
- * And why is cb_set an atomic? */
+ * And why is cl_cb_set an atomic? */
 
 int setup_callback_client(struct nfs4_client *clp)
 {
@@ -481,7 +481,7 @@ int setup_callback_client(struct nfs4_client *clp)
 	if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
 		return -EINVAL;
 	if (cb->cb_minorversion) {
-		args.bc_xprt = clp->cl_cb_xprt;
+		args.bc_xprt = clp->cl_cb_conn.cb_xprt;
 		args.protocol = XPRT_TRANSPORT_BC_TCP;
 	}
 	/* Create RPC client */
@@ -491,7 +491,7 @@ int setup_callback_client(struct nfs4_client *clp)
 			PTR_ERR(client));
 		return PTR_ERR(client);
 	}
-	cb->cb_client = client;
+	clp->cl_cb_client = client;
 	return 0;
 
 }
@@ -509,7 +509,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
 	if (task->tk_status)
 		warn_no_callback_path(clp, task->tk_status);
 	else
-		atomic_set(&clp->cl_cb_conn.cb_set, 1);
+		atomic_set(&clp->cl_cb_set, 1);
 }
 
 static const struct rpc_call_ops nfsd4_cb_probe_ops = {
@@ -531,7 +531,6 @@ int set_callback_cred(void)
 
 void do_probe_callback(struct nfs4_client *clp)
 {
-	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
 	struct rpc_message msg = {
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
 		.rpc_argp       = clp,
@@ -539,7 +538,7 @@ void do_probe_callback(struct nfs4_client *clp)
 	};
 	int status;
 
-	status = rpc_call_async(cb->cb_client, &msg,
+	status = rpc_call_async(clp->cl_cb_client, &msg,
 				RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
 				&nfsd4_cb_probe_ops, (void *)clp);
 	if (status)
@@ -554,7 +553,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 {
 	int status;
 
-	BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set));
+	BUG_ON(atomic_read(&clp->cl_cb_set));
 
 	status = setup_callback_client(clp);
 	if (status) {
@@ -656,7 +655,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
 	switch (task->tk_status) {
 	case -EIO:
 		/* Network partition? */
-		atomic_set(&clp->cl_cb_conn.cb_set, 0);
+		atomic_set(&clp->cl_cb_set, 0);
 		warn_no_callback_path(clp, task->tk_status);
 	case -EBADHANDLE:
 	case -NFS4ERR_BAD_STATEID:
@@ -673,7 +672,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
 		rpc_restart_call(task);
 		return;
 	} else {
-		atomic_set(&clp->cl_cb_conn.cb_set, 0);
+		atomic_set(&clp->cl_cb_set, 0);
 		warn_no_callback_path(clp, task->tk_status);
 	}
 }
@@ -709,11 +708,11 @@ void nfsd4_destroy_callback_queue(void)
 void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt
 *new)
 {
-	struct rpc_clnt *old = clp->cl_cb_conn.cb_client;
+	struct rpc_clnt *old = clp->cl_cb_client;
 
-	clp->cl_cb_conn.cb_client = new;
+	clp->cl_cb_client = new;
 	/*
-	 * After this, any work that saw the old value of cb_client will
+	 * After this, any work that saw the old value of cl_cb_client will
 	 * be gone:
 	 */
 	flush_workqueue(callback_wq);
@@ -728,7 +727,7 @@ void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt
 static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
 {
 	struct nfs4_client *clp = dp->dl_client;
-	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
+	struct rpc_clnt *clnt = clp->cl_cb_client;
 	struct nfs4_rpc_args *args = &dp->dl_recall.cb_args;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index cf650cbb814b..59c9bd4c89e1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -726,8 +726,8 @@ expire_client(struct nfs4_client *clp)
 		release_session(ses);
 	}
 	nfsd4_set_callback_client(clp, NULL);
-	if (clp->cl_cb_xprt)
-		svc_xprt_put(clp->cl_cb_xprt);
+	if (clp->cl_cb_conn.cb_xprt)
+		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
 	free_client(clp);
 }
 
@@ -814,7 +814,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 	}
 
 	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
-	atomic_set(&clp->cl_cb_conn.cb_set, 0);
+	atomic_set(&clp->cl_cb_set, 0);
 	INIT_LIST_HEAD(&clp->cl_idhash);
 	INIT_LIST_HEAD(&clp->cl_strhash);
 	INIT_LIST_HEAD(&clp->cl_openowners);
@@ -1302,8 +1302,8 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 		move_to_confirmed(unconf);
 
 		if (cr_ses->flags & SESSION4_BACK_CHAN) {
-			unconf->cl_cb_xprt = rqstp->rq_xprt;
-			svc_xprt_get(unconf->cl_cb_xprt);
+			unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
+			svc_xprt_get(rqstp->rq_xprt);
 			rpc_copy_addr(
 				(struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
 				sa);
@@ -1607,7 +1607,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 		else {
 			/* XXX: We just turn off callbacks until we can handle
 			  * change request correctly. */
-			atomic_set(&conf->cl_cb_conn.cb_set, 0);
+			atomic_set(&conf->cl_cb_set, 0);
 			expire_client(unconf);
 			status = nfs_ok;
 
@@ -2320,7 +2320,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 {
 	struct nfs4_delegation *dp;
 	struct nfs4_stateowner *sop = stp->st_stateowner;
-	struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn;
+	int cb_up = atomic_read(&sop->so_client->cl_cb_set);
 	struct file_lock fl, *flp = &fl;
 	int status, flag = 0;
 
@@ -2328,7 +2328,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 	open->op_recall = 0;
 	switch (open->op_claim_type) {
 		case NFS4_OPEN_CLAIM_PREVIOUS:
-			if (!atomic_read(&cb->cb_set))
+			if (!cb_up)
 				open->op_recall = 1;
 			flag = open->op_delegate_type;
 			if (flag == NFS4_OPEN_DELEGATE_NONE)
@@ -2339,7 +2339,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 			 * had the chance to reclaim theirs.... */
 			if (locks_in_grace())
 				goto out;
-			if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
+			if (!cb_up || !sop->so_confirmed)
 				goto out;
 			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
 				flag = NFS4_OPEN_DELEGATE_WRITE;
@@ -2510,7 +2510,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	renew_client(clp);
 	status = nfserr_cb_path_down;
 	if (!list_empty(&clp->cl_delegations)
-			&& !atomic_read(&clp->cl_cb_conn.cb_set))
+			&& !atomic_read(&clp->cl_cb_set))
 		goto out;
 	status = nfs_ok;
 out:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index cef20abf330c..cf43812e6da5 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -107,9 +107,7 @@ struct nfs4_cb_conn {
 	u32                     cb_prog;
 	u32			cb_minorversion;
 	u32                     cb_ident;	/* minorversion 0 only */
-	/* RPC client info */
-	atomic_t		cb_set;     /* successful CB_NULL call */
-	struct rpc_clnt *       cb_client;
+	struct svc_xprt		*cb_xprt;	/* minorversion 1 only */
 };
 
 /* Maximum number of slots per session. 160 is useful for long haul TCP */
@@ -223,9 +221,13 @@ struct nfs4_client {
 	struct svc_cred		cl_cred; 	/* setclientid principal */
 	clientid_t		cl_clientid;	/* generated by server */
 	nfs4_verifier		cl_confirm;	/* generated by server */
-	struct nfs4_cb_conn	cl_cb_conn;     /* callback info */
 	u32			cl_firststate;	/* recovery dir creation */
 
+	/* for v4.0 and v4.1 callbacks: */
+	struct nfs4_cb_conn	cl_cb_conn;
+	struct rpc_clnt		*cl_cb_client;
+	atomic_t		cl_cb_set;
+
 	/* for nfs41 */
 	struct list_head	cl_sessions;
 	struct nfsd4_clid_slot	cl_cs_slot;	/* create_session slot */
@@ -236,7 +238,6 @@ struct nfs4_client {
 	/* We currently support a single back channel with a single slot */
 	unsigned long		cl_cb_slot_busy;
 	u32			cl_cb_seq_nr;
-	struct svc_xprt		*cl_cb_xprt;	/* 4.1 callback transport */
 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
 						/* wait here for slots */
 };
-- 
cgit v1.2.3


From 4b21d0defcc9680da8a694e92d5fe8eb668c2c0b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Sun, 7 Mar 2010 23:39:01 -0500
Subject: nfsd4: allow 4.0 clients to change callback path

The rfc allows a client to change the callback parameters, but we didn't
previously implement it.

Teach the callbacks to rerun themselves (by placing themselves on a
workqueue) when they recognize that their rpc task has been killed and
that the callback connection has changed.

Then we can change the callback connection by setting up a new rpc
client, modifying the nfs4 client to point at it, waiting for any work
in progress to complete, and then shutting down the old client.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c | 29 ++++++++++++++++++++---------
 fs/nfsd/nfs4state.c    |  7 +++----
 fs/nfsd/state.h        |  2 +-
 3 files changed, 24 insertions(+), 14 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index d6c46a9de422..ea77aa63754a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -457,9 +457,8 @@ static int max_cb_time(void)
 /* Reference counting, callback cleanup, etc., all look racy as heck.
  * And why is cl_cb_set an atomic? */
 
-int setup_callback_client(struct nfs4_client *clp)
+int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
 {
-	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
 	struct rpc_timeout	timeparms = {
 		.to_initval	= max_cb_time(),
 		.to_retries	= 0,
@@ -481,7 +480,7 @@ int setup_callback_client(struct nfs4_client *clp)
 	if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
 		return -EINVAL;
 	if (cb->cb_minorversion) {
-		args.bc_xprt = clp->cl_cb_conn.cb_xprt;
+		args.bc_xprt = cb->cb_xprt;
 		args.protocol = XPRT_TRANSPORT_BC_TCP;
 	}
 	/* Create RPC client */
@@ -491,7 +490,7 @@ int setup_callback_client(struct nfs4_client *clp)
 			PTR_ERR(client));
 		return PTR_ERR(client);
 	}
-	clp->cl_cb_client = client;
+	nfsd4_set_callback_client(clp, client);
 	return 0;
 
 }
@@ -548,14 +547,13 @@ void do_probe_callback(struct nfs4_client *clp)
 /*
  * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
  */
-void
-nfsd4_probe_callback(struct nfs4_client *clp)
+void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
 {
 	int status;
 
 	BUG_ON(atomic_read(&clp->cl_cb_set));
 
-	status = setup_callback_client(clp);
+	status = setup_callback_client(clp, cb);
 	if (status) {
 		warn_no_callback_path(clp, status);
 		return;
@@ -645,18 +643,32 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
 	}
 }
 
+
 static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_delegation *dp = calldata;
 	struct nfs4_client *clp = dp->dl_client;
+	struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
 
 	nfsd4_cb_done(task, calldata);
 
+	if (current_rpc_client == NULL) {
+		/* We're shutting down; give up. */
+		/* XXX: err, or is it ok just to fall through
+		 * and rpc_restart_call? */
+		return;
+	}
+
 	switch (task->tk_status) {
 	case -EIO:
 		/* Network partition? */
 		atomic_set(&clp->cl_cb_set, 0);
 		warn_no_callback_path(clp, task->tk_status);
+		if (current_rpc_client != task->tk_client) {
+			/* queue a callback on the new connection: */
+			nfsd4_cb_recall(dp);
+			return;
+		}
 	case -EBADHANDLE:
 	case -NFS4ERR_BAD_STATEID:
 		/* Race: client probably got cb_recall
@@ -705,8 +717,7 @@ void nfsd4_destroy_callback_queue(void)
 	destroy_workqueue(callback_wq);
 }
 
-void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt
-*new)
+void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new)
 {
 	struct rpc_clnt *old = clp->cl_cb_client;
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 59c9bd4c89e1..4300d9ffe95f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1312,7 +1312,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 				cstate->minorversion;
 			unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
 			unconf->cl_cb_seq_nr = 1;
-			nfsd4_probe_callback(unconf);
+			nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
 		}
 		conf = unconf;
 	} else {
@@ -1605,9 +1605,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 		if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
 			status = nfserr_clid_inuse;
 		else {
-			/* XXX: We just turn off callbacks until we can handle
-			  * change request correctly. */
 			atomic_set(&conf->cl_cb_set, 0);
+			nfsd4_probe_callback(conf, &unconf->cl_cb_conn);
 			expire_client(unconf);
 			status = nfs_ok;
 
@@ -1641,7 +1640,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 			}
 			move_to_confirmed(unconf);
 			conf = unconf;
-			nfsd4_probe_callback(conf);
+			nfsd4_probe_callback(conf, &conf->cl_cb_conn);
 			status = nfs_ok;
 		}
 	} else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index cf43812e6da5..98836fd87f69 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -390,7 +390,7 @@ extern int nfs4_in_grace(void);
 extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
 extern void nfs4_free_stateowner(struct kref *kref);
 extern int set_callback_cred(void);
-extern void nfsd4_probe_callback(struct nfs4_client *clp);
+extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
 extern void nfsd4_do_callback_rpc(struct work_struct *);
 extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
 extern int nfsd4_create_callback_queue(void);
-- 
cgit v1.2.3


From 5771635592267758e7dc5647f2a0088aa6244159 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 21 Apr 2010 12:27:19 -0400
Subject: nfsd4: complete enforcement of 4.1 op ordering

Enforce the rules about compound op ordering.

Motivated by implementing RECLAIM_COMPLETE, for which the client is
implicit in the current session, so it is important to ensure a
succesful SEQUENCE proceeds the RECLAIM_COMPLETE.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4proc.c  | 44 ++++++++++++++++++++++++++++++--------------
 fs/nfsd/nfs4state.c | 13 +++++++++++++
 2 files changed, 43 insertions(+), 14 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 37514c469846..e147dbcb7ef7 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -968,20 +968,36 @@ static struct nfsd4_operation nfsd4_ops[];
 static const char *nfsd4_op_name(unsigned opnum);
 
 /*
- * Enforce NFSv4.1 COMPOUND ordering rules.
+ * Enforce NFSv4.1 COMPOUND ordering rules:
  *
- * TODO:
- * - enforce NFS4ERR_NOT_ONLY_OP,
- * - DESTROY_SESSION MUST be the final operation in the COMPOUND request.
+ * Also note, enforced elsewhere:
+ *	- SEQUENCE other than as first op results in
+ *	  NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().)
+ *	- BIND_CONN_TO_SESSION must be the only op in its compound
+ *	  (Will be enforced in nfsd4_bind_conn_to_session().)
+ *	- DESTROY_SESSION must be the final operation in a compound, if
+ *	  sessionid's in SEQUENCE and DESTROY_SESSION are the same.
+ *	  (Enforced in nfsd4_destroy_session().)
  */
-static bool nfs41_op_ordering_ok(struct nfsd4_compoundargs *args)
+static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
 {
-	if (args->minorversion && args->opcnt > 0) {
-		struct nfsd4_op *op = &args->ops[0];
-		return (op->status == nfserr_op_illegal) ||
-		       (nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP);
-	}
-	return true;
+	struct nfsd4_op *op = &args->ops[0];
+
+	/* These ordering requirements don't apply to NFSv4.0: */
+	if (args->minorversion == 0)
+		return nfs_ok;
+	/* This is weird, but OK, not our problem: */
+	if (args->opcnt == 0)
+		return nfs_ok;
+	if (op->status == nfserr_op_illegal)
+		return nfs_ok;
+	if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP))
+		return nfserr_op_not_in_session;
+	if (op->opnum == OP_SEQUENCE)
+		return nfs_ok;
+	if (args->opcnt != 1)
+		return nfserr_not_only_op;
+	return nfs_ok;
 }
 
 /*
@@ -1023,13 +1039,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 	if (args->minorversion > nfsd_supported_minorversion)
 		goto out;
 
-	if (!nfs41_op_ordering_ok(args)) {
+	status = nfs41_check_op_ordering(args);
+	if (status) {
 		op = &args->ops[0];
-		op->status = nfserr_sequence_pos;
+		op->status = status;
 		goto encode_op;
 	}
 
-	status = nfs_ok;
 	while (!status && resp->opcnt < args->opcnt) {
 		op = &args->ops[resp->opcnt++];
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4300d9ffe95f..bba9fff49cfe 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1343,6 +1343,14 @@ out:
 	return status;
 }
 
+static bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
+{
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
+	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+
+	return argp->opcnt == resp->opcnt;
+}
+
 __be32
 nfsd4_destroy_session(struct svc_rqst *r,
 		      struct nfsd4_compound_state *cstate,
@@ -1358,6 +1366,11 @@ nfsd4_destroy_session(struct svc_rqst *r,
 	 * - Do we need to clear any callback info from previous session?
 	 */
 
+	if (!memcmp(&sessionid->sessionid, &cstate->session->se_sessionid,
+					sizeof(struct nfs4_sessionid))) {
+		if (!nfsd4_last_compound_op(r))
+			return nfserr_not_only_op;
+	}
 	dump_sessionid(__func__, &sessionid->sessionid);
 	spin_lock(&sessionid_lock);
 	ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
-- 
cgit v1.2.3


From d03859a4aca3969efd91dc77be7efa2ae45b05d8 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Thu, 22 Apr 2010 11:30:59 +0200
Subject: nfsd: potential ERR_PTR dereference on exp_export() error paths.

We "goto finish" from several places where "exp" is an ERR_PTR.  Also I
changed the check for "fsid_key" so that it was consistent with the check
I added.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/export.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 65ddc5b8eb33..55da4d339293 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1076,9 +1076,9 @@ exp_export(struct nfsctl_export *nxp)
 		err = 0;
 finish:
 	kfree(new.ex_pathname);
-	if (exp)
+	if (!IS_ERR_OR_NULL(exp))
 		exp_put(exp);
-	if (fsid_key && !IS_ERR(fsid_key))
+	if (!IS_ERR_OR_NULL(fsid_key))
 		cache_put(&fsid_key->h, &svc_expkey_cache);
 	path_put(&path);
 out_put_clp:
-- 
cgit v1.2.3


From 26c0c75e69265961e891ed80b38fb62a548ab371 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Sat, 24 Apr 2010 15:35:43 -0400
Subject: nfsd4: fix unlikely race in session replay case

In the replay case, the

	renew_client(session->se_client);

happens after we've droppped the sessionid_lock, and without holding a
reference on the session; so there's nothing preventing the session
being freed before we get here.

Thanks to Benny Halevy for catching a bug in an earlier version of this
patch.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Acked-by: Benny Halevy <bhalevy@panasas.com>
---
 fs/nfsd/nfs4proc.c  |  1 +
 fs/nfsd/nfs4state.c |  7 +++----
 fs/nfsd/nfs4xdr.c   | 10 ++++++----
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e147dbcb7ef7..61282f8405b5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1027,6 +1027,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 	resp->rqstp = rqstp;
 	resp->cstate.minorversion = args->minorversion;
 	resp->cstate.replay_owner = NULL;
+	resp->cstate.session = NULL;
 	fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
 	fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
 	/* Use the deferral mechanism only for NFSv4.0 compounds */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bba9fff49cfe..737315c61e7e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1443,11 +1443,10 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 	cstate->slot = slot;
 	cstate->session = session;
 
-	/* Hold a session reference until done processing the compound:
-	 * nfsd4_put_session called only if the cstate slot is set.
-	 */
-	nfsd4_get_session(session);
 out:
+	/* Hold a session reference until done processing the compound. */
+	if (cstate->session)
+		nfsd4_get_session(cstate->session);
 	spin_unlock(&sessionid_lock);
 	/* Renew the clientid on success and on replay */
 	if (cstate->session) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index fb27b1db007b..05bc5bd63c95 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3306,10 +3306,12 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
 		iov = &rqstp->rq_res.head[0];
 	iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
 	BUG_ON(iov->iov_len > PAGE_SIZE);
-	if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) {
-		nfsd4_store_cache_entry(resp);
-		dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
-		resp->cstate.slot->sl_inuse = false;
+	if (nfsd4_has_session(cs)) {
+		if (cs->status != nfserr_replay_cache) {
+			nfsd4_store_cache_entry(resp);
+			dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
+			resp->cstate.slot->sl_inuse = false;
+		}
 		nfsd4_put_session(resp->cstate.session);
 	}
 	return 1;
-- 
cgit v1.2.3


From dbd65a7e44fff4741a0b2c84bd6bace85d22c242 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Mon, 3 May 2010 19:31:33 +0300
Subject: nfsd4: use local variable in nfs4svc_encode_compoundres

'cs' is already computed, re-use it.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4xdr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 05bc5bd63c95..b27bcf33107c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3310,9 +3310,9 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
 		if (cs->status != nfserr_replay_cache) {
 			nfsd4_store_cache_entry(resp);
 			dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
-			resp->cstate.slot->sl_inuse = false;
+			cs->slot->sl_inuse = false;
 		}
-		nfsd4_put_session(resp->cstate.session);
+		nfsd4_put_session(cs->session);
 	}
 	return 1;
 }
-- 
cgit v1.2.3


From 5d4cec2f2fdbb3d830fa014226d0d965df548bad Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Sat, 1 May 2010 12:56:06 -0400
Subject: nfsd4: fix bare destroy_session null dereference

It's legal to send a DESTROY_SESSION outside any session (as the only
operation in a compound), in which case cstate->session will be NULL;
check for that case.

While we're at it, move these checks into a separate helper function.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f05a3276ba6b..835d6cef9ae9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1352,6 +1352,13 @@ static bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
 	return argp->opcnt == resp->opcnt;
 }
 
+static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
+{
+	if (!session)
+		return 0;
+	return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
+}
+
 __be32
 nfsd4_destroy_session(struct svc_rqst *r,
 		      struct nfsd4_compound_state *cstate,
@@ -1367,8 +1374,7 @@ nfsd4_destroy_session(struct svc_rqst *r,
 	 * - Do we need to clear any callback info from previous session?
 	 */
 
-	if (!memcmp(&sessionid->sessionid, &cstate->session->se_sessionid,
-					sizeof(struct nfs4_sessionid))) {
+	if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
 		if (!nfsd4_last_compound_op(r))
 			return nfserr_not_only_op;
 	}
-- 
cgit v1.2.3


From 9089f1b4782ff52835059779fd37b7ad765a25c7 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 12 May 2010 00:12:26 +0300
Subject: nfsd4: rename sessionid_lock to client_lock

In preparation to share the lock's scope to both client
and session hash tables.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 835d6cef9ae9..2313dbf086bd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -250,6 +250,9 @@ unhash_delegation(struct nfs4_delegation *dp)
  * SETCLIENTID state 
  */
 
+/* client_lock protects the session hash table */
+static DEFINE_SPINLOCK(client_lock);
+
 /* Hash tables for nfs4_clientid state */
 #define CLIENT_HASH_BITS                 4
 #define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
@@ -368,7 +371,6 @@ static void release_openowner(struct nfs4_stateowner *sop)
 	nfs4_put_stateowner(sop);
 }
 
-static DEFINE_SPINLOCK(sessionid_lock);
 #define SESSION_HASH_SIZE	512
 static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
 
@@ -566,10 +568,10 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
 
 	new->se_flags = cses->flags;
 	kref_init(&new->se_ref);
-	spin_lock(&sessionid_lock);
+	spin_lock(&client_lock);
 	list_add(&new->se_hash, &sessionid_hashtbl[idx]);
 	list_add(&new->se_perclnt, &clp->cl_sessions);
-	spin_unlock(&sessionid_lock);
+	spin_unlock(&client_lock);
 
 	status = nfs_ok;
 out:
@@ -580,7 +582,7 @@ out_free:
 	goto out;
 }
 
-/* caller must hold sessionid_lock */
+/* caller must hold client_lock */
 static struct nfsd4_session *
 find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
 {
@@ -603,7 +605,7 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
 	return NULL;
 }
 
-/* caller must hold sessionid_lock */
+/* caller must hold client_lock */
 static void
 unhash_session(struct nfsd4_session *ses)
 {
@@ -614,9 +616,9 @@ unhash_session(struct nfsd4_session *ses)
 static void
 release_session(struct nfsd4_session *ses)
 {
-	spin_lock(&sessionid_lock);
+	spin_lock(&client_lock);
 	unhash_session(ses);
-	spin_unlock(&sessionid_lock);
+	spin_unlock(&client_lock);
 	nfsd4_put_session(ses);
 }
 
@@ -1379,15 +1381,15 @@ nfsd4_destroy_session(struct svc_rqst *r,
 			return nfserr_not_only_op;
 	}
 	dump_sessionid(__func__, &sessionid->sessionid);
-	spin_lock(&sessionid_lock);
+	spin_lock(&client_lock);
 	ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
 	if (!ses) {
-		spin_unlock(&sessionid_lock);
+		spin_unlock(&client_lock);
 		goto out;
 	}
 
 	unhash_session(ses);
-	spin_unlock(&sessionid_lock);
+	spin_unlock(&client_lock);
 
 	/* wait for callbacks */
 	nfsd4_set_callback_client(ses->se_client, NULL);
@@ -1411,7 +1413,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 	if (resp->opcnt != 1)
 		return nfserr_sequence_pos;
 
-	spin_lock(&sessionid_lock);
+	spin_lock(&client_lock);
 	status = nfserr_badsession;
 	session = find_in_sessionid_hashtbl(&seq->sessionid);
 	if (!session)
@@ -1454,7 +1456,7 @@ out:
 	/* Hold a session reference until done processing the compound. */
 	if (cstate->session)
 		nfsd4_get_session(cstate->session);
-	spin_unlock(&sessionid_lock);
+	spin_unlock(&client_lock);
 	/* Renew the clientid on success and on replay */
 	if (cstate->session) {
 		nfs4_lock_state();
-- 
cgit v1.2.3


From be1fdf6c4386f56271d2f690b93ef686b769587c Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 12 May 2010 00:12:39 +0300
Subject: nfsd4: fold release_session into expire_client

and grab the client lock once for all the client's sessions.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2313dbf086bd..f8bf6190a5d3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -613,15 +613,6 @@ unhash_session(struct nfsd4_session *ses)
 	list_del(&ses->se_perclnt);
 }
 
-static void
-release_session(struct nfsd4_session *ses)
-{
-	spin_lock(&client_lock);
-	unhash_session(ses);
-	spin_unlock(&client_lock);
-	nfsd4_put_session(ses);
-}
-
 void
 free_session(struct kref *kref)
 {
@@ -722,12 +713,15 @@ expire_client(struct nfs4_client *clp)
 		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
 		release_openowner(sop);
 	}
+	spin_lock(&client_lock);
 	while (!list_empty(&clp->cl_sessions)) {
 		struct nfsd4_session  *ses;
 		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
 				 se_perclnt);
-		release_session(ses);
+		unhash_session(ses);
+		nfsd4_put_session(ses);
 	}
+	spin_unlock(&client_lock);
 	nfsd4_set_callback_client(clp, NULL);
 	if (clp->cl_cb_conn.cb_xprt)
 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
-- 
cgit v1.2.3


From 328efbab0f8ae1617448917906a12e5f568553b6 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 12 May 2010 00:12:51 +0300
Subject: nfsd4: use list_move in move_to_confirmed

rather than list_del_init, list_add

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f8bf6190a5d3..aecafb2e4056 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -859,10 +859,9 @@ move_to_confirmed(struct nfs4_client *clp)
 	unsigned int strhashval;
 
 	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
-	list_del_init(&clp->cl_strhash);
 	list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
 	strhashval = clientstr_hashval(clp->cl_recdir);
-	list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
+	list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
 	renew_client(clp);
 }
 
-- 
cgit v1.2.3


From 36acb66bda512dd8159c3e1b40358c5219524868 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 12 May 2010 00:13:04 +0300
Subject: nfsd4: extend the client_lock to cover cl_lru

To be used later on to hold a reference count on the client while in use by a
nfsv4.1 compound.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 41 ++++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index aecafb2e4056..3f572cb3f896 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -250,7 +250,7 @@ unhash_delegation(struct nfs4_delegation *dp)
  * SETCLIENTID state 
  */
 
-/* client_lock protects the session hash table */
+/* client_lock protects the client lru list and session hash table */
 static DEFINE_SPINLOCK(client_lock);
 
 /* Hash tables for nfs4_clientid state */
@@ -628,8 +628,9 @@ free_session(struct kref *kref)
 	kfree(ses);
 }
 
+/* must be called under the client_lock */
 static inline void
-renew_client(struct nfs4_client *clp)
+renew_client_locked(struct nfs4_client *clp)
 {
 	/*
 	* Move client to the end to the LRU list.
@@ -641,6 +642,14 @@ renew_client(struct nfs4_client *clp)
 	clp->cl_time = get_seconds();
 }
 
+static inline void
+renew_client(struct nfs4_client *clp)
+{
+	spin_lock(&client_lock);
+	renew_client_locked(clp);
+	spin_unlock(&client_lock);
+}
+
 /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
 static int
 STALE_CLIENTID(clientid_t *clid)
@@ -706,14 +715,14 @@ expire_client(struct nfs4_client *clp)
 		list_del_init(&dp->dl_recall_lru);
 		unhash_delegation(dp);
 	}
-	list_del(&clp->cl_idhash);
-	list_del(&clp->cl_strhash);
-	list_del(&clp->cl_lru);
 	while (!list_empty(&clp->cl_openowners)) {
 		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
 		release_openowner(sop);
 	}
+	list_del(&clp->cl_idhash);
+	list_del(&clp->cl_strhash);
 	spin_lock(&client_lock);
+	list_del(&clp->cl_lru);
 	while (!list_empty(&clp->cl_sessions)) {
 		struct nfsd4_session  *ses;
 		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
@@ -848,8 +857,7 @@ add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
 	list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
 	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
 	list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
-	list_add_tail(&clp->cl_lru, &client_lru);
-	clp->cl_time = get_seconds();
+	renew_client(clp);
 }
 
 static void
@@ -1447,15 +1455,12 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 
 out:
 	/* Hold a session reference until done processing the compound. */
-	if (cstate->session)
-		nfsd4_get_session(cstate->session);
-	spin_unlock(&client_lock);
-	/* Renew the clientid on success and on replay */
 	if (cstate->session) {
-		nfs4_lock_state();
-		renew_client(session->se_client);
-		nfs4_unlock_state();
+		nfsd4_get_session(cstate->session);
+		/* Renew the clientid on success and on replay */
+		renew_client_locked(session->se_client);
 	}
+	spin_unlock(&client_lock);
 	dprintk("%s: return %d\n", __func__, ntohl(status));
 	return status;
 }
@@ -2564,6 +2569,8 @@ nfs4_laundromat(void)
 	dprintk("NFSD: laundromat service - starting\n");
 	if (locks_in_grace())
 		nfsd4_end_grace();
+	INIT_LIST_HEAD(&reaplist);
+	spin_lock(&client_lock);
 	list_for_each_safe(pos, next, &client_lru) {
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -2572,12 +2579,16 @@ nfs4_laundromat(void)
 				clientid_val = t;
 			break;
 		}
+		list_move(&clp->cl_lru, &reaplist);
+	}
+	spin_unlock(&client_lock);
+	list_for_each_safe(pos, next, &reaplist) {
+		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		dprintk("NFSD: purging unused client (clientid %08x)\n",
 			clp->cl_clientid.cl_id);
 		nfsd4_remove_clid_dir(clp);
 		expire_client(clp);
 	}
-	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&recall_lock);
 	list_for_each_safe(pos, next, &del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-- 
cgit v1.2.3


From 84d38ac9abf0a5bc0044c9363acaad55a9a4be0d Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 12 May 2010 00:13:16 +0300
Subject: nfsd4: refactor expire_client

Separate out unhashing of the client and session.
To be used later by the laundromat.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3f572cb3f896..dede43c37336 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -693,6 +693,20 @@ free_client(struct nfs4_client *clp)
 	kfree(clp);
 }
 
+/* must be called under the client_lock */
+static inline void
+unhash_client_locked(struct nfs4_client *clp)
+{
+	list_del(&clp->cl_lru);
+	while (!list_empty(&clp->cl_sessions)) {
+		struct nfsd4_session  *ses;
+		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
+				 se_perclnt);
+		unhash_session(ses);
+		nfsd4_put_session(ses);
+	}
+}
+
 static void
 expire_client(struct nfs4_client *clp)
 {
@@ -719,21 +733,14 @@ expire_client(struct nfs4_client *clp)
 		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
 		release_openowner(sop);
 	}
+	nfsd4_set_callback_client(clp, NULL);
+	if (clp->cl_cb_conn.cb_xprt)
+		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
 	list_del(&clp->cl_idhash);
 	list_del(&clp->cl_strhash);
 	spin_lock(&client_lock);
-	list_del(&clp->cl_lru);
-	while (!list_empty(&clp->cl_sessions)) {
-		struct nfsd4_session  *ses;
-		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
-				 se_perclnt);
-		unhash_session(ses);
-		nfsd4_put_session(ses);
-	}
+	unhash_client_locked(clp);
 	spin_unlock(&client_lock);
-	nfsd4_set_callback_client(clp, NULL);
-	if (clp->cl_cb_conn.cb_xprt)
-		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
 	free_client(clp);
 }
 
-- 
cgit v1.2.3


From 46583e2597af649f134462d2f2c1be5e6689198d Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 12 May 2010 00:13:29 +0300
Subject: nfsd4: introduce nfs4_client.cl_refcount

Currently just initialize the cl_refcount to 1
and decrement in expire_client(), conditionally freeing the
client when the refcount reaches 0.

To be used later by nfsv4.1 compounds to keep the client from
timing out while in use.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 4 +++-
 fs/nfsd/state.h     | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index dede43c37336..e439a882e0c2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -740,8 +740,9 @@ expire_client(struct nfs4_client *clp)
 	list_del(&clp->cl_strhash);
 	spin_lock(&client_lock);
 	unhash_client_locked(clp);
+	if (atomic_read(&clp->cl_refcount) == 0)
+		free_client(clp);
 	spin_unlock(&client_lock);
-	free_client(clp);
 }
 
 static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
@@ -827,6 +828,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 	}
 
 	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
+	atomic_set(&clp->cl_refcount, 0);
 	atomic_set(&clp->cl_cb_set, 0);
 	INIT_LIST_HEAD(&clp->cl_idhash);
 	INIT_LIST_HEAD(&clp->cl_strhash);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 98836fd87f69..ee42a0beecfa 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -233,6 +233,8 @@ struct nfs4_client {
 	struct nfsd4_clid_slot	cl_cs_slot;	/* create_session slot */
 	u32			cl_exchange_flags;
 	struct nfs4_sessionid	cl_sessionid;
+	/* number of rpc's in progress over an associated session: */
+	atomic_t		cl_refcount;
 
 	/* for nfs41 callbacks */
 	/* We currently support a single back channel with a single slot */
-- 
cgit v1.2.3


From 07cd4909a6c0c275ef42fd27748226975919e336 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 12 May 2010 00:13:41 +0300
Subject: nfsd4: mark_client_expired

Mark the client as expired under the client_lock so it won't be renewed
when an nfsv4.1 session is done, after it was explicitly expired
during processing of the compound.

Do not renew a client mark as expired (in particular, it is not
on the lru list anymore)

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 10 ++++++++++
 fs/nfsd/state.h     | 14 +++++++++++++-
 2 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e439a882e0c2..98aa7e8827b9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -632,6 +632,14 @@ free_session(struct kref *kref)
 static inline void
 renew_client_locked(struct nfs4_client *clp)
 {
+	if (is_client_expired(clp)) {
+		dprintk("%s: client (clientid %08x/%08x) already expired\n",
+			__func__,
+			clp->cl_clientid.cl_boot,
+			clp->cl_clientid.cl_id);
+		return;
+	}
+
 	/*
 	* Move client to the end to the LRU list.
 	*/
@@ -697,6 +705,7 @@ free_client(struct nfs4_client *clp)
 static inline void
 unhash_client_locked(struct nfs4_client *clp)
 {
+	mark_client_expired(clp);
 	list_del(&clp->cl_lru);
 	while (!list_empty(&clp->cl_sessions)) {
 		struct nfsd4_session  *ses;
@@ -836,6 +845,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 	INIT_LIST_HEAD(&clp->cl_delegations);
 	INIT_LIST_HEAD(&clp->cl_sessions);
 	INIT_LIST_HEAD(&clp->cl_lru);
+	clp->cl_time = get_seconds();
 	clear_bit(0, &clp->cl_cb_slot_busy);
 	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
 	copy_verf(clp, verf);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index ee42a0beecfa..cfd743ea4b79 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -166,7 +166,7 @@ struct nfsd4_session {
 	struct list_head	se_hash;	/* hash by sessionid */
 	struct list_head	se_perclnt;
 	u32			se_flags;
-	struct nfs4_client	*se_client;	/* for expire_client */
+	struct nfs4_client	*se_client;
 	struct nfs4_sessionid	se_sessionid;
 	struct nfsd4_channel_attrs se_fchannel;
 	struct nfsd4_channel_attrs se_bchannel;
@@ -244,6 +244,18 @@ struct nfs4_client {
 						/* wait here for slots */
 };
 
+static inline void
+mark_client_expired(struct nfs4_client *clp)
+{
+	clp->cl_time = 0;
+}
+
+static inline bool
+is_client_expired(struct nfs4_client *clp)
+{
+	return clp->cl_time == 0;
+}
+
 /* struct nfs4_client_reset
  * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
  * upon lease reset, or from upcall to state_daemon (to read in state
-- 
cgit v1.2.3


From d76829889ac4250a18cfcc1a606bb256bb9c570c Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 12 May 2010 00:13:54 +0300
Subject: nfsd4: keep a reference count on client while in use

Get a refcount on the client on SEQUENCE,
Release the refcount and renew the client when all respective compounds completed.
Do not expire the client by the laundromat while in use.
If the client was expired via another path, free it when the compounds
complete and the refcount reaches 0.

Note that unhash_client_locked must call list_del_init on cl_lru as
it may be called twice for the same client (once from nfs4_laundromat
and then from expire_client)

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 27 ++++++++++++++++++++++++---
 fs/nfsd/nfs4xdr.c   |  3 ++-
 fs/nfsd/state.h     |  1 +
 3 files changed, 27 insertions(+), 4 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 98aa7e8827b9..cc0e9117dd16 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -701,6 +701,22 @@ free_client(struct nfs4_client *clp)
 	kfree(clp);
 }
 
+void
+release_session_client(struct nfsd4_session *session)
+{
+	struct nfs4_client *clp = session->se_client;
+
+	if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock))
+		return;
+	if (is_client_expired(clp)) {
+		free_client(clp);
+		session->se_client = NULL;
+	} else
+		renew_client_locked(clp);
+	spin_unlock(&client_lock);
+	nfsd4_put_session(session);
+}
+
 /* must be called under the client_lock */
 static inline void
 unhash_client_locked(struct nfs4_client *clp)
@@ -1476,8 +1492,7 @@ out:
 	/* Hold a session reference until done processing the compound. */
 	if (cstate->session) {
 		nfsd4_get_session(cstate->session);
-		/* Renew the clientid on success and on replay */
-		renew_client_locked(session->se_client);
+		atomic_inc(&session->se_client->cl_refcount);
 	}
 	spin_unlock(&client_lock);
 	dprintk("%s: return %d\n", __func__, ntohl(status));
@@ -2598,7 +2613,13 @@ nfs4_laundromat(void)
 				clientid_val = t;
 			break;
 		}
-		list_move(&clp->cl_lru, &reaplist);
+		if (atomic_read(&clp->cl_refcount)) {
+			dprintk("NFSD: client in use (clientid %08x)\n",
+				clp->cl_clientid.cl_id);
+			continue;
+		}
+		unhash_client_locked(clp);
+		list_add(&clp->cl_lru, &reaplist);
 	}
 	spin_unlock(&client_lock);
 	list_for_each_safe(pos, next, &reaplist) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5c2de471329a..126d0caabb3c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3313,7 +3313,8 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
 			dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
 			cs->slot->sl_inuse = false;
 		}
-		nfsd4_put_session(cs->session);
+		/* Renew the clientid on success and on replay */
+		release_session_client(cs->session);
 	}
 	return 1;
 }
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index cfd743ea4b79..006c84230c7c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -420,6 +420,7 @@ extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
 extern void nfsd4_recdir_purge_old(void);
 extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
 extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
+extern void release_session_client(struct nfsd4_session *);
 
 static inline void
 nfs4_put_stateowner(struct nfs4_stateowner *so)
-- 
cgit v1.2.3


From ab707e156593ff7fffd615757332dbff6616836a Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 12 May 2010 00:14:06 +0300
Subject: nfsd4: nfsd4_destroy_session must set callback client under the state
 lock

nfsd4_set_callback_client must be called under the state lock to atomically
set or unset the callback client and shutting down the previous one.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c | 1 +
 fs/nfsd/nfs4state.c    | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 1d5051d46b46..77bc9d3c80fd 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -718,6 +718,7 @@ void nfsd4_destroy_callback_queue(void)
 	destroy_workqueue(callback_wq);
 }
 
+/* must be called under the state lock */
 void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new)
 {
 	struct rpc_clnt *old = clp->cl_cb_client;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index cc0e9117dd16..ede9dde52fe8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1427,8 +1427,10 @@ nfsd4_destroy_session(struct svc_rqst *r,
 	unhash_session(ses);
 	spin_unlock(&client_lock);
 
+	nfs4_lock_state();
 	/* wait for callbacks */
 	nfsd4_set_callback_client(ses->se_client, NULL);
+	nfs4_unlock_state();
 	nfsd4_put_session(ses);
 	status = nfs_ok;
 out:
-- 
cgit v1.2.3


From 4dc6ec00f6347b72312fa41dfc587d5302b05544 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 19 Apr 2010 15:11:28 -0400
Subject: nfsd4: implement reclaim_complete

This is a mandatory operation.  Also, here (not in open) is where we
should be committing the reboot recovery information.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 Documentation/filesystems/nfs/nfs41-server.txt |  2 +-
 fs/nfsd/nfs4proc.c                             |  5 ++++
 fs/nfsd/nfs4state.c                            | 33 +++++++++++++++++++++++---
 fs/nfsd/nfs4xdr.c                              | 12 +++++++++-
 fs/nfsd/xdr4.h                                 |  6 +++++
 5 files changed, 53 insertions(+), 5 deletions(-)

(limited to 'fs/nfsd')

diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
index 6a53a84afc72..04884914a1c8 100644
--- a/Documentation/filesystems/nfs/nfs41-server.txt
+++ b/Documentation/filesystems/nfs/nfs41-server.txt
@@ -137,7 +137,7 @@ NS*| OPENATTR             | OPT        |              | Section 18.17  |
    | READ                 | REQ        |              | Section 18.22  |
    | READDIR              | REQ        |              | Section 18.23  |
    | READLINK             | OPT        |              | Section 18.24  |
-NS | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
+   | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
    | RELEASE_LOCKOWNER    | MNI        |              | N/A            |
    | REMOVE               | REQ        |              | Section 18.25  |
    | RENAME               | REQ        |              | Section 18.26  |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e2dc9608281b..59ec449b0c7f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1312,6 +1312,11 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
 		.op_name = "OP_SEQUENCE",
 	},
+	[OP_RECLAIM_COMPLETE] = {
+		.op_func = (nfsd4op_func)nfsd4_reclaim_complete,
+		.op_flags = ALLOWED_WITHOUT_FH,
+		.op_name = "OP_RECLAIM_COMPLETE",
+	},
 };
 
 static const char *nfsd4_op_name(unsigned opnum)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ede9dde52fe8..84b0fe9a262a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1501,6 +1501,35 @@ out:
 	return status;
 }
 
+__be32
+nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
+{
+	if (rc->rca_one_fs) {
+		if (!cstate->current_fh.fh_dentry)
+			return nfserr_nofilehandle;
+		/*
+		 * We don't take advantage of the rca_one_fs case.
+		 * That's OK, it's optional, we can safely ignore it.
+		 */
+		 return nfs_ok;
+	}
+	nfs4_lock_state();
+	if (is_client_expired(cstate->session->se_client)) {
+		nfs4_unlock_state();
+		/*
+		 * The following error isn't really legal.
+		 * But we only get here if the client just explicitly
+		 * destroyed the client.  Surely it no longer cares what
+		 * error it gets back on an operation for the dead
+		 * client.
+		 */
+		return nfserr_stale_clientid;
+	}
+	nfsd4_create_clid_dir(cstate->session->se_client);
+	nfs4_unlock_state();
+	return nfs_ok;
+}
+
 __be32
 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		  struct nfsd4_setclientid *setclid)
@@ -2510,10 +2539,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	}
 	memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
 
-	if (nfsd4_has_session(&resp->cstate)) {
+	if (nfsd4_has_session(&resp->cstate))
 		open->op_stateowner->so_confirmed = 1;
-		nfsd4_create_clid_dir(open->op_stateowner->so_client);
-	}
 
 	/*
 	* Attempt to hand out a delegation. No error return, because the
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 126d0caabb3c..ac17a7080239 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1234,6 +1234,16 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
 	DECODE_TAIL;
 }
 
+static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc)
+{
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	READ32(rc->rca_one_fs);
+
+	DECODE_TAIL;
+}
+
 static __be32
 nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
 {
@@ -1346,7 +1356,7 @@ static nfsd4_dec nfsd41_dec_ops[] = {
 	[OP_TEST_STATEID]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_WANT_DELEGATION]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_DESTROY_CLIENTID]	= (nfsd4_dec)nfsd4_decode_notsupp,
-	[OP_RECLAIM_COMPLETE]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_RECLAIM_COMPLETE]	= (nfsd4_dec)nfsd4_decode_reclaim_complete,
 };
 
 struct nfsd4_minorversion_ops {
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index c28958ec216c..4d476ff08ae6 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -381,6 +381,10 @@ struct nfsd4_destroy_session {
 	struct nfs4_sessionid	sessionid;
 };
 
+struct nfsd4_reclaim_complete {
+	u32 rca_one_fs;
+};
+
 struct nfsd4_op {
 	int					opnum;
 	__be32					status;
@@ -421,6 +425,7 @@ struct nfsd4_op {
 		struct nfsd4_create_session	create_session;
 		struct nfsd4_destroy_session	destroy_session;
 		struct nfsd4_sequence		sequence;
+		struct nfsd4_reclaim_complete	reclaim_complete;
 	} u;
 	struct nfs4_replay *			replay;
 };
@@ -523,6 +528,7 @@ extern __be32 nfsd4_sequence(struct svc_rqst *,
 extern __be32 nfsd4_destroy_session(struct svc_rqst *,
 		struct nfsd4_compound_state *,
 		struct nfsd4_destroy_session *);
+__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *);
 extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
 		struct nfsd4_open *open);
 extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
-- 
cgit v1.2.3


From 15ddb4aec54422ead137b03ea4e9b3f5db3f7cc2 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Fri, 14 May 2010 15:33:36 +0400
Subject: NFSD: don't report compiled-out versions as present

The /proc/fs/nfsd/versions file calls nfsd_vers() to check whether
the particular nfsd version is present/available. The problem is
that once I turn off e.g. NFSD-V4 this call returns -1 which is
true from the callers POV which is wrong.

The proposal is to report false in that case.

The bug has existed since 6658d3a7bbfd1768 "[PATCH] knfsd: remove
nfsd_versbits as intermediate storage for desired versions".

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: stable@kernel.org
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfssvc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 171699eb07c8..06b2a26edfe0 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -120,7 +120,7 @@ u32 nfsd_supported_minorversion;
 int nfsd_vers(int vers, enum vers_op change)
 {
 	if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
-		return -1;
+		return 0;
 	switch(change) {
 	case NFSD_SET:
 		nfsd_versions[vers] = nfsd_version[vers];
-- 
cgit v1.2.3


From b7299f44394336f51b526247a870d47d28f4f97c Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 14 May 2010 17:57:35 -0400
Subject: nfs4: minor callback code simplification, comment

Note the position in the version array doesn't have to match the actual
rpc version number--to me it seems clearer to maintain the distinction.

Also document choice of rpc callback version number, as discussed in
e.g. http://www.ietf.org/mail-archive/web/nfsv4/current/msg07985.html
and followups.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 77bc9d3c80fd..eb78e7e22077 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -424,13 +424,19 @@ static struct rpc_procinfo     nfs4_cb_procedures[] = {
 };
 
 static struct rpc_version       nfs_cb_version4 = {
+/*
+ * Note on the callback rpc program version number: despite language in rfc
+ * 5661 section 18.36.3 requiring servers to use 4 in this field, the
+ * official xdr descriptions for both 4.0 and 4.1 specify version 1, and
+ * in practice that appears to be what implementations use.  The section
+ * 18.36.3 language is expected to be fixed in an erratum.
+ */
         .number                 = 1,
         .nrprocs                = ARRAY_SIZE(nfs4_cb_procedures),
         .procs                  = nfs4_cb_procedures
 };
 
 static struct rpc_version *	nfs_cb_version[] = {
-	NULL,
 	&nfs_cb_version4,
 };
 
@@ -471,7 +477,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
 		.timeout	= &timeparms,
 		.program	= &cb_program,
 		.prognumber	= cb->cb_prog,
-		.version	= nfs_cb_version[1]->number,
+		.version	= 0,
 		.authflavor	= clp->cl_flavor,
 		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
 		.client_name    = clp->cl_principal,
-- 
cgit v1.2.3


From 47cee541a46a73b20dc279bf4c4690f776f6c81b Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 17 May 2010 20:00:37 +0400
Subject: nfsd: safer initialization order in find_file()

The alloc_init_file() first adds a file to the hash and then
initializes its fi_inode, fi_id and fi_had_conflict.

The uninitialized fi_inode could thus be erroneously checked by
the find_file(), so move the hash insertion lower.

The client_mutex should prevent this race in practice; however, we
eventually hope to make less use of the client_mutex, so the ordering
here is an accident waiting to happen.

I didn't find whether the same can be true for two other fields,
but the common sense tells me it's better to initialize an object
before putting it into a global hash table :)

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 84b0fe9a262a..296eded356b6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1757,12 +1757,12 @@ alloc_init_file(struct inode *ino)
 		INIT_LIST_HEAD(&fp->fi_hash);
 		INIT_LIST_HEAD(&fp->fi_stateids);
 		INIT_LIST_HEAD(&fp->fi_delegations);
-		spin_lock(&recall_lock);
-		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
-		spin_unlock(&recall_lock);
 		fp->fi_inode = igrab(ino);
 		fp->fi_id = current_fileid++;
 		fp->fi_had_conflict = false;
+		spin_lock(&recall_lock);
+		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+		spin_unlock(&recall_lock);
 		return fp;
 	}
 	return NULL;
-- 
cgit v1.2.3


From e4e83ea47babd9d4bf95a13aed87f8ef51e46472 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 22 Apr 2010 16:21:39 -0400
Subject: Revert "nfsd4: distinguish expired from stale stateids"

This reverts commit 78155ed75f470710f2aecb3e75e3d97107ba8374.

We're depending here on the boot time that we use to generate the
stateid being monotonic, but get_seconds() is not necessarily.

We still depend at least on boot_time being different every time, but
that is a safer bet.

We have a few reports of errors that might be explained by this problem,
though we haven't been able to confirm any of them.

But the minor gain of distinguishing expired from stale errors seems not
worth the risk.

Conflicts:

	fs/nfsd/nfs4state.c

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c | 56 +++++++++++------------------------------------------
 1 file changed, 11 insertions(+), 45 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 296eded356b6..12f7109720c2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -190,7 +190,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	dp->dl_vfs_file = stp->st_vfs_file;
 	dp->dl_type = type;
 	dp->dl_ident = cb->cb_ident;
-	dp->dl_stateid.si_boot = get_seconds();
+	dp->dl_stateid.si_boot = boot_time;
 	dp->dl_stateid.si_stateownerid = current_delegid++;
 	dp->dl_stateid.si_fileid = 0;
 	dp->dl_stateid.si_generation = 0;
@@ -1884,7 +1884,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
 	stp->st_stateowner = sop;
 	get_nfs4_file(fp);
 	stp->st_file = fp;
-	stp->st_stateid.si_boot = get_seconds();
+	stp->st_stateid.si_boot = boot_time;
 	stp->st_stateid.si_stateownerid = sop->so_id;
 	stp->st_stateid.si_fileid = fp->fi_id;
 	stp->st_stateid.si_generation = 0;
@@ -2733,39 +2733,11 @@ nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
 static int
 STALE_STATEID(stateid_t *stateid)
 {
-	if (time_after((unsigned long)boot_time,
-			(unsigned long)stateid->si_boot)) {
-		dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
-			STATEID_VAL(stateid));
-		return 1;
-	}
-	return 0;
-}
-
-static int
-EXPIRED_STATEID(stateid_t *stateid)
-{
-	if (time_before((unsigned long)boot_time,
-			((unsigned long)stateid->si_boot)) &&
-	    time_before((unsigned long)(stateid->si_boot + nfsd4_lease), get_seconds())) {
-		dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
-			STATEID_VAL(stateid));
-		return 1;
-	}
-	return 0;
-}
-
-static __be32
-stateid_error_map(stateid_t *stateid)
-{
-	if (STALE_STATEID(stateid))
-		return nfserr_stale_stateid;
-	if (EXPIRED_STATEID(stateid))
-		return nfserr_expired;
-
-	dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
+	if (stateid->si_boot == boot_time)
+		return 0;
+	dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
 		STATEID_VAL(stateid));
-	return nfserr_bad_stateid;
+	return 1;
 }
 
 static inline int
@@ -2889,10 +2861,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
 	status = nfserr_bad_stateid;
 	if (is_delegation_stateid(stateid)) {
 		dp = find_delegation_stateid(ino, stateid);
-		if (!dp) {
-			status = stateid_error_map(stateid);
+		if (!dp)
 			goto out;
-		}
 		status = check_stateid_generation(stateid, &dp->dl_stateid,
 						  flags);
 		if (status)
@@ -2905,10 +2875,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
 			*filpp = dp->dl_vfs_file;
 	} else { /* open or lock stateid */
 		stp = find_stateid(stateid, flags);
-		if (!stp) {
-			status = stateid_error_map(stateid);
+		if (!stp)
 			goto out;
-		}
 		if (nfs4_check_fh(current_fh, stp))
 			goto out;
 		if (!stp->st_stateowner->so_confirmed)
@@ -2980,7 +2948,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
 		 */
 		sop = search_close_lru(stateid->si_stateownerid, flags);
 		if (sop == NULL)
-			return stateid_error_map(stateid);
+			return nfserr_bad_stateid;
 		*sopp = sop;
 		goto check_replay;
 	}
@@ -3247,10 +3215,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (!is_delegation_stateid(stateid))
 		goto out;
 	dp = find_delegation_stateid(inode, stateid);
-	if (!dp) {
-		status = stateid_error_map(stateid);
+	if (!dp)
 		goto out;
-	}
 	status = check_stateid_generation(stateid, &dp->dl_stateid, flags);
 	if (status)
 		goto out;
@@ -3476,7 +3442,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
 	stp->st_stateowner = sop;
 	get_nfs4_file(fp);
 	stp->st_file = fp;
-	stp->st_stateid.si_boot = get_seconds();
+	stp->st_stateid.si_boot = boot_time;
 	stp->st_stateid.si_stateownerid = sop->so_id;
 	stp->st_stateid.si_fileid = fp->fi_id;
 	stp->st_stateid.si_generation = 0;
-- 
cgit v1.2.3


From e970a573ce30a3976234dcfb67906c164b0df9ee Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 Mar 2010 17:32:14 +0100
Subject: nfsd: open a file descriptor for fsync in nfs4 recovery

Instead of just looking up a path use do_filp_open to get us a file
structure for the nfs4 recovery directory.  This allows us to get
rid of the last non-standard vfs_fsync caller with a NULL file
pointer.

[AV: should be using fput(), not filp_close()]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/nfs4recover.c | 87 +++++++++++++++++++++++++--------------------------
 1 file changed, 42 insertions(+), 45 deletions(-)

(limited to 'fs/nfsd')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 7a9ae3254a4b..dada03f2c13b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -44,8 +44,7 @@
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
 /* Globals */
-static struct path rec_dir;
-static int rec_dir_init = 0;
+static struct file *rec_file;
 
 static int
 nfs4_save_creds(const struct cred **original_creds)
@@ -117,33 +116,28 @@ out_no_tfm:
 	return status;
 }
 
-static void
-nfsd4_sync_rec_dir(void)
-{
-	vfs_fsync(NULL, rec_dir.dentry, 0);
-}
-
 int
 nfsd4_create_clid_dir(struct nfs4_client *clp)
 {
 	const struct cred *original_cred;
 	char *dname = clp->cl_recdir;
-	struct dentry *dentry;
+	struct dentry *dir, *dentry;
 	int status;
 
 	dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
 
-	if (!rec_dir_init || clp->cl_firststate)
+	if (!rec_file || clp->cl_firststate)
 		return 0;
 
 	status = nfs4_save_creds(&original_cred);
 	if (status < 0)
 		return status;
 
+	dir = rec_file->f_path.dentry;
 	/* lock the parent */
-	mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
+	mutex_lock(&dir->d_inode->i_mutex);
 
-	dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
+	dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
 	if (IS_ERR(dentry)) {
 		status = PTR_ERR(dentry);
 		goto out_unlock;
@@ -153,18 +147,18 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 		dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
 		goto out_put;
 	}
-	status = mnt_want_write(rec_dir.mnt);
+	status = mnt_want_write(rec_file->f_path.mnt);
 	if (status)
 		goto out_put;
-	status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
-	mnt_drop_write(rec_dir.mnt);
+	status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
+	mnt_drop_write(rec_file->f_path.mnt);
 out_put:
 	dput(dentry);
 out_unlock:
-	mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
+	mutex_unlock(&dir->d_inode->i_mutex);
 	if (status == 0) {
 		clp->cl_firststate = 1;
-		nfsd4_sync_rec_dir();
+		vfs_fsync(rec_file, rec_file->f_path.dentry, 0);
 	}
 	nfs4_reset_creds(original_cred);
 	dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
@@ -206,14 +200,14 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
 	struct dentry *dentry;
 	int status;
 
-	if (!rec_dir_init)
+	if (!rec_file)
 		return 0;
 
 	status = nfs4_save_creds(&original_cred);
 	if (status < 0)
 		return status;
 
-	filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY,
+	filp = dentry_open(dget(dir), mntget(rec_file->f_path.mnt), O_RDONLY,
 			   current_cred());
 	status = PTR_ERR(filp);
 	if (IS_ERR(filp))
@@ -250,13 +244,14 @@ out:
 static int
 nfsd4_unlink_clid_dir(char *name, int namlen)
 {
-	struct dentry *dentry;
+	struct dentry *dir, *dentry;
 	int status;
 
 	dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
 
-	mutex_lock_nested(&rec_dir.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_one_len(name, rec_dir.dentry, namlen);
+	dir = rec_file->f_path.dentry;
+	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+	dentry = lookup_one_len(name, dir, namlen);
 	if (IS_ERR(dentry)) {
 		status = PTR_ERR(dentry);
 		goto out_unlock;
@@ -264,11 +259,11 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
 	status = -ENOENT;
 	if (!dentry->d_inode)
 		goto out;
-	status = vfs_rmdir(rec_dir.dentry->d_inode, dentry);
+	status = vfs_rmdir(dir->d_inode, dentry);
 out:
 	dput(dentry);
 out_unlock:
-	mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
+	mutex_unlock(&dir->d_inode->i_mutex);
 	return status;
 }
 
@@ -278,10 +273,10 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 	const struct cred *original_cred;
 	int status;
 
-	if (!rec_dir_init || !clp->cl_firststate)
+	if (!rec_file || !clp->cl_firststate)
 		return;
 
-	status = mnt_want_write(rec_dir.mnt);
+	status = mnt_want_write(rec_file->f_path.mnt);
 	if (status)
 		goto out;
 	clp->cl_firststate = 0;
@@ -293,8 +288,8 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 	status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
 	nfs4_reset_creds(original_cred);
 	if (status == 0)
-		nfsd4_sync_rec_dir();
-	mnt_drop_write(rec_dir.mnt);
+		vfs_fsync(rec_file, rec_file->f_path.dentry, 0);
+	mnt_drop_write(rec_file->f_path.mnt);
 out:
 	if (status)
 		printk("NFSD: Failed to remove expired client state directory"
@@ -323,19 +318,19 @@ void
 nfsd4_recdir_purge_old(void) {
 	int status;
 
-	if (!rec_dir_init)
+	if (!rec_file)
 		return;
-	status = mnt_want_write(rec_dir.mnt);
+	status = mnt_want_write(rec_file->f_path.mnt);
 	if (status)
 		goto out;
-	status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old);
+	status = nfsd4_list_rec_dir(rec_file->f_path.dentry, purge_old);
 	if (status == 0)
-		nfsd4_sync_rec_dir();
-	mnt_drop_write(rec_dir.mnt);
+		vfs_fsync(rec_file, rec_file->f_path.dentry, 0);
+	mnt_drop_write(rec_file->f_path.mnt);
 out:
 	if (status)
 		printk("nfsd4: failed to purge old clients from recovery"
-			" directory %s\n", rec_dir.dentry->d_name.name);
+			" directory %s\n", rec_file->f_path.dentry->d_name.name);
 }
 
 static int
@@ -355,10 +350,13 @@ int
 nfsd4_recdir_load(void) {
 	int status;
 
-	status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir);
+	if (!rec_file)
+		return 0;
+
+	status = nfsd4_list_rec_dir(rec_file->f_path.dentry, load_recdir);
 	if (status)
 		printk("nfsd4: failed loading clients from recovery"
-			" directory %s\n", rec_dir.dentry->d_name.name);
+			" directory %s\n", rec_file->f_path.dentry->d_name.name);
 	return status;
 }
 
@@ -375,7 +373,7 @@ nfsd4_init_recdir(char *rec_dirname)
 	printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
 			rec_dirname);
 
-	BUG_ON(rec_dir_init);
+	BUG_ON(rec_file);
 
 	status = nfs4_save_creds(&original_cred);
 	if (status < 0) {
@@ -385,22 +383,21 @@ nfsd4_init_recdir(char *rec_dirname)
 		return;
 	}
 
-	status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
-			&rec_dir);
-	if (status)
+	rec_file = filp_open(rec_dirname, O_RDONLY | O_DIRECTORY, 0);
+	if (IS_ERR(rec_file)) {
 		printk("NFSD: unable to find recovery directory %s\n",
 				rec_dirname);
+		rec_file = NULL;
+	}
 
-	if (!status)
-		rec_dir_init = 1;
 	nfs4_reset_creds(original_cred);
 }
 
 void
 nfsd4_shutdown_recdir(void)
 {
-	if (!rec_dir_init)
+	if (!rec_file)
 		return;
-	rec_dir_init = 0;
-	path_put(&rec_dir);
+	fput(rec_file);
+	rec_file = NULL;
 }
-- 
cgit v1.2.3


From 8018ab057480974e7f26a387bf4ce040e9a5f6f1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 Mar 2010 17:32:25 +0100
Subject: sanitize vfs_fsync calling conventions

Now that the last user passing a NULL file pointer is gone we can remove
the redundant dentry argument and associated hacks inside vfs_fsynmc_range.

The next step will be removig the dentry argument from ->fsync, but given
the luck with the last round of method prototype changes I'd rather
defer this until after the main merge window.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/block/loop.c                |  4 ++--
 drivers/md/bitmap.c                 |  2 +-
 drivers/usb/gadget/storage_common.c |  2 +-
 fs/ceph/file.c                      |  3 +--
 fs/coda/file.c                      |  2 +-
 fs/ecryptfs/file.c                  |  4 +---
 fs/nfsd/nfs4recover.c               |  6 +++---
 fs/nfsd/vfs.c                       |  5 ++---
 fs/sync.c                           | 42 +++++++------------------------------
 include/linux/fs.h                  |  6 +++---
 mm/msync.c                          |  2 +-
 11 files changed, 24 insertions(+), 54 deletions(-)

(limited to 'fs/nfsd')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index a90e83c9be96..6120922f459f 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -485,7 +485,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
 				goto out;
 			}
 
-			ret = vfs_fsync(file, file->f_path.dentry, 0);
+			ret = vfs_fsync(file, 0);
 			if (unlikely(ret)) {
 				ret = -EIO;
 				goto out;
@@ -495,7 +495,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
 		ret = lo_send(lo, bio, pos);
 
 		if (barrier && !ret) {
-			ret = vfs_fsync(file, file->f_path.dentry, 0);
+			ret = vfs_fsync(file, 0);
 			if (unlikely(ret))
 				ret = -EIO;
 		}
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index f084249295d9..53e8bea295e2 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1692,7 +1692,7 @@ int bitmap_create(mddev_t *mddev)
 		 * and bypass the page cache, we must sync the file
 		 * first.
 		 */
-		vfs_fsync(file, file->f_dentry, 1);
+		vfs_fsync(file, 1);
 	}
 	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
 	if (!mddev->bitmap_info.external)
diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c
index 868d8ee86756..04c462ff0ea6 100644
--- a/drivers/usb/gadget/storage_common.c
+++ b/drivers/usb/gadget/storage_common.c
@@ -654,7 +654,7 @@ static int fsg_lun_fsync_sub(struct fsg_lun *curlun)
 
 	if (curlun->ro || !filp)
 		return 0;
-	return vfs_fsync(filp, filp->f_path.dentry, 1);
+	return vfs_fsync(filp, 1);
 }
 
 static void store_cdrom_address(u8 *dest, int msf, u32 addr)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ed6f19721d6e..7d634938edc9 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -844,8 +844,7 @@ retry_snap:
 		if ((ret >= 0 || ret == -EIOCBQUEUED) &&
 		    ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
 		     || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
-			err = vfs_fsync_range(file, file->f_path.dentry,
-					      pos, pos + ret - 1, 1);
+			err = vfs_fsync_range(file, pos, pos + ret - 1, 1);
 			if (err < 0)
 				ret = err;
 		}
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 4c813f2cdc52..7196077b1688 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -217,7 +217,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
 
-	err = vfs_fsync(host_file, host_file->f_path.dentry, datasync);
+	err = vfs_fsync(host_file, datasync);
 	if ( !err && !datasync ) {
 		lock_kernel();
 		err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index e7440a6f5ebf..3bdddbcc785f 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -276,9 +276,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
 static int
 ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
-	return vfs_fsync(ecryptfs_file_to_lower(file),
-			 ecryptfs_dentry_to_lower(dentry),
-			 datasync);
+	return vfs_fsync(ecryptfs_file_to_lower(file), datasync);
 }
 
 static int ecryptfs_fasync(int fd, struct file *file, int flag)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index dada03f2c13b..7e26caab2a26 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -158,7 +158,7 @@ out_unlock:
 	mutex_unlock(&dir->d_inode->i_mutex);
 	if (status == 0) {
 		clp->cl_firststate = 1;
-		vfs_fsync(rec_file, rec_file->f_path.dentry, 0);
+		vfs_fsync(rec_file, 0);
 	}
 	nfs4_reset_creds(original_cred);
 	dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
@@ -288,7 +288,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 	status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
 	nfs4_reset_creds(original_cred);
 	if (status == 0)
-		vfs_fsync(rec_file, rec_file->f_path.dentry, 0);
+		vfs_fsync(rec_file, 0);
 	mnt_drop_write(rec_file->f_path.mnt);
 out:
 	if (status)
@@ -325,7 +325,7 @@ nfsd4_recdir_purge_old(void) {
 		goto out;
 	status = nfsd4_list_rec_dir(rec_file->f_path.dentry, purge_old);
 	if (status == 0)
-		vfs_fsync(rec_file, rec_file->f_path.dentry, 0);
+		vfs_fsync(rec_file, 0);
 	mnt_drop_write(rec_file->f_path.mnt);
 out:
 	if (status)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 23c06f77f4ca..ebbf3b6b2457 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -999,7 +999,7 @@ static int wait_for_concurrent_writes(struct file *file)
 
 	if (inode->i_state & I_DIRTY) {
 		dprintk("nfsd: write sync %d\n", task_pid_nr(current));
-		err = vfs_fsync(file, file->f_path.dentry, 0);
+		err = vfs_fsync(file, 0);
 	}
 	last_ino = inode->i_ino;
 	last_dev = inode->i_sb->s_dev;
@@ -1175,8 +1175,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (err)
 		goto out;
 	if (EX_ISSYNC(fhp->fh_export)) {
-		int err2 = vfs_fsync_range(file, file->f_path.dentry,
-				offset, end, 0);
+		int err2 = vfs_fsync_range(file, offset, end, 0);
 
 		if (err2 != -EINVAL)
 			err = nfserrno(err2);
diff --git a/fs/sync.c b/fs/sync.c
index d5369203f8e4..5a537ccd2e85 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -158,7 +158,6 @@ EXPORT_SYMBOL(file_fsync);
 /**
  * vfs_fsync_range - helper to sync a range of data & metadata to disk
  * @file:		file to sync
- * @dentry:		dentry of @file
  * @start:		offset in bytes of the beginning of data range to sync
  * @end:		offset in bytes of the end of data range (inclusive)
  * @datasync:		perform only datasync
@@ -166,32 +165,13 @@ EXPORT_SYMBOL(file_fsync);
  * Write back data in range @start..@end and metadata for @file to disk.  If
  * @datasync is set only metadata needed to access modified file data is
  * written.
- *
- * In case this function is called from nfsd @file may be %NULL and
- * only @dentry is set.  This can only happen when the filesystem
- * implements the export_operations API.
  */
-int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start,
-		    loff_t end, int datasync)
+int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	const struct file_operations *fop;
-	struct address_space *mapping;
+	struct address_space *mapping = file->f_mapping;
 	int err, ret;
 
-	/*
-	 * Get mapping and operations from the file in case we have
-	 * as file, or get the default values for them in case we
-	 * don't have a struct file available.  Damn nfsd..
-	 */
-	if (file) {
-		mapping = file->f_mapping;
-		fop = file->f_op;
-	} else {
-		mapping = dentry->d_inode->i_mapping;
-		fop = dentry->d_inode->i_fop;
-	}
-
-	if (!fop || !fop->fsync) {
+	if (!file->f_op || !file->f_op->fsync) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -203,7 +183,7 @@ int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start,
 	 * livelocks in fsync_buffers_list().
 	 */
 	mutex_lock(&mapping->host->i_mutex);
-	err = fop->fsync(file, dentry, datasync);
+	err = file->f_op->fsync(file, file->f_path.dentry, datasync);
 	if (!ret)
 		ret = err;
 	mutex_unlock(&mapping->host->i_mutex);
@@ -216,19 +196,14 @@ EXPORT_SYMBOL(vfs_fsync_range);
 /**
  * vfs_fsync - perform a fsync or fdatasync on a file
  * @file:		file to sync
- * @dentry:		dentry of @file
  * @datasync:		only perform a fdatasync operation
  *
  * Write back data and metadata for @file to disk.  If @datasync is
  * set only metadata needed to access modified file data is written.
- *
- * In case this function is called from nfsd @file may be %NULL and
- * only @dentry is set.  This can only happen when the filesystem
- * implements the export_operations API.
  */
-int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int vfs_fsync(struct file *file, int datasync)
 {
-	return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync);
+	return vfs_fsync_range(file, 0, LLONG_MAX, datasync);
 }
 EXPORT_SYMBOL(vfs_fsync);
 
@@ -239,7 +214,7 @@ static int do_fsync(unsigned int fd, int datasync)
 
 	file = fget(fd);
 	if (file) {
-		ret = vfs_fsync(file, file->f_path.dentry, datasync);
+		ret = vfs_fsync(file, datasync);
 		fput(file);
 	}
 	return ret;
@@ -267,8 +242,7 @@ int generic_write_sync(struct file *file, loff_t pos, loff_t count)
 {
 	if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
 		return 0;
-	return vfs_fsync_range(file, file->f_path.dentry, pos,
-			       pos + count - 1,
+	return vfs_fsync_range(file, pos, pos + count - 1,
 			       (file->f_flags & __O_SYNC) ? 0 : 1);
 }
 EXPORT_SYMBOL(generic_write_sync);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6660db898c41..f3e108314c93 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2084,9 +2084,9 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping,
 extern int filemap_fdatawrite_range(struct address_space *mapping,
 				loff_t start, loff_t end);
 
-extern int vfs_fsync_range(struct file *file, struct dentry *dentry,
-			   loff_t start, loff_t end, int datasync);
-extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
+extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
+			   int datasync);
+extern int vfs_fsync(struct file *file, int datasync);
 extern int generic_write_sync(struct file *file, loff_t pos, loff_t count);
 extern void sync_supers(void);
 extern void emergency_sync(void);
diff --git a/mm/msync.c b/mm/msync.c
index 4083209b7f02..632df4527c01 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -82,7 +82,7 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
 				(vma->vm_flags & VM_SHARED)) {
 			get_file(file);
 			up_read(&mm->mmap_sem);
-			error = vfs_fsync(file, file->f_path.dentry, 0);
+			error = vfs_fsync(file, 0);
 			fput(file);
 			if (error || start >= end)
 				goto out;
-- 
cgit v1.2.3