diff options
Diffstat (limited to 'fs/nfsd/nfssvc.c')
-rw-r--r-- | fs/nfsd/nfssvc.c | 220 |
1 files changed, 126 insertions, 94 deletions
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 80431921e5d7..07193595b8e0 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/fs_struct.h> #include <linux/swap.h> +#include <linux/siphash.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svcsock.h> @@ -55,18 +56,17 @@ static __be32 nfsd_init_request(struct svc_rqst *, struct svc_process_info *); /* - * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members - * of the svc_serv struct. In particular, ->sv_nrthreads but also to some - * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt + * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and some members + * of the svc_serv struct such as ->sv_temp_socks and ->sv_permsocks. * * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a - * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number - * of nfsd threads must exist and each must listed in ->sp_all_threads in each - * entry of ->sv_pools[]. + * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless + * nn->keep_active is set). That number of nfsd threads must + * exist and each must be listed in ->sp_all_threads in some entry of + * ->sv_pools[]. * - * Transitions of the thread count between zero and non-zero are of particular - * interest since the svc_serv needs to be created and initialized at that - * point, or freed. + * Each active thread holds a counted reference on nn->nfsd_serv, as does + * the nn->keep_active flag and various transient calls to svc_get(). * * Finally, the nfsd_mutex also protects some of the global variables that are * accessed when nfsd starts and that are settable via the write_* routines in @@ -345,33 +345,57 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn) return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST); } -void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn) +/** + * nfsd_copy_write_verifier - Atomically copy a write verifier + * @verf: buffer in which to receive the verifier cookie + * @nn: NFS net namespace + * + * This function provides a wait-free mechanism for copying the + * namespace's write verifier without tearing it. + */ +void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn) { int seq = 0; do { - read_seqbegin_or_lock(&nn->boot_lock, &seq); - /* - * This is opaque to client, so no need to byte-swap. Use - * __force to keep sparse happy. y2038 time_t overflow is - * irrelevant in this usage - */ - verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec; - verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec; - } while (need_seqretry(&nn->boot_lock, seq)); - done_seqretry(&nn->boot_lock, seq); + read_seqbegin_or_lock(&nn->writeverf_lock, &seq); + memcpy(verf, nn->writeverf, sizeof(*verf)); + } while (need_seqretry(&nn->writeverf_lock, seq)); + done_seqretry(&nn->writeverf_lock, seq); } -static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn) +static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn) { - ktime_get_real_ts64(&nn->nfssvc_boot); + struct timespec64 now; + u64 verf; + + /* + * Because the time value is hashed, y2038 time_t overflow + * is irrelevant in this usage. + */ + ktime_get_raw_ts64(&now); + verf = siphash_2u64(now.tv_sec, now.tv_nsec, &nn->siphash_key); + memcpy(nn->writeverf, &verf, sizeof(nn->writeverf)); } -void nfsd_reset_boot_verifier(struct nfsd_net *nn) +/** + * nfsd_reset_write_verifier - Generate a new write verifier + * @nn: NFS net namespace + * + * This function updates the ->writeverf field of @nn. This field + * contains an opaque cookie that, according to Section 18.32.3 of + * RFC 8881, "the client can use to determine whether a server has + * changed instance state (e.g., server restart) between a call to + * WRITE and a subsequent call to either WRITE or COMMIT. This + * cookie MUST be unchanged during a single instance of the NFSv4.1 + * server and MUST be unique between instances of the NFSv4.1 + * server." + */ +void nfsd_reset_write_verifier(struct nfsd_net *nn) { - write_seqlock(&nn->boot_lock); - nfsd_reset_boot_verifier_locked(nn); - write_sequnlock(&nn->boot_lock); + write_seqlock(&nn->writeverf_lock); + nfsd_reset_write_verifier_locked(nn); + write_sequnlock(&nn->writeverf_lock); } static int nfsd_startup_net(struct net *net, const struct cred *cred) @@ -435,6 +459,7 @@ static void nfsd_shutdown_net(struct net *net) nfsd_shutdown_generic(); } +static DEFINE_SPINLOCK(nfsd_notifier_lock); static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -444,18 +469,17 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in sin; - if ((event != NETDEV_DOWN) || - !atomic_inc_not_zero(&nn->ntf_refcnt)) + if (event != NETDEV_DOWN || !nn->nfsd_serv) goto out; + spin_lock(&nfsd_notifier_lock); if (nn->nfsd_serv) { dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local); sin.sin_family = AF_INET; sin.sin_addr.s_addr = ifa->ifa_local; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin); } - atomic_dec(&nn->ntf_refcnt); - wake_up(&nn->ntf_wq); + spin_unlock(&nfsd_notifier_lock); out: return NOTIFY_DONE; @@ -475,10 +499,10 @@ static int nfsd_inet6addr_event(struct notifier_block *this, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in6 sin6; - if ((event != NETDEV_DOWN) || - !atomic_inc_not_zero(&nn->ntf_refcnt)) + if (event != NETDEV_DOWN || !nn->nfsd_serv) goto out; + spin_lock(&nfsd_notifier_lock); if (nn->nfsd_serv) { dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; @@ -487,8 +511,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this, sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6); } - atomic_dec(&nn->ntf_refcnt); - wake_up(&nn->ntf_wq); + spin_unlock(&nfsd_notifier_lock); + out: return NOTIFY_DONE; } @@ -505,7 +529,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - atomic_dec(&nn->ntf_refcnt); /* check if the notifier still has clients */ if (atomic_dec_return(&nfsd_notifier_refcount) == 0) { unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); @@ -513,7 +536,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } - wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0); /* * write_ports can create the server without actually starting @@ -594,20 +616,9 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = { .svo_shutdown = nfsd_last_thread, .svo_function = nfsd, .svo_enqueue_xprt = svc_xprt_do_enqueue, - .svo_setup = svc_set_num_threads, .svo_module = THIS_MODULE, }; -static void nfsd_complete_shutdown(struct net *net) -{ - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - - WARN_ON(!mutex_is_locked(&nfsd_mutex)); - - nn->nfsd_serv = NULL; - complete(&nn->nfsd_shutdown_complete); -} - void nfsd_shutdown_threads(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -622,11 +633,9 @@ void nfsd_shutdown_threads(struct net *net) svc_get(serv); /* Kill outstanding nfsd threads */ - serv->sv_ops->svo_setup(serv, NULL, 0); - nfsd_destroy(net); + svc_set_num_threads(serv, NULL, 0); + nfsd_put(net); mutex_unlock(&nfsd_mutex); - /* Wait for shutdown of nfsd_serv to complete */ - wait_for_completion(&nn->nfsd_shutdown_complete); } bool i_am_nfsd(void) @@ -638,6 +647,7 @@ int nfsd_create_serv(struct net *net) { int error; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv; WARN_ON(!mutex_is_locked(&nfsd_mutex)); if (nn->nfsd_serv) { @@ -647,19 +657,23 @@ int nfsd_create_serv(struct net *net) if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(nn); - nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - &nfsd_thread_sv_ops); - if (nn->nfsd_serv == NULL) + serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, + &nfsd_thread_sv_ops); + if (serv == NULL) return -ENOMEM; - init_completion(&nn->nfsd_shutdown_complete); - nn->nfsd_serv->sv_maxconn = nn->max_connections; - error = svc_bind(nn->nfsd_serv, net); + serv->sv_maxconn = nn->max_connections; + error = svc_bind(serv, net); if (error < 0) { - svc_destroy(nn->nfsd_serv); - nfsd_complete_shutdown(net); + /* NOT nfsd_put() as notifiers (see below) haven't + * been set up yet. + */ + svc_put(serv); return error; } + spin_lock(&nfsd_notifier_lock); + nn->nfsd_serv = serv; + spin_unlock(&nfsd_notifier_lock); set_max_drc(); /* check if the notifier is already set */ @@ -669,8 +683,7 @@ int nfsd_create_serv(struct net *net) register_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } - atomic_inc(&nn->ntf_refcnt); - nfsd_reset_boot_verifier(nn); + nfsd_reset_write_verifier(nn); return 0; } @@ -697,16 +710,26 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) return 0; } -void nfsd_destroy(struct net *net) +/* This is the callback for kref_put() below. + * There is no code here as the first thing to be done is + * call svc_shutdown_net(), but we cannot get the 'net' from + * the kref. So do all the work when kref_put returns true. + */ +static void nfsd_noop(struct kref *ref) +{ +} + +void nfsd_put(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - int destroy = (nn->nfsd_serv->sv_nrthreads == 1); - if (destroy) + if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) { svc_shutdown_net(nn->nfsd_serv, net); - svc_destroy(nn->nfsd_serv); - if (destroy) - nfsd_complete_shutdown(net); + svc_destroy(&nn->nfsd_serv->sv_refcnt); + spin_lock(&nfsd_notifier_lock); + nn->nfsd_serv = NULL; + spin_unlock(&nfsd_notifier_lock); + } } int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) @@ -733,7 +756,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) if (tot > NFSD_MAXSERVS) { /* total too large: scale down requested numbers */ for (i = 0; i < n && tot > 0; i++) { - int new = nthreads[i] * NFSD_MAXSERVS / tot; + int new = nthreads[i] * NFSD_MAXSERVS / tot; tot -= (nthreads[i] - new); nthreads[i] = new; } @@ -753,12 +776,13 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) /* apply the new numbers */ svc_get(nn->nfsd_serv); for (i = 0; i < n; i++) { - err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, - &nn->nfsd_serv->sv_pools[i], nthreads[i]); + err = svc_set_num_threads(nn->nfsd_serv, + &nn->nfsd_serv->sv_pools[i], + nthreads[i]); if (err) break; } - nfsd_destroy(net); + nfsd_put(net); return err; } @@ -795,21 +819,19 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) error = nfsd_startup_net(net, cred); if (error) - goto out_destroy; - error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, - NULL, nrservs); + goto out_put; + error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs); if (error) goto out_shutdown; - /* We are holding a reference to nn->nfsd_serv which - * we don't want to count in the return value, - * so subtract 1 - */ - error = nn->nfsd_serv->sv_nrthreads - 1; + error = nn->nfsd_serv->sv_nrthreads; out_shutdown: if (error < 0 && !nfsd_up_before) nfsd_shutdown_net(net); -out_destroy: - nfsd_destroy(net); /* Release server */ +out_put: + /* Threads now hold service active */ + if (xchg(&nn->keep_active, 0)) + nfsd_put(net); + nfsd_put(net); out: mutex_unlock(&nfsd_mutex); return error; @@ -923,9 +945,6 @@ nfsd(void *vrqstp) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int err; - /* Lock module and set up kernel thread */ - mutex_lock(&nfsd_mutex); - /* At this point, the thread shares current->fs * with the init process. We need to create files with the * umask as defined by the client instead of init's umask. */ @@ -945,8 +964,7 @@ nfsd(void *vrqstp) allow_signal(SIGINT); allow_signal(SIGQUIT); - nfsdstats.th_cnt++; - mutex_unlock(&nfsd_mutex); + atomic_inc(&nfsdstats.th_cnt); set_freezable(); @@ -973,19 +991,35 @@ nfsd(void *vrqstp) /* Clear signals before calling svc_exit_thread() */ flush_signals(current); - mutex_lock(&nfsd_mutex); - nfsdstats.th_cnt --; + atomic_dec(&nfsdstats.th_cnt); out: - rqstp->rq_server = NULL; + /* Take an extra ref so that the svc_put in svc_exit_thread() + * doesn't call svc_destroy() + */ + svc_get(nn->nfsd_serv); /* Release the thread */ svc_exit_thread(rqstp); - nfsd_destroy(net); + /* We need to drop a ref, but may not drop the last reference + * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that + * could deadlock with nfsd_shutdown_threads() waiting for us. + * So three options are: + * - drop a non-final reference, + * - get the mutex without waiting + * - sleep briefly andd try the above again + */ + while (!svc_put_not_last(nn->nfsd_serv)) { + if (mutex_trylock(&nfsd_mutex)) { + nfsd_put(net); + mutex_unlock(&nfsd_mutex); + break; + } + msleep(20); + } /* Release module */ - mutex_unlock(&nfsd_mutex); module_put_and_exit(0); return 0; } @@ -1096,7 +1130,6 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) mutex_unlock(&nfsd_mutex); return -ENODEV; } - /* bump up the psudo refcount while traversing */ svc_get(nn->nfsd_serv); ret = svc_pool_stats_open(nn->nfsd_serv, file); mutex_unlock(&nfsd_mutex); @@ -1109,8 +1142,7 @@ int nfsd_pool_stats_release(struct inode *inode, struct file *file) struct net *net = inode->i_sb->s_fs_info; mutex_lock(&nfsd_mutex); - /* this function really, really should have been called svc_put() */ - nfsd_destroy(net); + nfsd_put(net); mutex_unlock(&nfsd_mutex); return ret; } |