diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-11 19:33:13 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-11 19:33:13 +0200 |
commit | c742b63473b3c5180db8b5d74fdbd56e4371dfa2 (patch) | |
tree | e0f317452689a21a2f08ceef7b211feac6c1a4ab | |
parent | Merge branch 'work.epoll' of git://git.kernel.org/pub/scm/linux/kernel/git/vi... (diff) | |
parent | sunrpc: use kmemdup_nul() in gssp_stringify() (diff) | |
download | linux-c742b63473b3c5180db8b5d74fdbd56e4371dfa2.tar.xz linux-c742b63473b3c5180db8b5d74fdbd56e4371dfa2.zip |
Merge tag 'nfsd-5.8' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields:
"Highlights:
- Keep nfsd clients from unnecessarily breaking their own
delegations.
Note this requires a small kthreadd addition. The result is Tejun
Heo's suggestion (see link), and he was OK with this going through
my tree.
- Patch nfsd/clients/ to display filenames, and to fix byte-order
when displaying stateid's.
- fix a module loading/unloading bug, from Neil Brown.
- A big series from Chuck Lever with RPC/RDMA and tracing
improvements, and lay some groundwork for RPC-over-TLS"
Link: https://lore.kernel.org/r/1588348912-24781-1-git-send-email-bfields@redhat.com
* tag 'nfsd-5.8' of git://linux-nfs.org/~bfields/linux: (49 commits)
sunrpc: use kmemdup_nul() in gssp_stringify()
nfsd: safer handling of corrupted c_type
nfsd4: make drc_slab global, not per-net
SUNRPC: Remove unreachable error condition in rpcb_getport_async()
nfsd: Fix svc_xprt refcnt leak when setup callback client failed
sunrpc: clean up properly in gss_mech_unregister()
sunrpc: svcauth_gss_register_pseudoflavor must reject duplicate registrations.
sunrpc: check that domain table is empty at module unload.
NFSD: Fix improperly-formatted Doxygen comments
NFSD: Squash an annoying compiler warning
SUNRPC: Clean up request deferral tracepoints
NFSD: Add tracepoints for monitoring NFSD callbacks
NFSD: Add tracepoints to the NFSD state management code
NFSD: Add tracepoints to NFSD's duplicate reply cache
SUNRPC: svc_show_status() macro should have enum definitions
SUNRPC: Restructure svc_udp_recvfrom()
SUNRPC: Refactor svc_recvfrom()
SUNRPC: Clean up svc_release_skb() functions
SUNRPC: Refactor recvfrom path dealing with incomplete TCP receives
SUNRPC: Replace dprintk() call sites in TCP receive path
...
41 files changed, 1524 insertions, 651 deletions
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index eb71156bcb7c..318605de83f3 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -429,6 +429,7 @@ prototypes:: int (*lm_grant)(struct file_lock *, struct file_lock *, int); void (*lm_break)(struct file_lock *); /* break_lease callback */ int (*lm_change)(struct file_lock **, int); + bool (*lm_breaker_owns_lease)(struct file_lock *); locking rules: @@ -439,6 +440,7 @@ lm_notify: yes yes no lm_grant: no no no lm_break: yes no no lm_change yes no no +lm_breaker_owns_lease: no no no ========== ============= ================= ========= buffer_head diff --git a/fs/locks.c b/fs/locks.c index 6fd1f6e83178..7df0f9fa66f4 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1557,6 +1557,9 @@ static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) { bool rc; + if (lease->fl_lmops->lm_breaker_owns_lease + && lease->fl_lmops->lm_breaker_owns_lease(lease)) + return false; if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) { rc = false; goto trace; diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 10ec5ecdf117..65c331f75e9c 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -78,6 +78,8 @@ enum { /* Checksum this amount of the request */ #define RC_CSUMLEN (256U) +int nfsd_drc_slab_create(void); +void nfsd_drc_slab_free(void); int nfsd_reply_cache_init(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *); int nfsd_cache_lookup(struct svc_rqst *); diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 09aa545825bd..9217cb64bf0e 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -139,7 +139,6 @@ struct nfsd_net { * Duplicate reply cache */ struct nfsd_drc_bucket *drc_hashtbl; - struct kmem_cache *drc_slab; /* max number of entries allowed in the cache */ unsigned int max_drc_entries; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 5cf91322de0f..7fbe9840a03e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -38,6 +38,7 @@ #include "nfsd.h" #include "state.h" #include "netns.h" +#include "trace.h" #include "xdr4cb.h" #include "xdr4.h" @@ -904,16 +905,20 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c if (clp->cl_minorversion == 0) { if (!clp->cl_cred.cr_principal && - (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) + (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) { + trace_nfsd_cb_setup_err(clp, -EINVAL); return -EINVAL; + } args.client_name = clp->cl_cred.cr_principal; args.prognumber = conn->cb_prog; args.protocol = XPRT_TRANSPORT_TCP; args.authflavor = clp->cl_cred.cr_flavor; clp->cl_cb_ident = conn->cb_ident; } else { - if (!conn->cb_xprt) + if (!conn->cb_xprt) { + trace_nfsd_cb_setup_err(clp, -EINVAL); return -EINVAL; + } clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; @@ -925,32 +930,27 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c /* Create RPC client */ client = rpc_create(&args); if (IS_ERR(client)) { - dprintk("NFSD: couldn't create callback client: %ld\n", - PTR_ERR(client)); + trace_nfsd_cb_setup_err(clp, PTR_ERR(client)); return PTR_ERR(client); } cred = get_backchannel_cred(clp, client, ses); if (!cred) { + trace_nfsd_cb_setup_err(clp, -ENOMEM); rpc_shutdown_client(client); return -ENOMEM; } clp->cl_cb_client = client; clp->cl_cb_cred = cred; + trace_nfsd_cb_setup(clp); return 0; } -static void warn_no_callback_path(struct nfs4_client *clp, int reason) -{ - dprintk("NFSD: warning: no callback path to client %.*s: error %d\n", - (int)clp->cl_name.len, clp->cl_name.data, reason); -} - static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) { if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) return; clp->cl_cb_state = NFSD4_CB_DOWN; - warn_no_callback_path(clp, reason); + trace_nfsd_cb_state(clp); } static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) @@ -958,17 +958,20 @@ static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) return; clp->cl_cb_state = NFSD4_CB_FAULT; - warn_no_callback_path(clp, reason); + trace_nfsd_cb_state(clp); } static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) { struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); + trace_nfsd_cb_done(clp, task->tk_status); if (task->tk_status) nfsd4_mark_cb_down(clp, task->tk_status); - else + else { clp->cl_cb_state = NFSD4_CB_UP; + trace_nfsd_cb_state(clp); + } } static void nfsd4_cb_probe_release(void *calldata) @@ -993,6 +996,7 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { void nfsd4_probe_callback(struct nfs4_client *clp) { clp->cl_cb_state = NFSD4_CB_UNKNOWN; + trace_nfsd_cb_state(clp); set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); nfsd4_run_cb(&clp->cl_cb_null); } @@ -1009,6 +1013,7 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) spin_lock(&clp->cl_lock); memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); spin_unlock(&clp->cl_lock); + trace_nfsd_cb_state(clp); } /* @@ -1165,8 +1170,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) struct nfsd4_callback *cb = calldata; struct nfs4_client *clp = cb->cb_clp; - dprintk("%s: minorversion=%d\n", __func__, - clp->cl_minorversion); + trace_nfsd_cb_done(clp, task->tk_status); if (!nfsd4_cb_sequence_done(task, cb)) return; @@ -1271,6 +1275,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) * kill the old client: */ if (clp->cl_cb_client) { + trace_nfsd_cb_shutdown(clp); rpc_shutdown_client(clp->cl_cb_client); clp->cl_cb_client = NULL; put_cred(clp->cl_cb_cred); @@ -1301,6 +1306,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) err = setup_callback_client(clp, &conn, ses); if (err) { nfsd4_mark_cb_down(clp, err); + if (c) + svc_xprt_put(c->cn_xprt); return; } } @@ -1314,6 +1321,8 @@ nfsd4_run_cb_work(struct work_struct *work) struct rpc_clnt *clnt; int flags; + trace_nfsd_cb_work(clp, cb->cb_msg.rpc_proc->p_name); + if (cb->cb_need_restart) { cb->cb_need_restart = false; } else { diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 0e75f7fb5fec..a09c35f0f6f0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1155,7 +1155,7 @@ extern void nfs_sb_deactive(struct super_block *sb); #define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys" -/** +/* * Support one copy source server for now. */ static __be32 @@ -1245,10 +1245,9 @@ nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) mntput(ss_mnt); } -/** - * nfsd4_setup_inter_ssc - * +/* * Verify COPY destination stateid. + * * Connect to the source server with NFSv4.1. * Create the source struct file for nfsd_copy_range. * Called with COPY cstate: @@ -2302,6 +2301,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) } check_if_stalefh_allowed(args); + rqstp->rq_lease_breaker = (void **)&cstate->clp; + trace_nfsd_compound(rqstp, args->opcnt); while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c107caa56525..bb3d2c32664a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -51,6 +51,7 @@ #include "netns.h" #include "pnfs.h" #include "filecache.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -167,9 +168,6 @@ renew_client_locked(struct nfs4_client *clp) return; } - dprintk("renewing client (clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, - clp->cl_clientid.cl_id); list_move_tail(&clp->cl_lru, &nn->client_lru); clp->cl_time = ktime_get_boottime_seconds(); } @@ -1922,8 +1920,7 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) */ if (clid->cl_boot == (u32)nn->boot_time) return 0; - dprintk("NFSD stale clientid (%08x/%08x) boot_time %08llx\n", - clid->cl_boot, clid->cl_id, nn->boot_time); + trace_nfsd_clid_stale(clid); return 1; } @@ -2406,6 +2403,11 @@ static void states_stop(struct seq_file *s, void *v) spin_unlock(&clp->cl_lock); } +static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f) +{ + seq_printf(s, "filename: \"%pD2\"", f->nf_file); +} + static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f) { struct inode *inode = f->nf_inode; @@ -2422,6 +2424,12 @@ static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo) seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len); } +static void nfs4_show_stateid(struct seq_file *s, stateid_t *stid) +{ + seq_printf(s, "0x%.8x", stid->si_generation); + seq_printf(s, "%12phN", &stid->si_opaque); +} + static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) { struct nfs4_ol_stateid *ols; @@ -2437,7 +2445,9 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) nf = st->sc_file; file = find_any_file(nf); - seq_printf(s, "- 0x%16phN: { type: open, ", &st->sc_stateid); + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); + seq_printf(s, ": { type: open, "); access = bmap_to_share_mode(ols->st_access_bmap); deny = bmap_to_share_mode(ols->st_deny_bmap); @@ -2451,6 +2461,8 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) nfs4_show_superblock(s, file); seq_printf(s, ", "); + nfs4_show_fname(s, file); + seq_printf(s, ", "); nfs4_show_owner(s, oo); seq_printf(s, " }\n"); nfsd_file_put(file); @@ -2470,7 +2482,9 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) nf = st->sc_file; file = find_any_file(nf); - seq_printf(s, "- 0x%16phN: { type: lock, ", &st->sc_stateid); + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); + seq_printf(s, ": { type: lock, "); /* * Note: a lock stateid isn't really the same thing as a lock, @@ -2482,6 +2496,8 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) nfs4_show_superblock(s, file); /* XXX: open stateid? */ seq_printf(s, ", "); + nfs4_show_fname(s, file); + seq_printf(s, ", "); nfs4_show_owner(s, oo); seq_printf(s, " }\n"); nfsd_file_put(file); @@ -2499,7 +2515,9 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) nf = st->sc_file; file = nf->fi_deleg_file; - seq_printf(s, "- 0x%16phN: { type: deleg, ", &st->sc_stateid); + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); + seq_printf(s, ": { type: deleg, "); /* Kinda dead code as long as we only support read delegs: */ seq_printf(s, "access: %s, ", @@ -2508,6 +2526,8 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) /* XXX: lease time, whether it's being recalled. */ nfs4_show_superblock(s, file); + seq_printf(s, ", "); + nfs4_show_fname(s, file); seq_printf(s, " }\n"); return 0; @@ -2521,11 +2541,15 @@ static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st) ls = container_of(st, struct nfs4_layout_stateid, ls_stid); file = ls->ls_file; - seq_printf(s, "- 0x%16phN: { type: layout, ", &st->sc_stateid); + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); + seq_printf(s, ": { type: layout, "); /* XXX: What else would be useful? */ nfs4_show_superblock(s, file); + seq_printf(s, ", "); + nfs4_show_fname(s, file); seq_printf(s, " }\n"); return 0; @@ -2845,14 +2869,12 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r conn->cb_prog = se->se_callback_prog; conn->cb_ident = se->se_callback_ident; memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen); + trace_nfsd_cb_args(clp, conn); return; out_err: conn->cb_addr.ss_family = AF_UNSPEC; conn->cb_addrlen = 0; - dprintk("NFSD: this client (clientid %08x/%08x) " - "will not receive delegations\n", - clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); - + trace_nfsd_cb_nodelegs(clp); return; } @@ -3458,6 +3480,45 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, return nfs_ok; } +static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s) +{ + struct nfsd4_conn *c; + + list_for_each_entry(c, &s->se_conns, cn_persession) { + if (c->cn_xprt == xpt) { + return c; + } + } + return NULL; +} + +static __be32 nfsd4_match_existing_connection(struct svc_rqst *rqst, + struct nfsd4_session *session, u32 req) +{ + struct nfs4_client *clp = session->se_client; + struct svc_xprt *xpt = rqst->rq_xprt; + struct nfsd4_conn *c; + __be32 status; + + /* Following the last paragraph of RFC 5661 Section 18.34.3: */ + spin_lock(&clp->cl_lock); + c = __nfsd4_find_conn(xpt, session); + if (!c) + status = nfserr_noent; + else if (req == c->cn_flags) + status = nfs_ok; + else if (req == NFS4_CDFC4_FORE_OR_BOTH && + c->cn_flags != NFS4_CDFC4_BACK) + status = nfs_ok; + else if (req == NFS4_CDFC4_BACK_OR_BOTH && + c->cn_flags != NFS4_CDFC4_FORE) + status = nfs_ok; + else + status = nfserr_inval; + spin_unlock(&clp->cl_lock); + return status; +} + __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -3479,6 +3540,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(session->se_client, rqstp)) goto out; + status = nfsd4_match_existing_connection(rqstp, session, bcts->dir); + if (status == nfs_ok || status == nfserr_inval) + goto out; status = nfsd4_map_bcts_dir(&bcts->dir); if (status) goto out; @@ -3544,18 +3608,6 @@ out: return status; } -static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s) -{ - struct nfsd4_conn *c; - - list_for_each_entry(c, &s->se_conns, cn_persession) { - if (c->cn_xprt == xpt) { - return c; - } - } - return NULL; -} - static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; @@ -3879,23 +3931,18 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (clp_used_exchangeid(conf)) goto out; if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { - char addr_str[INET6_ADDRSTRLEN]; - rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, - sizeof(addr_str)); - dprintk("NFSD: setclientid: string in use by client " - "at %s\n", addr_str); + trace_nfsd_clid_inuse_err(conf); goto out; } } unconf = find_unconfirmed_client_by_name(&clname, nn); if (unconf) unhash_client_locked(unconf); + /* We need to handle only case 1: probable callback update */ if (conf && same_verf(&conf->cl_verifier, &clverifier)) { - /* case 1: probable callback update */ copy_clid(new, conf); gen_confirm(new, nn); - } else /* case 4 (new client) or cases 2, 3 (client reboot): */ - ; + } new->cl_minorversion = 0; gen_callback(new, setclid, rqstp); add_to_unconfirmed(new); @@ -4076,7 +4123,6 @@ out_free_openowner_slab: out_free_client_slab: kmem_cache_destroy(client_slab); out: - dprintk("nfsd4: out of memory while initializing nfsv4\n"); return -ENOMEM; } @@ -4508,6 +4554,8 @@ nfsd_break_deleg_cb(struct file_lock *fl) struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; struct nfs4_file *fp = dp->dl_stid.sc_file; + trace_nfsd_deleg_break(&dp->dl_stid.sc_stateid); + /* * We don't want the locks code to timeout the lease for us; * we'll remove it ourself if a delegation isn't returned @@ -4522,6 +4570,19 @@ nfsd_break_deleg_cb(struct file_lock *fl) return ret; } +static bool nfsd_breaker_owns_lease(struct file_lock *fl) +{ + struct nfs4_delegation *dl = fl->fl_owner; + struct svc_rqst *rqst; + struct nfs4_client *clp; + + if (!i_am_nfsd()) + return NULL; + rqst = kthread_data(current); + clp = *(rqst->rq_lease_breaker); + return dl->dl_stid.sc_client == clp; +} + static int nfsd_change_deleg_cb(struct file_lock *onlist, int arg, struct list_head *dispose) @@ -4533,6 +4594,7 @@ nfsd_change_deleg_cb(struct file_lock *onlist, int arg, } static const struct lock_manager_operations nfsd_lease_mng_ops = { + .lm_breaker_owns_lease = nfsd_breaker_owns_lease, .lm_break = nfsd_break_deleg_cb, .lm_change = nfsd_change_deleg_cb, }; @@ -5018,8 +5080,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); - dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", - STATEID_VAL(&dp->dl_stid.sc_stateid)); + trace_nfsd_deleg_open(&dp->dl_stid.sc_stateid); open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; nfs4_put_stid(&dp->dl_stid); return; @@ -5136,9 +5197,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf nfs4_open_delegation(current_fh, open, stp); nodeleg: status = nfs_ok; - - dprintk("%s: stateid=" STATEID_FMT "\n", __func__, - STATEID_VAL(&stp->st_stid.sc_stateid)); + trace_nfsd_deleg_none(&stp->st_stid.sc_stateid); out: /* 4.1 client trying to upgrade/downgrade delegation? */ if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp && @@ -5192,8 +5251,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("process_renew(%08x/%08x): starting\n", - clid->cl_boot, clid->cl_id); + trace_nfsd_clid_renew(clid); status = lookup_clientid(clid, cstate, nn, false); if (status) goto out; @@ -5214,6 +5272,7 @@ nfsd4_end_grace(struct nfsd_net *nn) if (nn->grace_ended) return; + trace_nfsd_grace_complete(nn); nn->grace_ended = true; /* * If the server goes down again right now, an NFSv4 @@ -5279,13 +5338,10 @@ nfs4_laundromat(struct nfsd_net *nn) copy_stateid_t *cps_t; int i; - dprintk("NFSD: laundromat service - starting\n"); - if (clients_still_reclaiming(nn)) { new_timeo = 0; goto out; } - dprintk("NFSD: end of grace period\n"); nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); @@ -5307,8 +5363,7 @@ nfs4_laundromat(struct nfsd_net *nn) break; } if (mark_client_expired_locked(clp)) { - dprintk("NFSD: client in use (clientid %08x)\n", - clp->cl_clientid.cl_id); + trace_nfsd_clid_expired(&clp->cl_clientid); continue; } list_add(&clp->cl_lru, &reaplist); @@ -5316,8 +5371,7 @@ nfs4_laundromat(struct nfsd_net *nn) spin_unlock(&nn->client_lock); list_for_each_safe(pos, next, &reaplist) { clp = list_entry(pos, struct nfs4_client, cl_lru); - dprintk("NFSD: purging unused client (clientid %08x)\n", - clp->cl_clientid.cl_id); + trace_nfsd_clid_purged(&clp->cl_clientid); list_del_init(&clp->cl_lru); expire_client(clp); } @@ -5407,7 +5461,6 @@ laundromat_main(struct work_struct *laundry) laundromat_work); t = nfs4_laundromat(nn); - dprintk("NFSD: laundromat_main - sleeping for %lld seconds\n", t); queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } @@ -5948,8 +6001,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, struct nfs4_stid *s; struct nfs4_ol_stateid *stp = NULL; - dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, - seqid, STATEID_VAL(stateid)); + trace_nfsd_preprocess(seqid, stateid); *stpp = NULL; status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn); @@ -6018,9 +6070,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, oo->oo_flags |= NFS4_OO_CONFIRMED; nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); mutex_unlock(&stp->st_mutex); - dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", - __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); - + trace_nfsd_open_confirm(oc->oc_seqid, &stp->st_stid.sc_stateid); nfsd4_client_record_create(oo->oo_owner.so_client); status = nfs_ok; put_stateid: @@ -7072,7 +7122,7 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, unsigned int strhashval; struct nfs4_client_reclaim *crp; - dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", name.len, name.data); + trace_nfsd_clid_reclaim(nn, name.len, name.data); crp = alloc_reclaim(); if (crp) { strhashval = clientstr_hashval(name); @@ -7122,7 +7172,7 @@ nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn) unsigned int strhashval; struct nfs4_client_reclaim *crp = NULL; - dprintk("NFSD: nfs4_find_reclaim_client for name %.*s\n", name.len, name.data); + trace_nfsd_clid_find(nn, name.len, name.data); strhashval = clientstr_hashval(name); list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) { @@ -7686,6 +7736,9 @@ nfsd_recall_delegations(struct list_head *reaplist) list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) { list_del_init(&dp->dl_recall_lru); clp = dp->dl_stid.sc_client; + + trace_nfsd_deleg_recall(&dp->dl_stid.sc_stateid); + /* * We skipped all entries that had a zero dl_time before, * so we can now reset the dl_time back to 0. If a delegation @@ -7868,6 +7921,7 @@ nfs4_state_start_net(struct net *net) goto skip_grace; printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n", nn->nfsd4_grace, net->ns.inum); + trace_nfsd_grace_start(nn); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); return 0; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 96352ab7bd81..0a0cf1fd77d3 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -20,8 +20,7 @@ #include "nfsd.h" #include "cache.h" - -#define NFSDDBG_FACILITY NFSDDBG_REPCACHE +#include "trace.h" /* * We use this value to determine the number of hash buckets from the max @@ -36,6 +35,8 @@ struct nfsd_drc_bucket { spinlock_t cache_lock; }; +static struct kmem_cache *drc_slab; + static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc); @@ -95,7 +96,7 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum, { struct svc_cacherep *rp; - rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL); + rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; @@ -129,7 +130,7 @@ nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, atomic_dec(&nn->num_drc_entries); nn->drc_mem_usage -= sizeof(*rp); } - kmem_cache_free(nn->drc_slab, rp); + kmem_cache_free(drc_slab, rp); } static void @@ -141,6 +142,18 @@ nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, spin_unlock(&b->cache_lock); } +int nfsd_drc_slab_create(void) +{ + drc_slab = kmem_cache_create("nfsd_drc", + sizeof(struct svc_cacherep), 0, 0, NULL); + return drc_slab ? 0: -ENOMEM; +} + +void nfsd_drc_slab_free(void) +{ + kmem_cache_destroy(drc_slab); +} + int nfsd_reply_cache_init(struct nfsd_net *nn) { unsigned int hashsize; @@ -159,18 +172,13 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) if (status) goto out_nomem; - nn->drc_slab = kmem_cache_create("nfsd_drc", - sizeof(struct svc_cacherep), 0, 0, NULL); - if (!nn->drc_slab) - goto out_shrinker; - nn->drc_hashtbl = kcalloc(hashsize, sizeof(*nn->drc_hashtbl), GFP_KERNEL); if (!nn->drc_hashtbl) { nn->drc_hashtbl = vzalloc(array_size(hashsize, sizeof(*nn->drc_hashtbl))); if (!nn->drc_hashtbl) - goto out_slab; + goto out_shrinker; } for (i = 0; i < hashsize; i++) { @@ -180,8 +188,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) nn->drc_hashsize = hashsize; return 0; -out_slab: - kmem_cache_destroy(nn->drc_slab); out_shrinker: unregister_shrinker(&nn->nfsd_reply_cache_shrinker); out_nomem: @@ -209,8 +215,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) nn->drc_hashtbl = NULL; nn->drc_hashsize = 0; - kmem_cache_destroy(nn->drc_slab); - nn->drc_slab = NULL; } /* @@ -323,8 +327,10 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp, struct nfsd_net *nn) { if (key->c_key.k_xid == rp->c_key.k_xid && - key->c_key.k_csum != rp->c_key.k_csum) + key->c_key.k_csum != rp->c_key.k_csum) { ++nn->payload_misses; + trace_nfsd_drc_mismatch(nn, key, rp); + } return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key)); } @@ -377,15 +383,22 @@ out: return ret; } -/* +/** + * nfsd_cache_lookup - Find an entry in the duplicate reply cache + * @rqstp: Incoming Call to find + * * Try to find an entry matching the current call in the cache. When none * is found, we try to grab the oldest expired entry off the LRU list. If * a suitable one isn't there, then drop the cache_lock and allocate a * new one, then search again in case one got inserted while this thread * didn't hold the lock. + * + * Return values: + * %RC_DOIT: Process the request normally + * %RC_REPLY: Reply from cache + * %RC_DROPIT: Do not process the request further */ -int -nfsd_cache_lookup(struct svc_rqst *rqstp) +int nfsd_cache_lookup(struct svc_rqst *rqstp) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp, *found; @@ -399,7 +412,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rqstp->rq_cacherep = NULL; if (type == RC_NOCACHE) { nfsdstats.rcnocache++; - return rtn; + goto out; } csum = nfsd_cache_csum(rqstp); @@ -409,10 +422,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) * preallocate an entry. */ rp = nfsd_reply_cache_alloc(rqstp, csum, nn); - if (!rp) { - dprintk("nfsd: unable to allocate DRC entry!\n"); - return rtn; - } + if (!rp) + goto out; spin_lock(&b->cache_lock); found = nfsd_cache_insert(b, rp, nn); @@ -431,8 +442,10 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) /* go ahead and prune the cache */ prune_bucket(b, nn); - out: + +out_unlock: spin_unlock(&b->cache_lock); +out: return rtn; found_entry: @@ -442,13 +455,13 @@ found_entry: /* Request being processed */ if (rp->c_state == RC_INPROG) - goto out; + goto out_trace; /* From the hall of fame of impractical attacks: * Is this a user who tries to snoop on the cache? */ rtn = RC_DOIT; if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure) - goto out; + goto out_trace; /* Compose RPC reply header */ switch (rp->c_type) { @@ -460,21 +473,26 @@ found_entry: break; case RC_REPLBUFF: if (!nfsd_cache_append(rqstp, &rp->c_replvec)) - goto out; /* should not happen */ + goto out_unlock; /* should not happen */ rtn = RC_REPLY; break; default: - printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - nfsd_reply_cache_free_locked(b, rp, nn); + WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type); } - goto out; +out_trace: + trace_nfsd_drc_found(nn, rqstp, rtn); + goto out_unlock; } -/* - * Update a cache entry. This is called from nfsd_dispatch when - * the procedure has been executed and the complete reply is in - * rqstp->rq_res. +/** + * nfsd_cache_update - Update an entry in the duplicate reply cache. + * @rqstp: svc_rqst with a finished Reply + * @cachetype: which cache to update + * @statp: Reply's status code + * + * This is called from nfsd_dispatch when the procedure has been + * executed and the complete reply is in rqstp->rq_res. * * We're copying around data here rather than swapping buffers because * the toplevel loop requires max-sized buffers, which would be a waste @@ -487,8 +505,7 @@ found_entry: * nfsd failed to encode a reply that otherwise would have been cached. * In this case, nfsd_cache_update is called with statp == NULL. */ -void -nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) +void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp = rqstp->rq_cacherep; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3bb2db947d29..b68e96681522 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -238,7 +238,7 @@ static inline struct net *netns(struct file *file) return file_inode(file)->i_sb->s_fs_info; } -/** +/* * write_unlock_ip - Release all locks used by a client * * Experimental. @@ -277,7 +277,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) return nlmsvc_unlock_all_by_ip(sap); } -/** +/* * write_unlock_fs - Release all locks on a local file system * * Experimental. @@ -327,7 +327,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size) return error; } -/** +/* * write_filehandle - Get a variable-length NFS file handle by path * * On input, the buffer contains a '\n'-terminated C string comprised of @@ -402,7 +402,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) return mesg - buf; } -/** +/* * write_threads - Start NFSD, or report the current number of running threads * * Input: @@ -452,7 +452,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv); } -/** +/* * write_pool_threads - Set or report the current number of threads per pool * * Input: @@ -661,7 +661,7 @@ out: return tlen + len; } -/** +/* * write_versions - Set or report the available NFS protocol versions * * Input: @@ -811,7 +811,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size, return -EINVAL; } -/** +/* * write_ports - Pass a socket file descriptor or transport name to listen on * * Input: @@ -867,7 +867,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) int nfsd_max_blksize; -/** +/* * write_maxblksize - Set or report the current NFS blksize * * Input: @@ -917,7 +917,7 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) nfsd_max_blksize); } -/** +/* * write_maxconn - Set or report the current max number of connections * * Input: @@ -998,7 +998,7 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, return rv; } -/** +/* * write_leasetime - Set or report the current NFSv4 lease time * * Input: @@ -1025,7 +1025,7 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } -/** +/* * write_gracetime - Set or report current NFSv4 grace period time * * As above, but sets the time of the NFSv4 grace period. @@ -1069,7 +1069,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, nfs4_recoverydir()); } -/** +/* * write_recoverydir - Set or report the pathname of the recovery directory * * Input: @@ -1101,7 +1101,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) return rv; } -/** +/* * write_v4_end_grace - release grace period for nfsd's v4.x lock manager * * Input: @@ -1533,6 +1533,9 @@ static int __init init_nfsd(void) goto out_free_slabs; nfsd_fault_inject_init(); /* nfsd fault injection controls */ nfsd_stat_init(); /* Statistics */ + retval = nfsd_drc_slab_create(); + if (retval) + goto out_free_stat; nfsd_lockd_init(); /* lockd->nfsd callbacks */ retval = create_proc_exports_entry(); if (retval) @@ -1546,6 +1549,8 @@ out_free_all: remove_proc_entry("fs/nfs", NULL); out_free_lockd: nfsd_lockd_shutdown(); + nfsd_drc_slab_free(); +out_free_stat: nfsd_stat_shutdown(); nfsd_fault_inject_cleanup(); nfsd4_exit_pnfs(); @@ -1560,6 +1565,7 @@ out_unregister_pernet: static void __exit exit_nfsd(void) { + nfsd_drc_slab_free(); remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); nfsd_stat_shutdown(); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 2ab5569126b8..36cdd81b6688 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -88,6 +88,8 @@ int nfsd_pool_stats_release(struct inode *, struct file *); void nfsd_destroy(struct net *net); +bool i_am_nfsd(void); + struct nfsdfs_client { struct kref cl_ref; void (*cl_release)(struct kref *kref); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ca9fd348548b..b603dfcdd361 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -601,6 +601,11 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = { .svo_module = THIS_MODULE, }; +bool i_am_nfsd(void) +{ + return kthread_func(current) == nfsd; +} + int nfsd_create_serv(struct net *net) { int error; @@ -1011,6 +1016,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) *statp = rpc_garbage_args; return 1; } + rqstp->rq_lease_breaker = NULL; /* * Give the xdr decoder a chance to change this if it wants * (necessary in the NFSv4.0 compound case) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 68d3f30ee760..3b408532a5dc 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -64,13 +64,6 @@ typedef struct { refcount_t sc_count; } copy_stateid_t; -#define STATEID_FMT "(%08x/%08x/%08x/%08x)" -#define STATEID_VAL(s) \ - (s)->si_opaque.so_clid.cl_boot, \ - (s)->si_opaque.so_clid.cl_id, \ - (s)->si_opaque.so_id, \ - (s)->si_generation - struct nfsd4_callback { struct nfs4_client *cb_clp; struct rpc_message cb_msg; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 78c574251c60..1861db1bdc67 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -277,6 +277,7 @@ DECLARE_EVENT_CLASS(nfsd_stateid_class, DEFINE_EVENT(nfsd_stateid_class, nfsd_##name, \ TP_PROTO(stateid_t *stp), \ TP_ARGS(stp)) + DEFINE_STATEID_EVENT(layoutstate_alloc); DEFINE_STATEID_EVENT(layoutstate_unhash); DEFINE_STATEID_EVENT(layoutstate_free); @@ -288,6 +289,138 @@ DEFINE_STATEID_EVENT(layout_recall_done); DEFINE_STATEID_EVENT(layout_recall_fail); DEFINE_STATEID_EVENT(layout_recall_release); +DEFINE_STATEID_EVENT(deleg_open); +DEFINE_STATEID_EVENT(deleg_none); +DEFINE_STATEID_EVENT(deleg_break); +DEFINE_STATEID_EVENT(deleg_recall); + +DECLARE_EVENT_CLASS(nfsd_stateseqid_class, + TP_PROTO(u32 seqid, const stateid_t *stp), + TP_ARGS(seqid, stp), + TP_STRUCT__entry( + __field(u32, seqid) + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, si_id) + __field(u32, si_generation) + ), + TP_fast_assign( + __entry->seqid = seqid; + __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; + __entry->cl_id = stp->si_opaque.so_clid.cl_id; + __entry->si_id = stp->si_opaque.so_id; + __entry->si_generation = stp->si_generation; + ), + TP_printk("seqid=%u client %08x:%08x stateid %08x:%08x", + __entry->seqid, __entry->cl_boot, __entry->cl_id, + __entry->si_id, __entry->si_generation) +) + +#define DEFINE_STATESEQID_EVENT(name) \ +DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \ + TP_PROTO(u32 seqid, const stateid_t *stp), \ + TP_ARGS(seqid, stp)) + +DEFINE_STATESEQID_EVENT(preprocess); +DEFINE_STATESEQID_EVENT(open_confirm); + +DECLARE_EVENT_CLASS(nfsd_clientid_class, + TP_PROTO(const clientid_t *clid), + TP_ARGS(clid), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + ), + TP_fast_assign( + __entry->cl_boot = clid->cl_boot; + __entry->cl_id = clid->cl_id; + ), + TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id) +) + +#define DEFINE_CLIENTID_EVENT(name) \ +DEFINE_EVENT(nfsd_clientid_class, nfsd_clid_##name, \ + TP_PROTO(const clientid_t *clid), \ + TP_ARGS(clid)) + +DEFINE_CLIENTID_EVENT(expired); +DEFINE_CLIENTID_EVENT(purged); +DEFINE_CLIENTID_EVENT(renew); +DEFINE_CLIENTID_EVENT(stale); + +DECLARE_EVENT_CLASS(nfsd_net_class, + TP_PROTO(const struct nfsd_net *nn), + TP_ARGS(nn), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + ), + TP_printk("boot_time=%16llx", __entry->boot_time) +) + +#define DEFINE_NET_EVENT(name) \ +DEFINE_EVENT(nfsd_net_class, nfsd_##name, \ + TP_PROTO(const struct nfsd_net *nn), \ + TP_ARGS(nn)) + +DEFINE_NET_EVENT(grace_start); +DEFINE_NET_EVENT(grace_complete); + +DECLARE_EVENT_CLASS(nfsd_clid_class, + TP_PROTO(const struct nfsd_net *nn, + unsigned int namelen, + const unsigned char *namedata), + TP_ARGS(nn, namelen, namedata), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + __field(unsigned int, namelen) + __dynamic_array(unsigned char, name, namelen) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + __entry->namelen = namelen; + memcpy(__get_dynamic_array(name), namedata, namelen); + ), + TP_printk("boot_time=%16llx nfs4_clientid=%.*s", + __entry->boot_time, __entry->namelen, __get_str(name)) +) + +#define DEFINE_CLID_EVENT(name) \ +DEFINE_EVENT(nfsd_clid_class, nfsd_clid_##name, \ + TP_PROTO(const struct nfsd_net *nn, \ + unsigned int namelen, \ + const unsigned char *namedata), \ + TP_ARGS(nn, namelen, namedata)) + +DEFINE_CLID_EVENT(find); +DEFINE_CLID_EVENT(reclaim); + +TRACE_EVENT(nfsd_clid_inuse_err, + TP_PROTO(const struct nfs4_client *clp), + TP_ARGS(clp), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __field(unsigned int, namelen) + __dynamic_array(unsigned char, name, clp->cl_name.len) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + memcpy(__entry->addr, &clp->cl_addr, + sizeof(struct sockaddr_in6)); + __entry->namelen = clp->cl_name.len; + memcpy(__get_dynamic_array(name), clp->cl_name.data, + clp->cl_name.len); + ), + TP_printk("nfs4_clientid %.*s already in use by %pISpc, client %08x:%08x", + __entry->namelen, __get_str(name), __entry->addr, + __entry->cl_boot, __entry->cl_id) +) + TRACE_DEFINE_ENUM(NFSD_FILE_HASHED); TRACE_DEFINE_ENUM(NFSD_FILE_PENDING); TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ); @@ -432,6 +565,218 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event, __entry->nlink, __entry->mode, __entry->mask) ); +#include "cache.h" + +TRACE_DEFINE_ENUM(RC_DROPIT); +TRACE_DEFINE_ENUM(RC_REPLY); +TRACE_DEFINE_ENUM(RC_DOIT); + +#define show_drc_retval(x) \ + __print_symbolic(x, \ + { RC_DROPIT, "DROPIT" }, \ + { RC_REPLY, "REPLY" }, \ + { RC_DOIT, "DOIT" }) + +TRACE_EVENT(nfsd_drc_found, + TP_PROTO( + const struct nfsd_net *nn, + const struct svc_rqst *rqstp, + int result + ), + TP_ARGS(nn, rqstp, result), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + __field(unsigned long, result) + __field(u32, xid) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + __entry->result = result; + __entry->xid = be32_to_cpu(rqstp->rq_xid); + ), + TP_printk("boot_time=%16llx xid=0x%08x result=%s", + __entry->boot_time, __entry->xid, + show_drc_retval(__entry->result)) + +); + +TRACE_EVENT(nfsd_drc_mismatch, + TP_PROTO( + const struct nfsd_net *nn, + const struct svc_cacherep *key, + const struct svc_cacherep *rp + ), + TP_ARGS(nn, key, rp), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + __field(u32, xid) + __field(u32, cached) + __field(u32, ingress) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + __entry->xid = be32_to_cpu(key->c_key.k_xid); + __entry->cached = (__force u32)key->c_key.k_csum; + __entry->ingress = (__force u32)rp->c_key.k_csum; + ), + TP_printk("boot_time=%16llx xid=0x%08x cached-csum=0x%08x ingress-csum=0x%08x", + __entry->boot_time, __entry->xid, __entry->cached, + __entry->ingress) +); + +TRACE_EVENT(nfsd_cb_args, + TP_PROTO( + const struct nfs4_client *clp, + const struct nfs4_cb_conn *conn + ), + TP_ARGS(clp, conn), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, prog) + __field(u32, ident) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __entry->prog = conn->cb_prog; + __entry->ident = conn->cb_ident; + memcpy(__entry->addr, &conn->cb_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("client %08x:%08x callback addr=%pISpc prog=%u ident=%u", + __entry->cl_boot, __entry->cl_id, + __entry->addr, __entry->prog, __entry->ident) +); + +TRACE_EVENT(nfsd_cb_nodelegs, + TP_PROTO(const struct nfs4_client *clp), + TP_ARGS(clp), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + ), + TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id) +) + +TRACE_DEFINE_ENUM(NFSD4_CB_UP); +TRACE_DEFINE_ENUM(NFSD4_CB_UNKNOWN); +TRACE_DEFINE_ENUM(NFSD4_CB_DOWN); +TRACE_DEFINE_ENUM(NFSD4_CB_FAULT); + +#define show_cb_state(val) \ + __print_symbolic(val, \ + { NFSD4_CB_UP, "UP" }, \ + { NFSD4_CB_UNKNOWN, "UNKNOWN" }, \ + { NFSD4_CB_DOWN, "DOWN" }, \ + { NFSD4_CB_FAULT, "FAULT"}) + +DECLARE_EVENT_CLASS(nfsd_cb_class, + TP_PROTO(const struct nfs4_client *clp), + TP_ARGS(clp), + TP_STRUCT__entry( + __field(unsigned long, state) + __field(u32, cl_boot) + __field(u32, cl_id) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + __entry->state = clp->cl_cb_state; + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("addr=%pISpc client %08x:%08x state=%s", + __entry->addr, __entry->cl_boot, __entry->cl_id, + show_cb_state(__entry->state)) +); + +#define DEFINE_NFSD_CB_EVENT(name) \ +DEFINE_EVENT(nfsd_cb_class, nfsd_cb_##name, \ + TP_PROTO(const struct nfs4_client *clp), \ + TP_ARGS(clp)) + +DEFINE_NFSD_CB_EVENT(setup); +DEFINE_NFSD_CB_EVENT(state); +DEFINE_NFSD_CB_EVENT(shutdown); + +TRACE_EVENT(nfsd_cb_setup_err, + TP_PROTO( + const struct nfs4_client *clp, + long error + ), + TP_ARGS(clp, error), + TP_STRUCT__entry( + __field(long, error) + __field(u32, cl_boot) + __field(u32, cl_id) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + __entry->error = error; + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("addr=%pISpc client %08x:%08x error=%ld", + __entry->addr, __entry->cl_boot, __entry->cl_id, __entry->error) +); + +TRACE_EVENT(nfsd_cb_work, + TP_PROTO( + const struct nfs4_client *clp, + const char *procedure + ), + TP_ARGS(clp, procedure), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __string(procedure, procedure) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __assign_str(procedure, procedure) + memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("addr=%pISpc client %08x:%08x procedure=%s", + __entry->addr, __entry->cl_boot, __entry->cl_id, + __get_str(procedure)) +); + +TRACE_EVENT(nfsd_cb_done, + TP_PROTO( + const struct nfs4_client *clp, + int status + ), + TP_ARGS(clp, status), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(int, status) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __entry->status = status; + memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("addr=%pISpc client %08x:%08x status=%d", + __entry->addr, __entry->cl_boot, __entry->cl_id, + __entry->status) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/include/linux/fs.h b/include/linux/fs.h index 19ef6c88c152..8e1f8f93108f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1048,6 +1048,7 @@ struct lock_manager_operations { bool (*lm_break)(struct file_lock *); int (*lm_change)(struct file_lock *, int, struct list_head *); void (*lm_setup)(struct file_lock *, void **); + bool (*lm_breaker_owns_lease)(struct file_lock *); }; struct lock_manager { diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 8bbcaad7ef0f..c2a274b79c42 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -57,6 +57,7 @@ bool kthread_should_stop(void); bool kthread_should_park(void); bool __kthread_should_park(struct task_struct *k); bool kthread_freezable_should_stop(bool *was_frozen); +void *kthread_func(struct task_struct *k); void *kthread_data(struct task_struct *k); void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index bc07e51f20d1..bf4ac8a0268c 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -84,6 +84,7 @@ struct pf_desc { u32 service; char *name; char *auth_domain_name; + struct auth_domain *domain; bool datatouch; }; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fd390894a584..386628b36bc7 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -254,6 +254,7 @@ struct svc_rqst { struct page * *rq_page_end; /* one past the last page */ struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ + struct bio_vec rq_bvec[RPCSVC_MAXPAGES]; __be32 rq_xid; /* transmission id */ u32 rq_prog; /* program number */ @@ -299,6 +300,7 @@ struct svc_rqst { struct net *rq_bc_net; /* pointer to backchannel's * net namespace */ + void ** rq_lease_breaker; /* The v4 client breaking a lease */ }; #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index cbcfbd0521e3..7ed82625dc0b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -48,7 +48,6 @@ #include <linux/sunrpc/rpc_rdma.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> -#define SVCRDMA_DEBUG /* Default and maximum inline threshold sizes */ enum { @@ -160,9 +159,8 @@ struct svc_rdma_send_ctxt { }; /* svc_rdma_backchannel.c */ -extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, - __be32 *rdma_resp, - struct xdr_buf *rcvbuf); +extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *rctxt); /* svc_rdma_recvfrom.c */ extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma); diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 9e1e046de176..aca35ab5cff2 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -117,6 +117,12 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u return 0; } +static inline bool svc_xprt_is_dead(const struct svc_xprt *xprt) +{ + return (test_bit(XPT_DEAD, &xprt->xpt_flags) != 0) || + (test_bit(XPT_CLOSE, &xprt->xpt_flags) != 0); +} + int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h index ca39a388dc22..f09c82b0a7ae 100644 --- a/include/linux/sunrpc/svcauth_gss.h +++ b/include/linux/sunrpc/svcauth_gss.h @@ -20,7 +20,8 @@ int gss_svc_init(void); void gss_svc_shutdown(void); int gss_svc_init_net(struct net *net); void gss_svc_shutdown_net(struct net *net); -int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); +struct auth_domain *svcauth_gss_register_pseudoflavor(u32 pseudoflavor, + char *name); u32 svcauth_gss_flavor(struct auth_domain *dom); #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 771baadaee9d..b7ac7fe68306 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -28,7 +28,7 @@ struct svc_sock { /* private TCP part */ /* On-the-wire fragment header: */ - __be32 sk_reclen; + __be32 sk_marker; /* As we receive a record, this includes the length received so * far (including the fragment header): */ u32 sk_tcplen; @@ -41,12 +41,12 @@ struct svc_sock { static inline u32 svc_sock_reclen(struct svc_sock *svsk) { - return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK; + return be32_to_cpu(svsk->sk_marker) & RPC_FRAGMENT_SIZE_MASK; } static inline u32 svc_sock_final_rec(struct svc_sock *svsk) { - return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT; + return be32_to_cpu(svsk->sk_marker) & RPC_LAST_STREAM_FRAGMENT; } /* diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 132c3c778a43..bdcde7d33f14 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1279,38 +1279,42 @@ TRACE_EVENT(xprtrdma_leaked_rep, ** Server-side RPC/RDMA events **/ -DECLARE_EVENT_CLASS(svcrdma_xprt_event, +DECLARE_EVENT_CLASS(svcrdma_accept_class, TP_PROTO( - const struct svc_xprt *xprt + const struct svcxprt_rdma *rdma, + long status ), - TP_ARGS(xprt), + TP_ARGS(rdma, status), TP_STRUCT__entry( - __field(const void *, xprt) - __string(addr, xprt->xpt_remotebuf) + __field(long, status) + __string(addr, rdma->sc_xprt.xpt_remotebuf) ), TP_fast_assign( - __entry->xprt = xprt; - __assign_str(addr, xprt->xpt_remotebuf); + __entry->status = status; + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); ), - TP_printk("xprt=%p addr=%s", - __entry->xprt, __get_str(addr) + TP_printk("addr=%s status=%ld", + __get_str(addr), __entry->status ) ); -#define DEFINE_XPRT_EVENT(name) \ - DEFINE_EVENT(svcrdma_xprt_event, svcrdma_xprt_##name, \ - TP_PROTO( \ - const struct svc_xprt *xprt \ - ), \ - TP_ARGS(xprt)) +#define DEFINE_ACCEPT_EVENT(name) \ + DEFINE_EVENT(svcrdma_accept_class, svcrdma_##name##_err, \ + TP_PROTO( \ + const struct svcxprt_rdma *rdma, \ + long status \ + ), \ + TP_ARGS(rdma, status)) -DEFINE_XPRT_EVENT(accept); -DEFINE_XPRT_EVENT(fail); -DEFINE_XPRT_EVENT(free); +DEFINE_ACCEPT_EVENT(pd); +DEFINE_ACCEPT_EVENT(qp); +DEFINE_ACCEPT_EVENT(fabric); +DEFINE_ACCEPT_EVENT(initdepth); +DEFINE_ACCEPT_EVENT(accept); TRACE_DEFINE_ENUM(RDMA_MSG); TRACE_DEFINE_ENUM(RDMA_NOMSG); @@ -1355,7 +1359,7 @@ TRACE_EVENT(svcrdma_decode_rqst, show_rpcrdma_proc(__entry->proc), __entry->hdrlen) ); -TRACE_EVENT(svcrdma_decode_short, +TRACE_EVENT(svcrdma_decode_short_err, TP_PROTO( unsigned int hdrlen ), @@ -1399,7 +1403,8 @@ DECLARE_EVENT_CLASS(svcrdma_badreq_event, ); #define DEFINE_BADREQ_EVENT(name) \ - DEFINE_EVENT(svcrdma_badreq_event, svcrdma_decode_##name,\ + DEFINE_EVENT(svcrdma_badreq_event, \ + svcrdma_decode_##name##_err, \ TP_PROTO( \ __be32 *p \ ), \ @@ -1583,28 +1588,117 @@ DECLARE_EVENT_CLASS(svcrdma_dma_map_class, DEFINE_SVC_DMA_EVENT(dma_map_page); DEFINE_SVC_DMA_EVENT(dma_unmap_page); -TRACE_EVENT(svcrdma_dma_map_rwctx, +TRACE_EVENT(svcrdma_dma_map_rw_err, TP_PROTO( const struct svcxprt_rdma *rdma, + unsigned int nents, int status ), - TP_ARGS(rdma, status), + TP_ARGS(rdma, nents, status), TP_STRUCT__entry( __field(int, status) + __field(unsigned int, nents) __string(device, rdma->sc_cm_id->device->name) __string(addr, rdma->sc_xprt.xpt_remotebuf) ), TP_fast_assign( __entry->status = status; + __entry->nents = nents; + __assign_str(device, rdma->sc_cm_id->device->name); + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s device=%s nents=%u status=%d", + __get_str(addr), __get_str(device), __entry->nents, + __entry->status + ) +); + +TRACE_EVENT(svcrdma_no_rwctx_err, + TP_PROTO( + const struct svcxprt_rdma *rdma, + unsigned int num_sges + ), + + TP_ARGS(rdma, num_sges), + + TP_STRUCT__entry( + __field(unsigned int, num_sges) + __string(device, rdma->sc_cm_id->device->name) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->num_sges = num_sges; + __assign_str(device, rdma->sc_cm_id->device->name); + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s device=%s num_sges=%d", + __get_str(addr), __get_str(device), __entry->num_sges + ) +); + +TRACE_EVENT(svcrdma_page_overrun_err, + TP_PROTO( + const struct svcxprt_rdma *rdma, + const struct svc_rqst *rqst, + unsigned int pageno + ), + + TP_ARGS(rdma, rqst, pageno), + + TP_STRUCT__entry( + __field(unsigned int, pageno) + __field(u32, xid) + __string(device, rdma->sc_cm_id->device->name) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->pageno = pageno; + __entry->xid = __be32_to_cpu(rqst->rq_xid); + __assign_str(device, rdma->sc_cm_id->device->name); + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s device=%s xid=0x%08x pageno=%u", __get_str(addr), + __get_str(device), __entry->xid, __entry->pageno + ) +); + +TRACE_EVENT(svcrdma_small_wrch_err, + TP_PROTO( + const struct svcxprt_rdma *rdma, + unsigned int remaining, + unsigned int seg_no, + unsigned int num_segs + ), + + TP_ARGS(rdma, remaining, seg_no, num_segs), + + TP_STRUCT__entry( + __field(unsigned int, remaining) + __field(unsigned int, seg_no) + __field(unsigned int, num_segs) + __string(device, rdma->sc_cm_id->device->name) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->remaining = remaining; + __entry->seg_no = seg_no; + __entry->num_segs = num_segs; __assign_str(device, rdma->sc_cm_id->device->name); __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); ), - TP_printk("addr=%s device=%s status=%d", - __get_str(addr), __get_str(device), __entry->status + TP_printk("addr=%s device=%s remaining=%u seg_no=%u num_segs=%u", + __get_str(addr), __get_str(device), __entry->remaining, + __entry->seg_no, __entry->num_segs ) ); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ffd2215950dc..852413cbb7d9 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -14,6 +14,39 @@ #include <linux/net.h> #include <linux/tracepoint.h> +TRACE_DEFINE_ENUM(SOCK_STREAM); +TRACE_DEFINE_ENUM(SOCK_DGRAM); +TRACE_DEFINE_ENUM(SOCK_RAW); +TRACE_DEFINE_ENUM(SOCK_RDM); +TRACE_DEFINE_ENUM(SOCK_SEQPACKET); +TRACE_DEFINE_ENUM(SOCK_DCCP); +TRACE_DEFINE_ENUM(SOCK_PACKET); + +#define show_socket_type(type) \ + __print_symbolic(type, \ + { SOCK_STREAM, "STREAM" }, \ + { SOCK_DGRAM, "DGRAM" }, \ + { SOCK_RAW, "RAW" }, \ + { SOCK_RDM, "RDM" }, \ + { SOCK_SEQPACKET, "SEQPACKET" }, \ + { SOCK_DCCP, "DCCP" }, \ + { SOCK_PACKET, "PACKET" }) + +/* This list is known to be incomplete, add new enums as needed. */ +TRACE_DEFINE_ENUM(AF_UNSPEC); +TRACE_DEFINE_ENUM(AF_UNIX); +TRACE_DEFINE_ENUM(AF_LOCAL); +TRACE_DEFINE_ENUM(AF_INET); +TRACE_DEFINE_ENUM(AF_INET6); + +#define rpc_show_address_family(family) \ + __print_symbolic(family, \ + { AF_UNSPEC, "AF_UNSPEC" }, \ + { AF_UNIX, "AF_UNIX" }, \ + { AF_LOCAL, "AF_LOCAL" }, \ + { AF_INET, "AF_INET" }, \ + { AF_INET6, "AF_INET6" }) + DECLARE_EVENT_CLASS(xdr_buf_class, TP_PROTO( const struct xdr_buf *xdr @@ -1024,6 +1057,17 @@ TRACE_EVENT(svc_recv, show_rqstp_flags(__entry->flags)) ); +TRACE_DEFINE_ENUM(SVC_GARBAGE); +TRACE_DEFINE_ENUM(SVC_SYSERR); +TRACE_DEFINE_ENUM(SVC_VALID); +TRACE_DEFINE_ENUM(SVC_NEGATIVE); +TRACE_DEFINE_ENUM(SVC_OK); +TRACE_DEFINE_ENUM(SVC_DROP); +TRACE_DEFINE_ENUM(SVC_CLOSE); +TRACE_DEFINE_ENUM(SVC_DENIED); +TRACE_DEFINE_ENUM(SVC_PENDING); +TRACE_DEFINE_ENUM(SVC_COMPLETE); + #define svc_show_status(status) \ __print_symbolic(status, \ { SVC_GARBAGE, "SVC_GARBAGE" }, \ @@ -1167,28 +1211,54 @@ DEFINE_EVENT(svc_rqst_status, svc_send, { (1UL << XPT_KILL_TEMP), "XPT_KILL_TEMP"}, \ { (1UL << XPT_CONG_CTRL), "XPT_CONG_CTRL"}) +TRACE_EVENT(svc_xprt_create_err, + TP_PROTO( + const char *program, + const char *protocol, + struct sockaddr *sap, + const struct svc_xprt *xprt + ), + + TP_ARGS(program, protocol, sap, xprt), + + TP_STRUCT__entry( + __field(long, error) + __string(program, program) + __string(protocol, protocol) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->error = PTR_ERR(xprt); + __assign_str(program, program); + __assign_str(protocol, protocol); + memcpy(__entry->addr, sap, sizeof(__entry->addr)); + ), + + TP_printk("addr=%pISpc program=%s protocol=%s error=%ld", + __entry->addr, __get_str(program), __get_str(protocol), + __entry->error) +); + TRACE_EVENT(svc_xprt_do_enqueue, TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst), TP_ARGS(xprt, rqst), TP_STRUCT__entry( - __field(struct svc_xprt *, xprt) __field(int, pid) __field(unsigned long, flags) __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xprt = xprt; __entry->pid = rqst? rqst->rq_task->pid : 0; __entry->flags = xprt->xpt_flags; __assign_str(addr, xprt->xpt_remotebuf); ), - TP_printk("xprt=%p addr=%s pid=%d flags=%s", - __entry->xprt, __get_str(addr), - __entry->pid, show_svc_xprt_flags(__entry->flags)) + TP_printk("addr=%s pid=%d flags=%s", __get_str(addr), + __entry->pid, show_svc_xprt_flags(__entry->flags)) ); DECLARE_EVENT_CLASS(svc_xprt_event, @@ -1197,25 +1267,55 @@ DECLARE_EVENT_CLASS(svc_xprt_event, TP_ARGS(xprt), TP_STRUCT__entry( - __field(struct svc_xprt *, xprt) __field(unsigned long, flags) __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xprt = xprt; __entry->flags = xprt->xpt_flags; __assign_str(addr, xprt->xpt_remotebuf); ), - TP_printk("xprt=%p addr=%s flags=%s", - __entry->xprt, __get_str(addr), - show_svc_xprt_flags(__entry->flags)) + TP_printk("addr=%s flags=%s", __get_str(addr), + show_svc_xprt_flags(__entry->flags)) ); -DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space, - TP_PROTO(struct svc_xprt *xprt), - TP_ARGS(xprt)); +#define DEFINE_SVC_XPRT_EVENT(name) \ + DEFINE_EVENT(svc_xprt_event, svc_xprt_##name, \ + TP_PROTO( \ + struct svc_xprt *xprt \ + ), \ + TP_ARGS(xprt)) + +DEFINE_SVC_XPRT_EVENT(no_write_space); +DEFINE_SVC_XPRT_EVENT(close); +DEFINE_SVC_XPRT_EVENT(detach); +DEFINE_SVC_XPRT_EVENT(free); + +TRACE_EVENT(svc_xprt_accept, + TP_PROTO( + const struct svc_xprt *xprt, + const char *service + ), + + TP_ARGS(xprt, service), + + TP_STRUCT__entry( + __string(addr, xprt->xpt_remotebuf) + __string(protocol, xprt->xpt_class->xcl_name) + __string(service, service) + ), + + TP_fast_assign( + __assign_str(addr, xprt->xpt_remotebuf); + __assign_str(protocol, xprt->xpt_class->xcl_name) + __assign_str(service, service); + ), + + TP_printk("addr=%s protocol=%s service=%s", + __get_str(addr), __get_str(protocol), __get_str(service) + ) +); TRACE_EVENT(svc_xprt_dequeue, TP_PROTO(struct svc_rqst *rqst), @@ -1223,24 +1323,20 @@ TRACE_EVENT(svc_xprt_dequeue, TP_ARGS(rqst), TP_STRUCT__entry( - __field(struct svc_xprt *, xprt) __field(unsigned long, flags) __field(unsigned long, wakeup) __string(addr, rqst->rq_xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xprt = rqst->rq_xprt; __entry->flags = rqst->rq_xprt->xpt_flags; __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(), rqst->rq_qtime)); __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("xprt=%p addr=%s flags=%s wakeup-us=%lu", - __entry->xprt, __get_str(addr), - show_svc_xprt_flags(__entry->flags), - __entry->wakeup) + TP_printk("addr=%s flags=%s wakeup-us=%lu", __get_str(addr), + show_svc_xprt_flags(__entry->flags), __entry->wakeup) ); TRACE_EVENT(svc_wake_up, @@ -1265,21 +1361,18 @@ TRACE_EVENT(svc_handle_xprt, TP_ARGS(xprt, len), TP_STRUCT__entry( - __field(struct svc_xprt *, xprt) __field(int, len) __field(unsigned long, flags) __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xprt = xprt; __entry->len = len; __entry->flags = xprt->xpt_flags; __assign_str(addr, xprt->xpt_remotebuf); ), - TP_printk("xprt=%p addr=%s len=%d flags=%s", - __entry->xprt, __get_str(addr), + TP_printk("addr=%s len=%d flags=%s", __get_str(addr), __entry->len, show_svc_xprt_flags(__entry->flags)) ); @@ -1313,27 +1406,221 @@ DECLARE_EVENT_CLASS(svc_deferred_event, TP_ARGS(dr), TP_STRUCT__entry( + __field(const void *, dr) __field(u32, xid) __string(addr, dr->xprt->xpt_remotebuf) ), TP_fast_assign( + __entry->dr = dr; __entry->xid = be32_to_cpu(*(__be32 *)(dr->args + (dr->xprt_hlen>>2))); __assign_str(addr, dr->xprt->xpt_remotebuf); ), - TP_printk("addr=%s xid=0x%08x", __get_str(addr), __entry->xid) + TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr, + __entry->xid) ); + #define DEFINE_SVC_DEFERRED_EVENT(name) \ - DEFINE_EVENT(svc_deferred_event, svc_##name##_deferred, \ + DEFINE_EVENT(svc_deferred_event, svc_defer_##name, \ TP_PROTO( \ const struct svc_deferred_req *dr \ ), \ TP_ARGS(dr)) DEFINE_SVC_DEFERRED_EVENT(drop); -DEFINE_SVC_DEFERRED_EVENT(revisit); +DEFINE_SVC_DEFERRED_EVENT(queue); +DEFINE_SVC_DEFERRED_EVENT(recv); + +TRACE_EVENT(svcsock_new_socket, + TP_PROTO( + const struct socket *socket + ), + + TP_ARGS(socket), + + TP_STRUCT__entry( + __field(unsigned long, type) + __field(unsigned long, family) + __field(bool, listener) + ), + + TP_fast_assign( + __entry->type = socket->type; + __entry->family = socket->sk->sk_family; + __entry->listener = (socket->sk->sk_state == TCP_LISTEN); + ), + + TP_printk("type=%s family=%s%s", + show_socket_type(__entry->type), + rpc_show_address_family(__entry->family), + __entry->listener ? " (listener)" : "" + ) +); + +TRACE_EVENT(svcsock_marker, + TP_PROTO( + const struct svc_xprt *xprt, + __be32 marker + ), + + TP_ARGS(xprt, marker), + + TP_STRUCT__entry( + __field(unsigned int, length) + __field(bool, last) + __string(addr, xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->length = be32_to_cpu(marker) & RPC_FRAGMENT_SIZE_MASK; + __entry->last = be32_to_cpu(marker) & RPC_LAST_STREAM_FRAGMENT; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s length=%u%s", __get_str(addr), + __entry->length, __entry->last ? " (last)" : "") +); + +DECLARE_EVENT_CLASS(svcsock_class, + TP_PROTO( + const struct svc_xprt *xprt, + ssize_t result + ), + + TP_ARGS(xprt, result), + + TP_STRUCT__entry( + __field(ssize_t, result) + __field(unsigned long, flags) + __string(addr, xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->result = result; + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s result=%zd flags=%s", __get_str(addr), + __entry->result, show_svc_xprt_flags(__entry->flags) + ) +); + +#define DEFINE_SVCSOCK_EVENT(name) \ + DEFINE_EVENT(svcsock_class, svcsock_##name, \ + TP_PROTO( \ + const struct svc_xprt *xprt, \ + ssize_t result \ + ), \ + TP_ARGS(xprt, result)) + +DEFINE_SVCSOCK_EVENT(udp_send); +DEFINE_SVCSOCK_EVENT(udp_recv); +DEFINE_SVCSOCK_EVENT(udp_recv_err); +DEFINE_SVCSOCK_EVENT(tcp_send); +DEFINE_SVCSOCK_EVENT(tcp_recv); +DEFINE_SVCSOCK_EVENT(tcp_recv_eagain); +DEFINE_SVCSOCK_EVENT(tcp_recv_err); +DEFINE_SVCSOCK_EVENT(data_ready); +DEFINE_SVCSOCK_EVENT(write_space); + +TRACE_EVENT(svcsock_tcp_recv_short, + TP_PROTO( + const struct svc_xprt *xprt, + u32 expected, + u32 received + ), + + TP_ARGS(xprt, expected, received), + + TP_STRUCT__entry( + __field(u32, expected) + __field(u32, received) + __field(unsigned long, flags) + __string(addr, xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->expected = expected; + __entry->received = received; + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s flags=%s expected=%u received=%u", + __get_str(addr), show_svc_xprt_flags(__entry->flags), + __entry->expected, __entry->received + ) +); + +TRACE_EVENT(svcsock_tcp_state, + TP_PROTO( + const struct svc_xprt *xprt, + const struct socket *socket + ), + + TP_ARGS(xprt, socket), + + TP_STRUCT__entry( + __field(unsigned long, socket_state) + __field(unsigned long, sock_state) + __field(unsigned long, flags) + __string(addr, xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->socket_state = socket->state; + __entry->sock_state = socket->sk->sk_state; + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s state=%s sk_state=%s flags=%s", __get_str(addr), + rpc_show_socket_state(__entry->socket_state), + rpc_show_sock_state(__entry->sock_state), + show_svc_xprt_flags(__entry->flags) + ) +); + +DECLARE_EVENT_CLASS(svcsock_accept_class, + TP_PROTO( + const struct svc_xprt *xprt, + const char *service, + long status + ), + + TP_ARGS(xprt, service, status), + + TP_STRUCT__entry( + __field(long, status) + __string(service, service) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->status = status; + __assign_str(service, service); + memcpy(__entry->addr, &xprt->xpt_local, sizeof(__entry->addr)); + ), + + TP_printk("listener=%pISpc service=%s status=%ld", + __entry->addr, __get_str(service), __entry->status + ) +); + +#define DEFINE_ACCEPT_EVENT(name) \ + DEFINE_EVENT(svcsock_accept_class, svcsock_##name##_err, \ + TP_PROTO( \ + const struct svc_xprt *xprt, \ + const char *service, \ + long status \ + ), \ + TP_ARGS(xprt, service, status)) + +DEFINE_ACCEPT_EVENT(accept); +DEFINE_ACCEPT_EVENT(getpeername); DECLARE_EVENT_CLASS(cache_event, TP_PROTO( @@ -1368,6 +1655,86 @@ DEFINE_CACHE_EVENT(cache_entry_update); DEFINE_CACHE_EVENT(cache_entry_make_negative); DEFINE_CACHE_EVENT(cache_entry_no_listener); +DECLARE_EVENT_CLASS(register_class, + TP_PROTO( + const char *program, + const u32 version, + const int family, + const unsigned short protocol, + const unsigned short port, + int error + ), + + TP_ARGS(program, version, family, protocol, port, error), + + TP_STRUCT__entry( + __field(u32, version) + __field(unsigned long, family) + __field(unsigned short, protocol) + __field(unsigned short, port) + __field(int, error) + __string(program, program) + ), + + TP_fast_assign( + __entry->version = version; + __entry->family = family; + __entry->protocol = protocol; + __entry->port = port; + __entry->error = error; + __assign_str(program, program); + ), + + TP_printk("program=%sv%u proto=%s port=%u family=%s error=%d", + __get_str(program), __entry->version, + __entry->protocol == IPPROTO_UDP ? "udp" : "tcp", + __entry->port, rpc_show_address_family(__entry->family), + __entry->error + ) +); + +#define DEFINE_REGISTER_EVENT(name) \ + DEFINE_EVENT(register_class, svc_##name, \ + TP_PROTO( \ + const char *program, \ + const u32 version, \ + const int family, \ + const unsigned short protocol, \ + const unsigned short port, \ + int error \ + ), \ + TP_ARGS(program, version, family, protocol, \ + port, error)) + +DEFINE_REGISTER_EVENT(register); +DEFINE_REGISTER_EVENT(noregister); + +TRACE_EVENT(svc_unregister, + TP_PROTO( + const char *program, + const u32 version, + int error + ), + + TP_ARGS(program, version, error), + + TP_STRUCT__entry( + __field(u32, version) + __field(int, error) + __string(program, program) + ), + + TP_fast_assign( + __entry->version = version; + __entry->error = error; + __assign_str(program, program); + ), + + TP_printk("program=%sv%u error=%d", + __get_str(program), __entry->version, __entry->error + ) +); + #endif /* _TRACE_SUNRPC_H */ #include <trace/define_trace.h> diff --git a/kernel/kthread.c b/kernel/kthread.c index bfbfa481be3a..b84fc7eec035 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -46,6 +46,7 @@ struct kthread_create_info struct kthread { unsigned long flags; unsigned int cpu; + int (*threadfn)(void *); void *data; struct completion parked; struct completion exited; @@ -153,6 +154,20 @@ bool kthread_freezable_should_stop(bool *was_frozen) EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); /** + * kthread_func - return the function specified on kthread creation + * @task: kthread task in question + * + * Returns NULL if the task is not a kthread. + */ +void *kthread_func(struct task_struct *task) +{ + if (task->flags & PF_KTHREAD) + return to_kthread(task)->threadfn; + return NULL; +} +EXPORT_SYMBOL_GPL(kthread_func); + +/** * kthread_data - return data value specified on kthread creation * @task: kthread task in question * @@ -164,6 +179,7 @@ void *kthread_data(struct task_struct *task) { return to_kthread(task)->data; } +EXPORT_SYMBOL_GPL(kthread_data); /** * kthread_probe_data - speculative version of kthread_data() @@ -244,6 +260,7 @@ static int kthread(void *_create) do_exit(-ENOMEM); } + self->threadfn = threadfn; self->data = data; init_completion(&self->exited); init_completion(&self->parked); diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 69316ab1b9fa..fae632da1058 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -37,6 +37,8 @@ gss_mech_free(struct gss_api_mech *gm) for (i = 0; i < gm->gm_pf_num; i++) { pf = &gm->gm_pfs[i]; + if (pf->domain) + auth_domain_put(pf->domain); kfree(pf->auth_domain_name); pf->auth_domain_name = NULL; } @@ -59,6 +61,7 @@ make_auth_domain_name(char *name) static int gss_mech_svc_setup(struct gss_api_mech *gm) { + struct auth_domain *dom; struct pf_desc *pf; int i, status; @@ -68,10 +71,13 @@ gss_mech_svc_setup(struct gss_api_mech *gm) status = -ENOMEM; if (pf->auth_domain_name == NULL) goto out; - status = svcauth_gss_register_pseudoflavor(pf->pseudoflavor, - pf->auth_domain_name); - if (status) + dom = svcauth_gss_register_pseudoflavor( + pf->pseudoflavor, pf->auth_domain_name); + if (IS_ERR(dom)) { + status = PTR_ERR(dom); goto out; + } + pf->domain = dom; } return 0; out: diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 0349f455a862..af9c7f43859c 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -223,7 +223,7 @@ static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg) static char *gssp_stringify(struct xdr_netobj *netobj) { - return kstrndup(netobj->data, netobj->len, GFP_KERNEL); + return kmemdup_nul(netobj->data, netobj->len, GFP_KERNEL); } static void gssp_hostbased_service(char **principal) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 50d93c49ef1a..46027d0c903f 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -809,7 +809,7 @@ u32 svcauth_gss_flavor(struct auth_domain *dom) EXPORT_SYMBOL_GPL(svcauth_gss_flavor); -int +struct auth_domain * svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) { struct gss_domain *new; @@ -826,21 +826,23 @@ svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) new->h.flavour = &svcauthops_gss; new->pseudoflavor = pseudoflavor; - stat = 0; test = auth_domain_lookup(name, &new->h); - if (test != &new->h) { /* Duplicate registration */ + if (test != &new->h) { + pr_warn("svc: duplicate registration of gss pseudo flavour %s.\n", + name); + stat = -EADDRINUSE; auth_domain_put(test); - kfree(new->h.name); - goto out_free_dom; + goto out_free_name; } - return 0; + return test; +out_free_name: + kfree(new->h.name); out_free_dom: kfree(new); out: - return stat; + return ERR_PTR(stat); } - EXPORT_SYMBOL_GPL(svcauth_gss_register_pseudoflavor); static inline int diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 4a020b688860..c27123e6ba80 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -795,12 +795,6 @@ void rpcb_getport_async(struct rpc_task *task) child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); - if (IS_ERR(child)) { - /* rpcb_map_release() has freed the arguments */ - dprintk("RPC: %5u %s: rpc_run_task failed\n", - task->tk_pid, __func__); - return; - } xprt->stat.bind_count++; rpc_put_task(child); diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index 47a756503d11..f6fe2e6cd65a 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -52,4 +52,5 @@ static inline int sock_is_loopback(struct sock *sk) int rpc_clients_notifier_register(void); void rpc_clients_notifier_unregister(void); +void auth_domain_cleanup(void); #endif /* _NET_SUNRPC_SUNRPC_H */ diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index f9edaa9174a4..236fadc4a439 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -23,6 +23,7 @@ #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/xprtsock.h> +#include "sunrpc.h" #include "netns.h" unsigned int sunrpc_net_id; @@ -131,6 +132,7 @@ cleanup_sunrpc(void) unregister_rpc_pipefs(); rpc_destroy_mempool(); unregister_pernet_subsys(&sunrpc_net_ops); + auth_domain_cleanup(); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_unregister_sysctl(); #endif diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 9ed3126600ce..c211b607239e 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -88,15 +88,15 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp) switch (*ip) { case SVC_POOL_AUTO: - return strlcpy(buf, "auto", 20); + return strlcpy(buf, "auto\n", 20); case SVC_POOL_GLOBAL: - return strlcpy(buf, "global", 20); + return strlcpy(buf, "global\n", 20); case SVC_POOL_PERCPU: - return strlcpy(buf, "percpu", 20); + return strlcpy(buf, "percpu\n", 20); case SVC_POOL_PERNODE: - return strlcpy(buf, "pernode", 20); + return strlcpy(buf, "pernode\n", 20); default: - return sprintf(buf, "%d", *ip); + return sprintf(buf, "%d\n", *ip); } } @@ -991,6 +991,7 @@ static int __svc_register(struct net *net, const char *progname, #endif } + trace_svc_register(progname, version, protocol, port, family, error); return error; } @@ -1000,11 +1001,6 @@ int svc_rpcbind_set_version(struct net *net, unsigned short proto, unsigned short port) { - dprintk("svc: svc_register(%sv%d, %s, %u, %u)\n", - progp->pg_name, version, - proto == IPPROTO_UDP? "udp" : "tcp", - port, family); - return __svc_register(net, progp->pg_name, progp->pg_prog, version, family, proto, port); @@ -1024,11 +1020,8 @@ int svc_generic_rpcbind_set(struct net *net, return 0; if (vers->vs_hidden) { - dprintk("svc: svc_register(%sv%d, %s, %u, %u)" - " (but not telling portmap)\n", - progp->pg_name, version, - proto == IPPROTO_UDP? "udp" : "tcp", - port, family); + trace_svc_noregister(progp->pg_name, version, proto, + port, family, 0); return 0; } @@ -1106,8 +1099,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi if (error == -EPROTONOSUPPORT) error = rpcb_register(net, program, version, 0, 0); - dprintk("svc: %s(%sv%u), error %d\n", - __func__, progname, version, error); + trace_svc_unregister(progname, version, error); } /* @@ -1132,9 +1124,6 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) continue; if (progp->pg_vers[i]->vs_hidden) continue; - - dprintk("svc: attempting to unregister %sv%u\n", - progp->pg_name, i); __svc_unregister(net, progp->pg_prog, i, progp->pg_name); } } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2284ff038dad..c1ff8cdb5b2b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -153,6 +153,7 @@ static void svc_xprt_free(struct kref *kref) xprt_put(xprt->xpt_bc_xprt); if (xprt->xpt_bc_xps) xprt_switch_put(xprt->xpt_bc_xps); + trace_svc_xprt_free(xprt); xprt->xpt_ops->xpo_free(xprt); module_put(owner); } @@ -206,6 +207,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, .sin6_port = htons(port), }; #endif + struct svc_xprt *xprt; struct sockaddr *sap; size_t len; @@ -224,7 +226,11 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, return ERR_PTR(-EAFNOSUPPORT); } - return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags); + xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags); + if (IS_ERR(xprt)) + trace_svc_xprt_create_err(serv->sv_program->pg_name, + xcl->xcl_name, sap, xprt); + return xprt; } /* @@ -304,15 +310,11 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, { int err; - dprintk("svc: creating transport %s[%d]\n", xprt_name, port); err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); if (err == -EPROTONOSUPPORT) { request_module("svc%s", xprt_name); err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); } - if (err < 0) - dprintk("svc: transport %s not found, err %d\n", - xprt_name, -err); return err; } EXPORT_SYMBOL_GPL(svc_create_xprt); @@ -780,7 +782,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) int len = 0; if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { - dprintk("svc_recv: found XPT_CLOSE\n"); if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags)) xprt->xpt_ops->xpo_kill_temp_xprt(xprt); svc_delete_xprt(xprt); @@ -799,6 +800,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) if (newxpt) { newxpt->xpt_cred = get_cred(xprt->xpt_cred); svc_add_new_temp_xprt(serv, newxpt); + trace_svc_xprt_accept(newxpt, serv->sv_name); } else module_put(xprt->xpt_class->xcl_owner); } else if (svc_xprt_reserve_slot(rqstp, xprt)) { @@ -835,14 +837,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) struct svc_serv *serv = rqstp->rq_server; int len, err; - dprintk("svc: server %p waiting for data (to = %ld)\n", - rqstp, timeout); - - if (rqstp->rq_xprt) - printk(KERN_ERR - "svc_recv: service %p, transport not NULL!\n", - rqstp); - err = svc_alloc_arg(rqstp); if (err) goto out; @@ -890,7 +884,6 @@ EXPORT_SYMBOL_GPL(svc_recv); void svc_drop(struct svc_rqst *rqstp) { trace_svc_drop(rqstp); - dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt); svc_xprt_release(rqstp); } EXPORT_SYMBOL_GPL(svc_drop); @@ -914,16 +907,10 @@ int svc_send(struct svc_rqst *rqstp) xb->page_len + xb->tail[0].iov_len; trace_svc_sendto(xb); - - /* Grab mutex to serialize outgoing data. */ - mutex_lock(&xprt->xpt_mutex); trace_svc_stats_latency(rqstp); - if (test_bit(XPT_DEAD, &xprt->xpt_flags) - || test_bit(XPT_CLOSE, &xprt->xpt_flags)) - len = -ENOTCONN; - else - len = xprt->xpt_ops->xpo_sendto(rqstp); - mutex_unlock(&xprt->xpt_mutex); + + len = xprt->xpt_ops->xpo_sendto(rqstp); + trace_svc_send(rqstp, len); svc_xprt_release(rqstp); @@ -1031,11 +1018,10 @@ static void svc_delete_xprt(struct svc_xprt *xprt) struct svc_serv *serv = xprt->xpt_server; struct svc_deferred_req *dr; - /* Only do this once */ if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) - BUG(); + return; - dprintk("svc: svc_delete_xprt(%p)\n", xprt); + trace_svc_xprt_detach(xprt); xprt->xpt_ops->xpo_detach(xprt); if (xprt->xpt_bc_xprt) xprt->xpt_bc_xprt->ops->close(xprt->xpt_bc_xprt); @@ -1056,6 +1042,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt) void svc_close_xprt(struct svc_xprt *xprt) { + trace_svc_xprt_close(xprt); set_bit(XPT_CLOSE, &xprt->xpt_flags); if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) /* someone else will have to effect the close */ @@ -1158,16 +1145,15 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many) set_bit(XPT_DEFERRED, &xprt->xpt_flags); if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) { spin_unlock(&xprt->xpt_lock); - dprintk("revisit canceled\n"); + trace_svc_defer_drop(dr); svc_xprt_put(xprt); - trace_svc_drop_deferred(dr); kfree(dr); return; } - dprintk("revisit queued\n"); dr->xprt = NULL; list_add(&dr->handle.recent, &xprt->xpt_deferred); spin_unlock(&xprt->xpt_lock); + trace_svc_defer_queue(dr); svc_xprt_enqueue(xprt); svc_xprt_put(xprt); } @@ -1213,22 +1199,24 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip, dr->argslen << 2); } + trace_svc_defer(rqstp); svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; set_bit(RQ_DROPME, &rqstp->rq_flags); dr->handle.revisit = svc_revisit; - trace_svc_defer(rqstp); return &dr->handle; } /* * recv data from a deferred request into an active one */ -static int svc_deferred_recv(struct svc_rqst *rqstp) +static noinline int svc_deferred_recv(struct svc_rqst *rqstp) { struct svc_deferred_req *dr = rqstp->rq_deferred; + trace_svc_defer_recv(dr); + /* setup iov_base past transport header */ rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2); /* The iov_len does not include the transport header bytes */ @@ -1259,7 +1247,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) struct svc_deferred_req, handle.recent); list_del_init(&dr->handle.recent); - trace_svc_revisit_deferred(dr); } else clear_bit(XPT_DEFERRED, &xprt->xpt_flags); spin_unlock(&xprt->xpt_lock); diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 552617e3467b..998b196b6176 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -21,6 +21,8 @@ #include <trace/events/sunrpc.h> +#include "sunrpc.h" + #define RPCDBG_FACILITY RPCDBG_AUTH @@ -205,3 +207,26 @@ struct auth_domain *auth_domain_find(char *name) return NULL; } EXPORT_SYMBOL_GPL(auth_domain_find); + +/** + * auth_domain_cleanup - check that the auth_domain table is empty + * + * On module unload the auth_domain_table must be empty. To make it + * easier to catch bugs which don't clean up domains properly, we + * warn if anything remains in the table at cleanup time. + * + * Note that we cannot proactively remove the domains at this stage. + * The ->release() function might be in a module that has already been + * unloaded. + */ + +void auth_domain_cleanup(void) +{ + int h; + struct auth_domain *hp; + + for (h = 0; h < DN_HASHMAX; h++) + hlist_for_each_entry(hp, &auth_domain_table[h], hash) + pr_warn("svc: domain %s still present at module unload.\n", + hp->name); +} diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 6c8f802c4261..97c0bddba7a3 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -332,15 +332,6 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, return 0; } -static inline int ip_map_update(struct net *net, struct ip_map *ipm, - struct unix_domain *udom, time64_t expiry) -{ - struct sunrpc_net *sn; - - sn = net_generic(net, sunrpc_net_id); - return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry); -} - void svcauth_unix_purge(struct net *net) { struct sunrpc_net *sn; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e7a0037d9b56..5c4ec9386f81 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -45,7 +45,6 @@ #include <net/tcp_states.h> #include <linux/uaccess.h> #include <asm/ioctls.h> -#include <trace/events/skb.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/clnt.h> @@ -55,6 +54,8 @@ #include <linux/sunrpc/stats.h> #include <linux/sunrpc/xprt.h> +#include <trace/events/sunrpc.h> + #include "socklib.h" #include "sunrpc.h" @@ -108,31 +109,35 @@ static void svc_reclassify_socket(struct socket *sock) } #endif -/* - * Release an skbuff after use +/** + * svc_tcp_release_rqst - Release transport-related resources + * @rqstp: request structure with resources to be released + * */ -static void svc_release_skb(struct svc_rqst *rqstp) +static void svc_tcp_release_rqst(struct svc_rqst *rqstp) { struct sk_buff *skb = rqstp->rq_xprt_ctxt; if (skb) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); - rqstp->rq_xprt_ctxt = NULL; - dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); + rqstp->rq_xprt_ctxt = NULL; skb_free_datagram_locked(svsk->sk_sk, skb); } } -static void svc_release_udp_skb(struct svc_rqst *rqstp) +/** + * svc_udp_release_rqst - Release transport-related resources + * @rqstp: request structure with resources to be released + * + */ +static void svc_udp_release_rqst(struct svc_rqst *rqstp) { struct sk_buff *skb = rqstp->rq_xprt_ctxt; if (skb) { rqstp->rq_xprt_ctxt = NULL; - - dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); consume_skb(skb); } } @@ -218,34 +223,68 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) return len; } +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek) +{ + struct bvec_iter bi = { + .bi_size = size, + }; + struct bio_vec bv; + + bvec_iter_advance(bvec, &bi, seek & PAGE_MASK); + for_each_bvec(bv, bvec, bi, bi) + flush_dcache_page(bv.bv_page); +} +#else +static inline void svc_flush_bvec(const struct bio_vec *bvec, size_t size, + size_t seek) +{ +} +#endif + /* - * Generic recvfrom routine. + * Read from @rqstp's transport socket. The incoming message fills whole + * pages in @rqstp's rq_pages array until the last page of the message + * has been received into a partial page. */ -static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, - unsigned int nr, size_t buflen, unsigned int base) +static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, + size_t seek) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); + struct bio_vec *bvec = rqstp->rq_bvec; struct msghdr msg = { NULL }; + unsigned int i; ssize_t len; + size_t t; rqstp->rq_xprt_hlen = 0; clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen); - if (base != 0) { - iov_iter_advance(&msg.msg_iter, base); - buflen -= base; + + for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) { + bvec[i].bv_page = rqstp->rq_pages[i]; + bvec[i].bv_len = PAGE_SIZE; + bvec[i].bv_offset = 0; + } + rqstp->rq_respages = &rqstp->rq_pages[i]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + + iov_iter_bvec(&msg.msg_iter, READ, bvec, i, buflen); + if (seek) { + iov_iter_advance(&msg.msg_iter, seek); + buflen -= seek; } len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); + if (len > 0) + svc_flush_bvec(bvec, len, seek); + /* If we read a full record, then assume there may be more * data to read (stream based sockets only!) */ if (len == buflen) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - dprintk("svc: socket %p recvfrom(%p, %zu) = %zd\n", - svsk, iov[0].iov_base, iov[0].iov_len, len); return len; } @@ -282,13 +321,10 @@ static void svc_data_ready(struct sock *sk) struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; if (svsk) { - dprintk("svc: socket %p(inet %p), busy=%d\n", - svsk, sk, - test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); - /* Refer to svc_setup_socket() for details. */ rmb(); svsk->sk_odata(sk); + trace_svcsock_data_ready(&svsk->sk_xprt, 0); if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) svc_xprt_enqueue(&svsk->sk_xprt); } @@ -302,11 +338,9 @@ static void svc_write_space(struct sock *sk) struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); if (svsk) { - dprintk("svc: socket %p(inet %p), write_space busy=%d\n", - svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); - /* Refer to svc_setup_socket() for details. */ rmb(); + trace_svcsock_write_space(&svsk->sk_xprt, 0); svsk->sk_owspace(sk); svc_xprt_enqueue(&svsk->sk_xprt); } @@ -383,8 +417,15 @@ static int svc_udp_get_dest_address(struct svc_rqst *rqstp, return 0; } -/* - * Receive a datagram from a UDP socket. +/** + * svc_udp_recvfrom - Receive a datagram from a UDP socket. + * @rqstp: request structure into which to receive an RPC Call + * + * Called in a loop when XPT_DATA has been set. + * + * Returns: + * On success, the number of bytes in a received RPC Call, or + * %0 if a complete RPC Call message was not ready to return */ static int svc_udp_recvfrom(struct svc_rqst *rqstp) { @@ -418,20 +459,14 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3); clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - skb = NULL; err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 0, 0, MSG_PEEK | MSG_DONTWAIT); - if (err >= 0) - skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); - - if (skb == NULL) { - if (err != -EAGAIN) { - /* possibly an icmp error */ - dprintk("svc: recvfrom returned error %d\n", -err); - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - } - return 0; - } + if (err < 0) + goto out_recv_err; + skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); + if (!skb) + goto out_recv_err; + len = svc_addr_len(svc_addr(rqstp)); rqstp->rq_addrlen = len; if (skb->tstamp == 0) { @@ -442,26 +477,21 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) sock_write_timestamp(svsk->sk_sk, skb->tstamp); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ - len = skb->len; + len = skb->len; rqstp->rq_arg.len = len; + trace_svcsock_udp_recv(&svsk->sk_xprt, len); rqstp->rq_prot = IPPROTO_UDP; - if (!svc_udp_get_dest_address(rqstp, cmh)) { - net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n", - cmh->cmsg_level, cmh->cmsg_type); - goto out_free; - } + if (!svc_udp_get_dest_address(rqstp, cmh)) + goto out_cmsg_err; rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp)); if (skb_is_nonlinear(skb)) { /* we have to copy */ local_bh_disable(); - if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) { - local_bh_enable(); - /* checksum error */ - goto out_free; - } + if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) + goto out_bh_enable; local_bh_enable(); consume_skb(skb); } else { @@ -489,6 +519,20 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) serv->sv_stats->netudpcnt++; return len; + +out_recv_err: + if (err != -EAGAIN) { + /* possibly an icmp error */ + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + } + trace_svcsock_udp_recv_err(&svsk->sk_xprt, err); + return 0; +out_cmsg_err: + net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n", + cmh->cmsg_level, cmh->cmsg_type); + goto out_free; +out_bh_enable: + local_bh_enable(); out_free: kfree_skb(skb); return 0; @@ -498,6 +542,9 @@ out_free: * svc_udp_sendto - Send out a reply on a UDP socket * @rqstp: completed svc_rqst * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * * Returns the number of bytes sent, or a negative errno. */ static int svc_udp_sendto(struct svc_rqst *rqstp) @@ -519,10 +566,15 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) unsigned int uninitialized_var(sent); int err; - svc_release_udp_skb(rqstp); + svc_udp_release_rqst(rqstp); svc_set_cmsg_data(rqstp, cmh); + mutex_lock(&xprt->xpt_mutex); + + if (svc_xprt_is_dead(xprt)) + goto out_notconn; + err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); xdr_free_bvec(xdr); if (err == -ECONNREFUSED) { @@ -530,9 +582,16 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); xdr_free_bvec(xdr); } + trace_svcsock_udp_send(xprt, err); + + mutex_unlock(&xprt->xpt_mutex); if (err < 0) return err; return sent; + +out_notconn: + mutex_unlock(&xprt->xpt_mutex); + return -ENOTCONN; } static int svc_udp_has_wspace(struct svc_xprt *xprt) @@ -576,7 +635,7 @@ static const struct svc_xprt_ops svc_udp_ops = { .xpo_recvfrom = svc_udp_recvfrom, .xpo_sendto = svc_udp_sendto, .xpo_read_payload = svc_sock_read_payload, - .xpo_release_rqst = svc_release_udp_skb, + .xpo_release_rqst = svc_udp_release_rqst, .xpo_detach = svc_sock_detach, .xpo_free = svc_sock_free, .xpo_has_wspace = svc_udp_has_wspace, @@ -632,9 +691,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; - dprintk("svc: socket %p TCP (listen) state change %d\n", - sk, sk->sk_state); - if (svsk) { /* Refer to svc_setup_socket() for details. */ rmb(); @@ -655,8 +711,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk) if (svsk) { set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); - } else - printk("svc: socket %p: no user data\n", sk); + } } } @@ -667,15 +722,11 @@ static void svc_tcp_state_change(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; - dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", - sk, sk->sk_state, sk->sk_user_data); - - if (!svsk) - printk("svc: socket %p: no user data\n", sk); - else { + if (svsk) { /* Refer to svc_setup_socket() for details. */ rmb(); svsk->sk_ostate(sk); + trace_svcsock_tcp_state(&svsk->sk_xprt, svsk->sk_sock); if (sk->sk_state != TCP_ESTABLISHED) { set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); @@ -696,9 +747,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) struct socket *newsock; struct svc_sock *newsvsk; int err, slen; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); if (!sock) return NULL; @@ -711,30 +760,18 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) else if (err != -EAGAIN) net_warn_ratelimited("%s: accept failed (err %d)!\n", serv->sv_name, -err); + trace_svcsock_accept_err(xprt, serv->sv_name, err); return NULL; } set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); err = kernel_getpeername(newsock, sin); if (err < 0) { - net_warn_ratelimited("%s: peername failed (err %d)!\n", - serv->sv_name, -err); + trace_svcsock_getpeername_err(xprt, serv->sv_name, err); goto failed; /* aborted connection or whatever */ } slen = err; - /* Ideally, we would want to reject connections from unauthorized - * hosts here, but when we get encryption, the IP of the host won't - * tell us anything. For now just warn about unpriv connections. - */ - if (!svc_port_is_privileged(sin)) { - dprintk("%s: connect from unprivileged port: %s\n", - serv->sv_name, - __svc_print_addr(sin, buf, sizeof(buf))); - } - dprintk("%s: connect from %s\n", serv->sv_name, - __svc_print_addr(sin, buf, sizeof(buf))); - /* Reset the inherited callbacks before calling svc_setup_socket */ newsock->sk->sk_state_change = svsk->sk_ostate; newsock->sk->sk_data_ready = svsk->sk_odata; @@ -752,10 +789,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); err = kernel_getsockname(newsock, sin); slen = err; - if (unlikely(err < 0)) { - dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); + if (unlikely(err < 0)) slen = offsetof(struct sockaddr, sa_data); - } svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); if (sock_is_loopback(newsock->sk)) @@ -772,13 +807,14 @@ failed: return NULL; } -static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +static size_t svc_tcp_restore_pages(struct svc_sock *svsk, + struct svc_rqst *rqstp) { - unsigned int i, len, npages; + size_t len = svsk->sk_datalen; + unsigned int i, npages; - if (svsk->sk_datalen == 0) + if (!len) return 0; - len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { if (rqstp->rq_pages[i] != NULL) @@ -827,47 +863,45 @@ out: } /* - * Receive fragment record header. - * If we haven't gotten the record length yet, get the next four bytes. + * Receive fragment record header into sk_marker. */ -static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) +static ssize_t svc_tcp_read_marker(struct svc_sock *svsk, + struct svc_rqst *rqstp) { - struct svc_serv *serv = svsk->sk_xprt.xpt_server; - unsigned int want; - int len; + ssize_t want, len; + /* If we haven't gotten the record length yet, + * get the next four bytes. + */ if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { + struct msghdr msg = { NULL }; struct kvec iov; want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; - iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; + iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; - len = svc_recvfrom(rqstp, &iov, 1, want, 0); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, want); + len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); if (len < 0) - goto error; + return len; svsk->sk_tcplen += len; - if (len < want) { - dprintk("svc: short recvfrom while reading record " - "length (%d of %d)\n", len, want); - return -EAGAIN; + /* call again to read the remaining bytes */ + goto err_short; } - - dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk)); + trace_svcsock_marker(&svsk->sk_xprt, svsk->sk_marker); if (svc_sock_reclen(svsk) + svsk->sk_datalen > - serv->sv_max_mesg) { - net_notice_ratelimited("RPC: fragment too large: %d\n", - svc_sock_reclen(svsk)); - goto err_delete; - } + svsk->sk_xprt.xpt_server->sv_max_mesg) + goto err_too_large; } - return svc_sock_reclen(svsk); -error: - dprintk("RPC: TCP recv_record got %d\n", len); - return len; -err_delete: + +err_too_large: + net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n", + __func__, svsk->sk_xprt.xpt_server->sv_name, + svc_sock_reclen(svsk)); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +err_short: return -EAGAIN; } @@ -916,87 +950,58 @@ unlock_eagain: return -EAGAIN; } -static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) -{ - int i = 0; - int t = 0; - - while (t < len) { - vec[i].iov_base = page_address(pages[i]); - vec[i].iov_len = PAGE_SIZE; - i++; - t += PAGE_SIZE; - } - return i; -} - static void svc_tcp_fragment_received(struct svc_sock *svsk) { /* If we have more data, signal svc_xprt_enqueue() to try again */ - dprintk("svc: TCP %s record (%d bytes)\n", - svc_sock_final_rec(svsk) ? "final" : "nonfinal", - svc_sock_reclen(svsk)); svsk->sk_tcplen = 0; - svsk->sk_reclen = 0; + svsk->sk_marker = xdr_zero; } -/* - * Receive data from a TCP socket. +/** + * svc_tcp_recvfrom - Receive data from a TCP socket + * @rqstp: request structure into which to receive an RPC Call + * + * Called in a loop when XPT_DATA has been set. + * + * Read the 4-byte stream record marker, then use the record length + * in that marker to set up exactly the resources needed to receive + * the next RPC message into @rqstp. + * + * Returns: + * On success, the number of bytes in a received RPC Call, or + * %0 if a complete RPC Call message was not ready to return + * + * The zero return case handles partial receives and callback Replies. + * The state of a partial receive is preserved in the svc_sock for + * the next call to svc_tcp_recvfrom. */ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); struct svc_serv *serv = svsk->sk_xprt.xpt_server; - int len; - struct kvec *vec; - unsigned int want, base; + size_t want, base; + ssize_t len; __be32 *p; __be32 calldir; - int pnum; - - dprintk("svc: tcp_recv %p data %d conn %d close %d\n", - svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); - len = svc_tcp_recv_record(svsk, rqstp); + clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + len = svc_tcp_read_marker(svsk, rqstp); if (len < 0) goto error; base = svc_tcp_restore_pages(svsk, rqstp); - want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); - - vec = rqstp->rq_vec; - - pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], base + want); - - rqstp->rq_respages = &rqstp->rq_pages[pnum]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - - /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, base + want, base); + want = len - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); + len = svc_tcp_read_msg(rqstp, base + want, base); if (len >= 0) { + trace_svcsock_tcp_recv(&svsk->sk_xprt, len); svsk->sk_tcplen += len; svsk->sk_datalen += len; } - if (len != want || !svc_sock_final_rec(svsk)) { - svc_tcp_save_pages(svsk, rqstp); - if (len < 0 && len != -EAGAIN) - goto err_delete; - if (len == want) - svc_tcp_fragment_received(svsk); - else - dprintk("svc: incomplete TCP record (%d of %d)\n", - (int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)), - svc_sock_reclen(svsk)); - goto err_noclose; - } - - if (svsk->sk_datalen < 8) { - svsk->sk_datalen = 0; - goto err_delete; /* client is nuts. */ - } + if (len != want || !svc_sock_final_rec(svsk)) + goto err_incomplete; + if (svsk->sk_datalen < 8) + goto err_nuts; rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; @@ -1031,14 +1036,26 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) return rqstp->rq_arg.len; +err_incomplete: + svc_tcp_save_pages(svsk, rqstp); + if (len < 0 && len != -EAGAIN) + goto err_delete; + if (len == want) + svc_tcp_fragment_received(svsk); + else + trace_svcsock_tcp_recv_short(&svsk->sk_xprt, + svc_sock_reclen(svsk), + svsk->sk_tcplen - sizeof(rpc_fraghdr)); + goto err_noclose; error: if (len != -EAGAIN) goto err_delete; - dprintk("RPC: TCP recvfrom got EAGAIN\n"); + trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0); return 0; +err_nuts: + svsk->sk_datalen = 0; err_delete: - printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", - svsk->sk_xprt.xpt_server->sv_name, -len); + trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); err_noclose: return 0; /* record not complete */ @@ -1048,6 +1065,9 @@ err_noclose: * svc_tcp_sendto - Send out a reply on a TCP socket * @rqstp: completed svc_rqst * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * * Returns the number of bytes sent, or a negative errno. */ static int svc_tcp_sendto(struct svc_rqst *rqstp) @@ -1063,14 +1083,22 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) unsigned int uninitialized_var(sent); int err; - svc_release_skb(rqstp); + svc_tcp_release_rqst(rqstp); + mutex_lock(&xprt->xpt_mutex); + if (svc_xprt_is_dead(xprt)) + goto out_notconn; err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); xdr_free_bvec(xdr); + trace_svcsock_tcp_send(xprt, err < 0 ? err : sent); if (err < 0 || sent != (xdr->len + sizeof(marker))) goto out_close; + mutex_unlock(&xprt->xpt_mutex); return sent; +out_notconn: + mutex_unlock(&xprt->xpt_mutex); + return -ENOTCONN; out_close: pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", xprt->xpt_server->sv_name, @@ -1078,6 +1106,7 @@ out_close: (err < 0) ? err : sent, xdr->len); set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); + mutex_unlock(&xprt->xpt_mutex); return -EAGAIN; } @@ -1094,7 +1123,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { .xpo_recvfrom = svc_tcp_recvfrom, .xpo_sendto = svc_tcp_sendto, .xpo_read_payload = svc_sock_read_payload, - .xpo_release_rqst = svc_release_skb, + .xpo_release_rqst = svc_tcp_release_rqst, .xpo_detach = svc_tcp_sock_detach, .xpo_free = svc_sock_free, .xpo_has_wspace = svc_tcp_has_wspace, @@ -1132,18 +1161,16 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags); if (sk->sk_state == TCP_LISTEN) { - dprintk("setting up TCP socket for listening\n"); strcpy(svsk->sk_xprt.xpt_remotebuf, "listener"); set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); sk->sk_data_ready = svc_tcp_listen_data_ready; set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); } else { - dprintk("setting up TCP socket for reading\n"); sk->sk_state_change = svc_tcp_state_change; sk->sk_data_ready = svc_data_ready; sk->sk_write_space = svc_write_space; - svsk->sk_reclen = 0; + svsk->sk_marker = xdr_zero; svsk->sk_tcplen = 0; svsk->sk_datalen = 0; memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); @@ -1188,7 +1215,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); int err = 0; - dprintk("svc: svc_setup_socket %p\n", sock); svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); if (!svsk) return ERR_PTR(-ENOMEM); @@ -1225,12 +1251,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, else svc_tcp_init(svsk, serv); - dprintk("svc: svc_setup_socket created %p (inet %p), " - "listen %d close %d\n", - svsk, svsk->sk_sk, - test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); - + trace_svcsock_new_socket(sock); return svsk; } @@ -1322,11 +1343,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, struct sockaddr *newsin = (struct sockaddr *)&addr; int newlen; int family; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - - dprintk("svc: svc_create_socket(%s, %d, %s)\n", - serv->sv_program->pg_name, protocol, - __svc_print_addr(sin, buf, sizeof(buf))); if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { printk(KERN_WARNING "svc: only UDP and TCP " @@ -1383,7 +1399,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); return (struct svc_xprt *)svsk; bummer: - dprintk("svc: svc_create_socket error = %d\n", -error); sock_release(sock); return ERR_PTR(error); } @@ -1397,8 +1412,6 @@ static void svc_sock_detach(struct svc_xprt *xprt) struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); struct sock *sk = svsk->sk_sk; - dprintk("svc: svc_sock_detach(%p)\n", svsk); - /* put back the old socket callbacks */ lock_sock(sk); sk->sk_state_change = svsk->sk_ostate; @@ -1415,8 +1428,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk); - svc_sock_detach(xprt); if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { @@ -1431,7 +1442,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) static void svc_sock_free(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - dprintk("svc: svc_sock_free(%p)\n", svsk); if (svsk->sk_sock->file) sockfd_put(svsk->sk_sock); diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index af7eb8d202ae..1ee73f7cf931 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -10,59 +10,34 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#define RPCDBG_FACILITY RPCDBG_SVCXPRT - -#undef SVCRDMA_BACKCHANNEL_DEBUG - /** - * svc_rdma_handle_bc_reply - Process incoming backchannel reply - * @xprt: controlling backchannel transport - * @rdma_resp: pointer to incoming transport header - * @rcvbuf: XDR buffer into which to decode the reply + * svc_rdma_handle_bc_reply - Process incoming backchannel Reply + * @rqstp: resources for handling the Reply + * @rctxt: Received message * - * Returns: - * %0 if @rcvbuf is filled in, xprt_complete_rqst called, - * %-EAGAIN if server should call ->recvfrom again. */ -int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, - struct xdr_buf *rcvbuf) +void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *rctxt) { + struct svc_xprt *sxprt = rqstp->rq_xprt; + struct rpc_xprt *xprt = sxprt->xpt_bc_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct xdr_buf *rcvbuf = &rqstp->rq_arg; struct kvec *dst, *src = &rcvbuf->head[0]; + __be32 *rdma_resp = rctxt->rc_recv_buf; struct rpc_rqst *req; u32 credits; - size_t len; - __be32 xid; - __be32 *p; - int ret; - - p = (__be32 *)src->iov_base; - len = src->iov_len; - xid = *rdma_resp; - -#ifdef SVCRDMA_BACKCHANNEL_DEBUG - pr_info("%s: xid=%08x, length=%zu\n", - __func__, be32_to_cpu(xid), len); - pr_info("%s: RPC/RDMA: %*ph\n", - __func__, (int)RPCRDMA_HDRLEN_MIN, rdma_resp); - pr_info("%s: RPC: %*ph\n", - __func__, (int)len, p); -#endif - - ret = -EAGAIN; - if (src->iov_len < 24) - goto out_shortreply; spin_lock(&xprt->queue_lock); - req = xprt_lookup_rqst(xprt, xid); + req = xprt_lookup_rqst(xprt, *rdma_resp); if (!req) - goto out_notfound; + goto out_unlock; dst = &req->rq_private_buf.head[0]; memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); - if (dst->iov_len < len) + if (dst->iov_len < src->iov_len) goto out_unlock; - memcpy(dst->iov_base, p, len); + memcpy(dst->iov_base, src->iov_base, src->iov_len); xprt_pin_rqst(req); spin_unlock(&xprt->queue_lock); @@ -71,31 +46,17 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, credits = 1; /* don't deadlock */ else if (credits > r_xprt->rx_buf.rb_bc_max_requests) credits = r_xprt->rx_buf.rb_bc_max_requests; - spin_lock(&xprt->transport_lock); xprt->cwnd = credits << RPC_CWNDSHIFT; spin_unlock(&xprt->transport_lock); spin_lock(&xprt->queue_lock); - ret = 0; xprt_complete_rqst(req->rq_task, rcvbuf->len); xprt_unpin_rqst(req); rcvbuf->len = 0; out_unlock: spin_unlock(&xprt->queue_lock); -out: - return ret; - -out_shortreply: - dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n", - xprt, src->iov_len); - goto out; - -out_notfound: - dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n", - xprt, be32_to_cpu(xid)); - goto out_unlock; } /* Send a backwards direction RPC call. @@ -192,10 +153,6 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) *p++ = xdr_zero; *p = xdr_zero; -#ifdef SVCRDMA_BACKCHANNEL_DEBUG - pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); -#endif - rqst->rq_xtime = ktime_get(); rc = svc_rdma_bc_sendto(rdma, rqst, ctxt); if (rc) @@ -206,45 +163,36 @@ put_ctxt: svc_rdma_send_ctxt_put(rdma, ctxt); drop_connection: - dprintk("svcrdma: failed to send bc call\n"); return -ENOTCONN; } -/* Send an RPC call on the passive end of a transport - * connection. +/** + * xprt_rdma_bc_send_request - Send a reverse-direction Call + * @rqst: rpc_rqst containing Call message to be sent + * + * Return values: + * %0 if the message was sent successfully + * %ENOTCONN if the message was not sent */ -static int -xprt_rdma_bc_send_request(struct rpc_rqst *rqst) +static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst) { struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; - struct svcxprt_rdma *rdma; + struct svcxprt_rdma *rdma = + container_of(sxprt, struct svcxprt_rdma, sc_xprt); int ret; - dprintk("svcrdma: sending bc call with xid: %08x\n", - be32_to_cpu(rqst->rq_xid)); + if (test_bit(XPT_DEAD, &sxprt->xpt_flags)) + return -ENOTCONN; - mutex_lock(&sxprt->xpt_mutex); - - ret = -ENOTCONN; - rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); - if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) { - ret = rpcrdma_bc_send_request(rdma, rqst); - if (ret == -ENOTCONN) - svc_close_xprt(sxprt); - } - - mutex_unlock(&sxprt->xpt_mutex); - - if (ret < 0) - return ret; - return 0; + ret = rpcrdma_bc_send_request(rdma, rqst); + if (ret == -ENOTCONN) + svc_close_xprt(sxprt); + return ret; } static void xprt_rdma_bc_close(struct rpc_xprt *xprt) { - dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); - xprt_disconnect_done(xprt); xprt->cwnd = RPC_CWNDSHIFT; } @@ -252,8 +200,6 @@ xprt_rdma_bc_close(struct rpc_xprt *xprt) static void xprt_rdma_bc_put(struct rpc_xprt *xprt) { - dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); - xprt_rdma_free_addresses(xprt); xprt_free(xprt); } @@ -288,19 +234,14 @@ xprt_setup_rdma_bc(struct xprt_create *args) struct rpc_xprt *xprt; struct rpcrdma_xprt *new_xprt; - if (args->addrlen > sizeof(xprt->addr)) { - dprintk("RPC: %s: address too large\n", __func__); + if (args->addrlen > sizeof(xprt->addr)) return ERR_PTR(-EBADF); - } xprt = xprt_alloc(args->net, sizeof(*new_xprt), RPCRDMA_MAX_BC_REQUESTS, RPCRDMA_MAX_BC_REQUESTS); - if (!xprt) { - dprintk("RPC: %s: couldn't allocate rpc_xprt\n", - __func__); + if (!xprt) return ERR_PTR(-ENOMEM); - } xprt->timeout = &xprt_rdma_bc_timeout; xprt_set_bound(xprt); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index efa5fcb5793f..e426fedb9524 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -665,23 +665,23 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg, return hdr_len; out_short: - trace_svcrdma_decode_short(rq_arg->len); + trace_svcrdma_decode_short_err(rq_arg->len); return -EINVAL; out_version: - trace_svcrdma_decode_badvers(rdma_argp); + trace_svcrdma_decode_badvers_err(rdma_argp); return -EPROTONOSUPPORT; out_drop: - trace_svcrdma_decode_drop(rdma_argp); + trace_svcrdma_decode_drop_err(rdma_argp); return 0; out_proc: - trace_svcrdma_decode_badproc(rdma_argp); + trace_svcrdma_decode_badproc_err(rdma_argp); return -EINVAL; out_inval: - trace_svcrdma_decode_parse(rdma_argp); + trace_svcrdma_decode_parse_err(rdma_argp); return -EINVAL; } @@ -878,12 +878,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto out_drop; rqstp->rq_xprt_hlen = ret; - if (svc_rdma_is_backchannel_reply(xprt, p)) { - ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p, - &rqstp->rq_arg); - svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); - return ret; - } + if (svc_rdma_is_backchannel_reply(xprt, p)) + goto out_backchannel; + svc_rdma_get_inv_rkey(rdma_xprt, ctxt); p += rpcrdma_fixed_maxsz; @@ -913,6 +910,8 @@ out_postfail: svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); return ret; +out_backchannel: + svc_rdma_handle_bc_reply(rqstp, ctxt); out_drop: svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); return 0; diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 23c2d3ce0dc9..5eb35309ecef 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -9,13 +9,10 @@ #include <linux/sunrpc/rpc_rdma.h> #include <linux/sunrpc/svc_rdma.h> -#include <linux/sunrpc/debug.h> #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#define RPCDBG_FACILITY RPCDBG_SVCXPRT - static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc); static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc); @@ -39,7 +36,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc); struct svc_rdma_rw_ctxt { struct list_head rw_list; struct rdma_rw_ctx rw_ctx; - int rw_nents; + unsigned int rw_nents; struct sg_table rw_sg_table; struct scatterlist rw_first_sgl[]; }; @@ -67,19 +64,22 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges) ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE), GFP_KERNEL); if (!ctxt) - goto out; + goto out_noctx; INIT_LIST_HEAD(&ctxt->rw_list); } ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl; if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges, ctxt->rw_sg_table.sgl, - SG_CHUNK_SIZE)) { - kfree(ctxt); - ctxt = NULL; - } -out: + SG_CHUNK_SIZE)) + goto out_free; return ctxt; + +out_free: + kfree(ctxt); +out_noctx: + trace_svcrdma_no_rwctx_err(rdma, sges); + return NULL; } static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, @@ -107,6 +107,34 @@ void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma) } } +/** + * svc_rdma_rw_ctx_init - Prepare a R/W context for I/O + * @rdma: controlling transport instance + * @ctxt: R/W context to prepare + * @offset: RDMA offset + * @handle: RDMA tag/handle + * @direction: I/O direction + * + * Returns on success, the number of WQEs that will be needed + * on the workqueue, or a negative errno. + */ +static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma, + struct svc_rdma_rw_ctxt *ctxt, + u64 offset, u32 handle, + enum dma_data_direction direction) +{ + int ret; + + ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp, rdma->sc_port_num, + ctxt->rw_sg_table.sgl, ctxt->rw_nents, + 0, offset, handle, direction); + if (unlikely(ret < 0)) { + svc_rdma_put_rw_ctxt(rdma, ctxt); + trace_svcrdma_dma_map_rw_err(rdma, ctxt->rw_nents, ret); + } + return ret; +} + /* A chunk context tracks all I/O for moving one Read or Write * chunk. This is a a set of rdma_rw's that handle data movement * for all segments of one chunk. @@ -428,15 +456,13 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, ctxt = svc_rdma_get_rw_ctxt(rdma, (write_len >> PAGE_SHIFT) + 2); if (!ctxt) - goto out_noctx; + return -ENOMEM; constructor(info, write_len, ctxt); - ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp, - rdma->sc_port_num, ctxt->rw_sg_table.sgl, - ctxt->rw_nents, 0, seg_offset, - seg_handle, DMA_TO_DEVICE); + ret = svc_rdma_rw_ctx_init(rdma, ctxt, seg_offset, seg_handle, + DMA_TO_DEVICE); if (ret < 0) - goto out_initerr; + return -EIO; trace_svcrdma_send_wseg(seg_handle, write_len, seg_offset); @@ -455,18 +481,9 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, return 0; out_overflow: - dprintk("svcrdma: inadequate space in Write chunk (%u)\n", - info->wi_nsegs); + trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no, + info->wi_nsegs); return -E2BIG; - -out_noctx: - dprintk("svcrdma: no R/W ctxs available\n"); - return -ENOMEM; - -out_initerr: - svc_rdma_put_rw_ctxt(rdma, ctxt); - trace_svcrdma_dma_map_rwctx(rdma, ret); - return -EIO; } /* Send one of an xdr_buf's kvecs by itself. To send a Reply @@ -616,7 +633,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT; ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no); if (!ctxt) - goto out_noctx; + return -ENOMEM; ctxt->rw_nents = sge_no; sg = ctxt->rw_sg_table.sgl; @@ -646,29 +663,18 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, goto out_overrun; } - ret = rdma_rw_ctx_init(&ctxt->rw_ctx, cc->cc_rdma->sc_qp, - cc->cc_rdma->sc_port_num, - ctxt->rw_sg_table.sgl, ctxt->rw_nents, - 0, offset, rkey, DMA_FROM_DEVICE); + ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, offset, rkey, + DMA_FROM_DEVICE); if (ret < 0) - goto out_initerr; + return -EIO; list_add(&ctxt->rw_list, &cc->cc_rwctxts); cc->cc_sqecount += ret; return 0; -out_noctx: - dprintk("svcrdma: no R/W ctxs available\n"); - return -ENOMEM; - out_overrun: - dprintk("svcrdma: request overruns rq_pages\n"); + trace_svcrdma_page_overrun_err(cc->cc_rdma, rqstp, info->ri_pageno); return -EINVAL; - -out_initerr: - trace_svcrdma_dma_map_rwctx(cc->cc_rdma, ret); - svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt); - return -EIO; } /* Walk the segments in the Read chunk starting at @p and construct diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index b6c8643867f2..38e7c3c8c4a9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -868,12 +868,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) __be32 *p; int ret; - /* Create the RDMA response header. xprt->xpt_mutex, - * acquired in svc_send(), serializes RPC replies. The - * code path below that inserts the credit grant value - * into each transport header runs only inside this - * critical section. - */ + ret = -ENOTCONN; + if (svc_xprt_is_dead(xprt)) + goto err0; + ret = -ENOMEM; sctxt = svc_rdma_send_ctxt_get(rdma); if (!sctxt) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ea54785db4f8..d38be57b00ed 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -211,7 +211,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, newxprt->sc_ord = param->initiator_depth; sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; - svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); + newxprt->sc_xprt.xpt_remotelen = svc_addr_len(sa); + memcpy(&newxprt->sc_xprt.xpt_remote, sa, + newxprt->sc_xprt.xpt_remotelen); + snprintf(newxprt->sc_xprt.xpt_remotebuf, + sizeof(newxprt->sc_xprt.xpt_remotebuf) - 1, "%pISc", sa); + /* The remote port is arbitrary and not under the control of the * client ULP. Set it to a fixed value so that the DRC continues * to be effective after a reconnect. @@ -309,11 +314,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, struct svcxprt_rdma *cma_xprt; int ret; - dprintk("svcrdma: Creating RDMA listener\n"); - if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) { - dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family); + if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6) return ERR_PTR(-EAFNOSUPPORT); - } cma_xprt = svc_rdma_create_xprt(serv, net); if (!cma_xprt) return ERR_PTR(-ENOMEM); @@ -324,7 +326,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(listen_id)) { ret = PTR_ERR(listen_id); - dprintk("svcrdma: rdma_create_id failed = %d\n", ret); goto err0; } @@ -333,23 +334,17 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, */ #if IS_ENABLED(CONFIG_IPV6) ret = rdma_set_afonly(listen_id, 1); - if (ret) { - dprintk("svcrdma: rdma_set_afonly failed = %d\n", ret); + if (ret) goto err1; - } #endif ret = rdma_bind_addr(listen_id, sa); - if (ret) { - dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); + if (ret) goto err1; - } cma_xprt->sc_cm_id = listen_id; ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); - if (ret) { - dprintk("svcrdma: rdma_listen failed = %d\n", ret); + if (ret) goto err1; - } /* * We need to use the address from the cm_id in case the @@ -405,9 +400,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (!newxprt) return NULL; - dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n", - newxprt, newxprt->sc_cm_id); - dev = newxprt->sc_cm_id->device; newxprt->sc_port_num = newxprt->sc_cm_id->port_num; @@ -443,21 +435,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_pd = ib_alloc_pd(dev, 0); if (IS_ERR(newxprt->sc_pd)) { - dprintk("svcrdma: error creating PD for connect request\n"); + trace_svcrdma_pd_err(newxprt, PTR_ERR(newxprt->sc_pd)); goto errout; } newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth, IB_POLL_WORKQUEUE); - if (IS_ERR(newxprt->sc_sq_cq)) { - dprintk("svcrdma: error creating SQ CQ for connect request\n"); + if (IS_ERR(newxprt->sc_sq_cq)) goto errout; - } newxprt->sc_rq_cq = ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE); - if (IS_ERR(newxprt->sc_rq_cq)) { - dprintk("svcrdma: error creating RQ CQ for connect request\n"); + if (IS_ERR(newxprt->sc_rq_cq)) goto errout; - } memset(&qp_attr, 0, sizeof qp_attr); qp_attr.event_handler = qp_event_handler; @@ -481,7 +469,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr); if (ret) { - dprintk("svcrdma: failed to create QP, ret=%d\n", ret); + trace_svcrdma_qp_err(newxprt, ret); goto errout; } newxprt->sc_qp = newxprt->sc_cm_id->qp; @@ -489,8 +477,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) newxprt->sc_snd_w_inv = false; if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) && - !rdma_ib_or_roce(dev, newxprt->sc_port_num)) + !rdma_ib_or_roce(dev, newxprt->sc_port_num)) { + trace_svcrdma_fabric_err(newxprt, -EINVAL); goto errout; + } if (!svc_rdma_post_recvs(newxprt)) goto errout; @@ -512,15 +502,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) conn_param.initiator_depth = min_t(int, newxprt->sc_ord, dev->attrs.max_qp_init_rd_atom); if (!conn_param.initiator_depth) { - dprintk("svcrdma: invalid ORD setting\n"); ret = -EINVAL; + trace_svcrdma_initdepth_err(newxprt, ret); goto errout; } conn_param.private_data = &pmsg; conn_param.private_data_len = sizeof(pmsg); ret = rdma_accept(newxprt->sc_cm_id, &conn_param); - if (ret) + if (ret) { + trace_svcrdma_accept_err(newxprt, ret); goto errout; + } #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) dprintk("svcrdma: new connection %p accepted:\n", newxprt); @@ -535,12 +527,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk(" ord : %d\n", conn_param.initiator_depth); #endif - trace_svcrdma_xprt_accept(&newxprt->sc_xprt); return &newxprt->sc_xprt; errout: - dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); - trace_svcrdma_xprt_fail(&newxprt->sc_xprt); /* Take a reference in case the DTO handler runs */ svc_xprt_get(&newxprt->sc_xprt); if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) @@ -578,8 +567,6 @@ static void __svc_rdma_free(struct work_struct *work) container_of(work, struct svcxprt_rdma, sc_work); struct svc_xprt *xprt = &rdma->sc_xprt; - trace_svcrdma_xprt_free(xprt); - if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) ib_drain_qp(rdma->sc_qp); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3a143e250b9a..914508ea9b84 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2528,8 +2528,16 @@ static int bc_sendto(struct rpc_rqst *req) return sent; } -/* - * The send routine. Borrows from svc_send +/** + * bc_send_request - Send a backchannel Call on a TCP socket + * @req: rpc_rqst containing Call message to be sent + * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * + * Return values: + * %0 if the message was sent successfully + * %ENOTCONN if the message was not sent */ static int bc_send_request(struct rpc_rqst *req) { |