diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 22:31:49 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 22:31:49 +0200 |
commit | ad9a19d003703ae06a6e8efc64cf26a939d9e84d (patch) | |
tree | 2383dde22df7b69e09c3190bce5ca062b756b1f3 | |
parent | Merge branch 'for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/kdav... (diff) | |
parent | svcrdma: Estimate Send Queue depth properly (diff) | |
download | linux-ad9a19d003703ae06a6e8efc64cf26a939d9e84d.tar.xz linux-ad9a19d003703ae06a6e8efc64cf26a939d9e84d.zip |
Merge tag 'nfsd-4.14' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields:
"More RDMA work and some op-structure constification from Chuck Lever,
and a small cleanup to our xdr encoding"
* tag 'nfsd-4.14' of git://linux-nfs.org/~bfields/linux:
svcrdma: Estimate Send Queue depth properly
rdma core: Add rdma_rw_mr_payload()
svcrdma: Limit RQ depth
svcrdma: Populate tail iovec when receiving
nfsd: Incoming xdr_bufs may have content in tail buffer
svcrdma: Clean up svc_rdma_build_read_chunk()
sunrpc: Const-ify struct sv_serv_ops
nfsd: Const-ify NFSv4 encoding and decoding ops arrays
sunrpc: Const-ify instances of struct svc_xprt_ops
nfsd4: individual encoders no longer see error cases
nfsd4: skip encoder in trivial error cases
nfsd4: define ->op_release for compound ops
nfsd4: opdesc will be useful outside nfs4proc.c
nfsd4: move some nfsd4 op definitions to xdr4.h
-rw-r--r-- | drivers/infiniband/core/rw.c | 24 | ||||
-rw-r--r-- | fs/lockd/svc.c | 2 | ||||
-rw-r--r-- | fs/nfs/callback.c | 10 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 110 | ||||
-rw-r--r-- | fs/nfsd/nfs4xdr.c | 346 | ||||
-rw-r--r-- | fs/nfsd/nfssvc.c | 2 | ||||
-rw-r--r-- | fs/nfsd/xdr4.h | 50 | ||||
-rw-r--r-- | include/linux/sunrpc/svc.h | 6 | ||||
-rw-r--r-- | include/linux/sunrpc/svc_xprt.h | 4 | ||||
-rw-r--r-- | include/rdma/rw.h | 2 | ||||
-rw-r--r-- | net/sunrpc/svc.c | 6 | ||||
-rw-r--r-- | net/sunrpc/svcsock.c | 6 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_rw.c | 116 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_transport.c | 40 |
14 files changed, 323 insertions, 401 deletions
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index dbfd854c32c9..6ca607e8e293 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -643,6 +643,30 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, } EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature); +/** + * rdma_rw_mr_factor - return number of MRs required for a payload + * @device: device handling the connection + * @port_num: port num to which the connection is bound + * @maxpages: maximum payload pages per rdma_rw_ctx + * + * Returns the number of MRs the device requires to move @maxpayload + * bytes. The returned value is used during transport creation to + * compute max_rdma_ctxts and the size of the transport's Send and + * Send Completion Queues. + */ +unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num, + unsigned int maxpages) +{ + unsigned int mr_pages; + + if (rdma_rw_can_use_mr(device, port_num)) + mr_pages = rdma_rw_fr_page_list_len(device); + else + mr_pages = device->attrs.max_sge_rd; + return DIV_ROUND_UP(maxpages, mr_pages); +} +EXPORT_SYMBOL(rdma_rw_mr_factor); + void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr) { u32 factor; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 726b6cecf430..b995bdc13976 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -396,7 +396,7 @@ out_rqst: return error; } -static struct svc_serv_ops lockd_sv_ops = { +static const struct svc_serv_ops lockd_sv_ops = { .svo_shutdown = svc_rpcb_cleanup, .svo_enqueue_xprt = svc_xprt_do_enqueue, }; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 34323877ec13..2cddf7f437e6 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -226,26 +226,26 @@ err_bind: return ret; } -static struct svc_serv_ops nfs40_cb_sv_ops = { +static const struct svc_serv_ops nfs40_cb_sv_ops = { .svo_function = nfs4_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, .svo_setup = svc_set_num_threads_sync, .svo_module = THIS_MODULE, }; #if defined(CONFIG_NFS_V4_1) -static struct svc_serv_ops nfs41_cb_sv_ops = { +static const struct svc_serv_ops nfs41_cb_sv_ops = { .svo_function = nfs41_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, .svo_setup = svc_set_num_threads_sync, .svo_module = THIS_MODULE, }; -static struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static const struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = &nfs41_cb_sv_ops, }; #else -static struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static const struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = NULL, }; @@ -254,8 +254,8 @@ static struct svc_serv_ops *nfs4_cb_sv_ops[] = { static struct svc_serv *nfs_callback_create_svc(int minorversion) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + const struct svc_serv_ops *sv_ops; struct svc_serv *serv; - struct svc_serv_ops *sv_ops; /* * Check whether we're already up and running. diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d27e75ad25e3..3c69db7d4905 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -784,6 +784,14 @@ out: return status; } + +static void +nfsd4_read_release(union nfsd4_op_u *u) +{ + if (u->read.rd_filp) + fput(u->read.rd_filp); +} + static __be32 nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -912,6 +920,13 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat return nfs_ok; } +static void +nfsd4_secinfo_release(union nfsd4_op_u *u) +{ + if (u->secinfo.si_exp) + exp_put(u->secinfo.si_exp); +} + static __be32 nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -1335,6 +1350,12 @@ out: return nfserr; } +static void +nfsd4_getdeviceinfo_release(union nfsd4_op_u *u) +{ + kfree(u->getdeviceinfo.gd_device); +} + static __be32 nfsd4_layoutget(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -1415,6 +1436,12 @@ out: return nfserr; } +static void +nfsd4_layoutget_release(union nfsd4_op_u *u) +{ + kfree(u->layoutget.lg_content); +} + static __be32 nfsd4_layoutcommit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -1541,49 +1568,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum) nfsdstats.nfs4_opcount[opnum]++; } -enum nfsd4_op_flags { - ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ - ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ - ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ - /* For rfc 5661 section 2.6.3.1.1: */ - OP_HANDLES_WRONGSEC = 1 << 3, - OP_IS_PUTFH_LIKE = 1 << 4, - /* - * These are the ops whose result size we estimate before - * encoding, to avoid performing an op then not being able to - * respond or cache a response. This includes writes and setattrs - * as well as the operations usually called "nonidempotent": - */ - OP_MODIFIES_SOMETHING = 1 << 5, - /* - * Cache compounds containing these ops in the xid-based drc: - * We use the DRC for compounds containing non-idempotent - * operations, *except* those that are 4.1-specific (since - * sessions provide their own EOS), and except for stateful - * operations other than setclientid and setclientid_confirm - * (since sequence numbers provide EOS for open, lock, etc in - * the v4.0 case). - */ - OP_CACHEME = 1 << 6, - /* - * These are ops which clear current state id. - */ - OP_CLEAR_STATEID = 1 << 7, -}; - -struct nfsd4_operation { - __be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *, - union nfsd4_op_u *); - u32 op_flags; - char *op_name; - /* Try to get response size before operation */ - u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *); - void (*op_get_currentstateid)(struct nfsd4_compound_state *, - union nfsd4_op_u *); - void (*op_set_currentstateid)(struct nfsd4_compound_state *, - union nfsd4_op_u *); -}; - static const struct nfsd4_operation nfsd4_ops[]; static const char *nfsd4_op_name(unsigned opnum); @@ -1621,7 +1605,7 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) return nfs_ok; } -static inline const struct nfsd4_operation *OPDESC(struct nfsd4_op *op) +const struct nfsd4_operation *OPDESC(struct nfsd4_op *op) { return &nfsd4_ops[op->opnum]; } @@ -1694,7 +1678,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) struct nfsd4_compoundargs *args = rqstp->rq_argp; struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfsd4_op *op; - const struct nfsd4_operation *opdesc; struct nfsd4_compound_state *cstate = &resp->cstate; struct svc_fh *current_fh = &cstate->current_fh; struct svc_fh *save_fh = &cstate->save_fh; @@ -1747,15 +1730,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) goto encode_op; } - opdesc = OPDESC(op); - if (!current_fh->fh_dentry) { - if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { + if (!(op->opdesc->op_flags & ALLOWED_WITHOUT_FH)) { op->status = nfserr_nofilehandle; goto encode_op; } } else if (current_fh->fh_export->ex_fslocs.migrated && - !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { + !(op->opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { op->status = nfserr_moved; goto encode_op; } @@ -1763,12 +1744,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) fh_clear_wcc(current_fh); /* If op is non-idempotent */ - if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { + if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) { /* * Don't execute this op if we couldn't encode a * succesful reply: */ - u32 plen = opdesc->op_rsize_bop(rqstp, op); + u32 plen = op->opdesc->op_rsize_bop(rqstp, op); /* * Plus if there's another operation, make sure * we'll have space to at least encode an error: @@ -1781,9 +1762,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (op->status) goto encode_op; - if (opdesc->op_get_currentstateid) - opdesc->op_get_currentstateid(cstate, &op->u); - op->status = opdesc->op_func(rqstp, cstate, &op->u); + if (op->opdesc->op_get_currentstateid) + op->opdesc->op_get_currentstateid(cstate, &op->u); + op->status = op->opdesc->op_func(rqstp, cstate, &op->u); /* Only from SEQUENCE */ if (cstate->status == nfserr_replay_cache) { @@ -1792,10 +1773,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) goto out; } if (!op->status) { - if (opdesc->op_set_currentstateid) - opdesc->op_set_currentstateid(cstate, &op->u); + if (op->opdesc->op_set_currentstateid) + op->opdesc->op_set_currentstateid(cstate, &op->u); - if (opdesc->op_flags & OP_CLEAR_STATEID) + if (op->opdesc->op_flags & OP_CLEAR_STATEID) clear_current_stateid(cstate); if (need_wrongsec_check(rqstp)) @@ -2160,13 +2141,15 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_LOCK] = { .op_func = nfsd4_lock, - .op_flags = OP_MODIFIES_SOMETHING, + .op_flags = OP_MODIFIES_SOMETHING | + OP_NONTRIVIAL_ERROR_ENCODE, .op_name = "OP_LOCK", .op_rsize_bop = nfsd4_lock_rsize, .op_set_currentstateid = nfsd4_set_lockstateid, }, [OP_LOCKT] = { .op_func = nfsd4_lockt, + .op_flags = OP_NONTRIVIAL_ERROR_ENCODE, .op_name = "OP_LOCKT", .op_rsize_bop = nfsd4_lock_rsize, }, @@ -2238,6 +2221,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_READ] = { .op_func = nfsd4_read, + .op_release = nfsd4_read_release, .op_name = "OP_READ", .op_rsize_bop = nfsd4_read_rsize, .op_get_currentstateid = nfsd4_get_readstateid, @@ -2287,6 +2271,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_SECINFO] = { .op_func = nfsd4_secinfo, + .op_release = nfsd4_secinfo_release, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO", .op_rsize_bop = nfsd4_secinfo_rsize, @@ -2294,14 +2279,16 @@ static const struct nfsd4_operation nfsd4_ops[] = { [OP_SETATTR] = { .op_func = nfsd4_setattr, .op_name = "OP_SETATTR", - .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME + | OP_NONTRIVIAL_ERROR_ENCODE, .op_rsize_bop = nfsd4_setattr_rsize, .op_get_currentstateid = nfsd4_get_setattrstateid, }, [OP_SETCLIENTID] = { .op_func = nfsd4_setclientid, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_MODIFIES_SOMETHING | OP_CACHEME, + | OP_MODIFIES_SOMETHING | OP_CACHEME + | OP_NONTRIVIAL_ERROR_ENCODE, .op_name = "OP_SETCLIENTID", .op_rsize_bop = nfsd4_setclientid_rsize, }, @@ -2388,6 +2375,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_SECINFO_NO_NAME] = { .op_func = nfsd4_secinfo_no_name, + .op_release = nfsd4_secinfo_release, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO_NO_NAME", .op_rsize_bop = nfsd4_secinfo_rsize, @@ -2408,12 +2396,14 @@ static const struct nfsd4_operation nfsd4_ops[] = { #ifdef CONFIG_NFSD_PNFS [OP_GETDEVICEINFO] = { .op_func = nfsd4_getdeviceinfo, + .op_release = nfsd4_getdeviceinfo_release, .op_flags = ALLOWED_WITHOUT_FH, .op_name = "OP_GETDEVICEINFO", .op_rsize_bop = nfsd4_getdeviceinfo_rsize, }, [OP_LAYOUTGET] = { .op_func = nfsd4_layoutget, + .op_release = nfsd4_layoutget_release, .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_LAYOUTGET", .op_rsize_bop = nfsd4_layoutget_rsize, diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5f940d2a136b..2c61c6b8ae09 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -159,6 +159,25 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) */ unsigned int avail = (char *)argp->end - (char *)argp->p; __be32 *p; + + if (argp->pagelen == 0) { + struct kvec *vec = &argp->rqstp->rq_arg.tail[0]; + + if (!argp->tail) { + argp->tail = true; + avail = vec->iov_len; + argp->p = vec->iov_base; + argp->end = vec->iov_base + avail; + } + + if (avail < nbytes) + return NULL; + + p = argp->p; + argp->p += XDR_QUADLEN(nbytes); + return p; + } + if (avail + argp->pagelen < nbytes) return NULL; if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */ @@ -1778,7 +1797,7 @@ nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); -static nfsd4_dec nfsd4_dec_ops[] = { +static const nfsd4_dec nfsd4_dec_ops[] = { [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, @@ -1927,6 +1946,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; } + op->opdesc = OPDESC(op); /* * We'll try to cache the result in the DRC if any one * op in the compound wants to be cached: @@ -3102,14 +3122,12 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 8); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(access->ac_supported); - *p++ = cpu_to_be32(access->ac_resp_access); - } - return nfserr; + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(access->ac_supported); + *p++ = cpu_to_be32(access->ac_resp_access); + return 0; } static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) @@ -3117,17 +3135,15 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, - NFS4_MAX_SESSIONID_LEN); - *p++ = cpu_to_be32(bcts->dir); - /* Upshifting from TCP to RDMA is not supported */ - *p++ = cpu_to_be32(0); - } - return nfserr; + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(bcts->dir); + /* Upshifting from TCP to RDMA is not supported */ + *p++ = cpu_to_be32(0); + return 0; } static __be32 @@ -3135,10 +3151,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c { struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) - nfserr = nfsd4_encode_stateid(xdr, &close->cl_stateid); - - return nfserr; + return nfsd4_encode_stateid(xdr, &close->cl_stateid); } @@ -3148,14 +3161,12 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque_fixed(p, commit->co_verf.data, + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, commit->co_verf.data, NFS4_VERIFIER_SIZE); - } - return nfserr; + return 0; } static __be32 @@ -3164,15 +3175,13 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - encode_cinfo(p, &create->cr_cinfo); - nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0], - create->cr_bmval[1], create->cr_bmval[2]); - } - return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + encode_cinfo(p, &create->cr_cinfo); + nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0], + create->cr_bmval[1], create->cr_bmval[2]); + return 0; } static __be32 @@ -3181,13 +3190,8 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 struct svc_fh *fhp = getattr->ga_fhp; struct xdr_stream *xdr = &resp->xdr; - if (nfserr) - return nfserr; - - nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, - getattr->ga_bmval, - resp->rqstp, 0); - return nfserr; + return nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, + getattr->ga_bmval, resp->rqstp, 0); } static __be32 @@ -3198,14 +3202,12 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh unsigned int len; __be32 *p; - if (!nfserr) { - len = fhp->fh_handle.fh_size; - p = xdr_reserve_space(xdr, len + 4); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); - } - return nfserr; + len = fhp->fh_handle.fh_size; + p = xdr_reserve_space(xdr, len + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); + return 0; } /* @@ -3275,10 +3277,7 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l { struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) - nfserr = nfsd4_encode_stateid(xdr, &locku->lu_stateid); - - return nfserr; + return nfsd4_encode_stateid(xdr, &locku->lu_stateid); } @@ -3288,13 +3287,11 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &link->li_cinfo); - } - return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &link->li_cinfo); + return 0; } @@ -3304,12 +3301,9 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - goto out; - nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); if (nfserr) - goto out; + return nfserr; p = xdr_reserve_space(xdr, 24); if (!p) return nfserr_resource; @@ -3319,7 +3313,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1], open->op_bmval[2]); if (nfserr) - goto out; + return nfserr; p = xdr_reserve_space(xdr, 4); if (!p) @@ -3392,8 +3386,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op BUG(); } /* XXX save filehandle here */ -out: - return nfserr; + return 0; } static __be32 @@ -3401,10 +3394,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct { struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) - nfserr = nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); - - return nfserr; + return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); } static __be32 @@ -3412,10 +3402,7 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc { struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) - nfserr = nfsd4_encode_stateid(xdr, &od->od_stateid); - - return nfserr; + return nfsd4_encode_stateid(xdr, &od->od_stateid); } static __be32 nfsd4_encode_splice_read( @@ -3552,20 +3539,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct raparms *ra = NULL; __be32 *p; - if (nfserr) - goto out; - p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ if (!p) { WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)); - nfserr = nfserr_resource; - goto out; + return nfserr_resource; } if (resp->xdr.buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { WARN_ON_ONCE(1); - nfserr = nfserr_resource; - goto out; + return nfserr_resource; } xdr_commit_encode(xdr); @@ -3589,9 +3571,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr) xdr_truncate_encode(xdr, starting_len); -out: - if (file) - fput(file); return nfserr; } @@ -3605,9 +3584,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd int length_offset = xdr->buf->len; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; @@ -3651,9 +3627,6 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 int starting_len = xdr->buf->len; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; @@ -3739,13 +3712,11 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &remove->rm_cinfo); - } - return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &remove->rm_cinfo); + return 0; } static __be32 @@ -3754,19 +3725,16 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 40); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &rename->rn_sinfo); - p = encode_cinfo(p, &rename->rn_tinfo); - } - return nfserr; + p = xdr_reserve_space(xdr, 40); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &rename->rn_sinfo); + p = encode_cinfo(p, &rename->rn_tinfo); + return 0; } static __be32 -nfsd4_do_encode_secinfo(struct xdr_stream *xdr, - __be32 nfserr, struct svc_export *exp) +nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) { u32 i, nflavs, supported; struct exp_flavor_info *flavs; @@ -3774,9 +3742,6 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, __be32 *p, *flavorsp; static bool report = true; - if (nfserr) - goto out; - nfserr = nfserr_resource; if (exp->ex_nflavors) { flavs = exp->ex_flavors; nflavs = exp->ex_nflavors; @@ -3800,7 +3765,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, supported = 0; p = xdr_reserve_space(xdr, 4); if (!p) - goto out; + return nfserr_resource; flavorsp = p++; /* to be backfilled later */ for (i = 0; i < nflavs; i++) { @@ -3812,7 +3777,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, p = xdr_reserve_space(xdr, 4 + 4 + XDR_LEN(info.oid.len) + 4 + 4); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(RPC_AUTH_GSS); p = xdr_encode_opaque(p, info.oid.data, info.oid.len); *p++ = cpu_to_be32(info.qop); @@ -3821,7 +3786,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, supported++; p = xdr_reserve_space(xdr, 4); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(pf); } else { if (report) @@ -3833,11 +3798,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, if (nflavs != supported) report = false; *flavorsp = htonl(supported); - nfserr = 0; -out: - if (exp) - exp_put(exp); - return nfserr; + return 0; } static __be32 @@ -3846,7 +3807,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, { struct xdr_stream *xdr = &resp->xdr; - return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->si_exp); + return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); } static __be32 @@ -3855,7 +3816,7 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, { struct xdr_stream *xdr = &resp->xdr; - return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->sin_exp); + return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); } /* @@ -3916,16 +3877,14 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 16); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(write->wr_bytes_written); - *p++ = cpu_to_be32(write->wr_how_written); - p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, - NFS4_VERIFIER_SIZE); - } - return nfserr; + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(write->wr_bytes_written); + *p++ = cpu_to_be32(write->wr_how_written); + p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, + NFS4_VERIFIER_SIZE); + return 0; } static __be32 @@ -3938,12 +3897,8 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, char *server_scope; int major_id_sz; int server_scope_sz; - int status = 0; uint64_t minor_id = 0; - if (nfserr) - return nfserr; - major_id = utsname()->nodename; major_id_sz = strlen(major_id); server_scope = utsname()->nodename; @@ -3968,19 +3923,19 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, break; case SP4_MACH_CRED: /* spo_must_enforce bitmap: */ - status = nfsd4_encode_bitmap(xdr, + nfserr = nfsd4_encode_bitmap(xdr, exid->spo_must_enforce[0], exid->spo_must_enforce[1], exid->spo_must_enforce[2]); - if (status) - goto out; + if (nfserr) + return nfserr; /* spo_must_allow bitmap: */ - status = nfsd4_encode_bitmap(xdr, + nfserr = nfsd4_encode_bitmap(xdr, exid->spo_must_allow[0], exid->spo_must_allow[1], exid->spo_must_allow[2]); - if (status) - goto out; + if (nfserr) + return nfserr; break; default: WARN_ON_ONCE(1); @@ -4007,8 +3962,6 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, /* Implementation id */ *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */ return 0; -out: - return status; } static __be32 @@ -4018,9 +3971,6 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 24); if (!p) return nfserr_resource; @@ -4074,9 +4024,6 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); if (!p) return nfserr_resource; @@ -4101,9 +4048,6 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids)); if (!p) return nfserr_resource; @@ -4113,7 +4057,7 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = stateid->ts_id_status; } - return nfserr; + return 0; } #ifdef CONFIG_NFSD_PNFS @@ -4126,14 +4070,9 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, u32 starting_len = xdr->buf->len, needed_len; __be32 *p; - dprintk("%s: err %d\n", __func__, be32_to_cpu(nfserr)); - if (nfserr) - goto out; - - nfserr = nfserr_resource; p = xdr_reserve_space(xdr, 4); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(gdev->gd_layout_type); @@ -4149,42 +4088,33 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, */ if (xdr->buf->len + 4 > gdev->gd_maxcount) goto toosmall; - goto out; + return nfserr; } } - nfserr = nfserr_resource; if (gdev->gd_notify_types) { p = xdr_reserve_space(xdr, 4 + 4); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(1); /* bitmap length */ *p++ = cpu_to_be32(gdev->gd_notify_types); } else { p = xdr_reserve_space(xdr, 4); if (!p) - goto out; + return nfserr_resource; *p++ = 0; } - nfserr = 0; -out: - kfree(gdev->gd_device); - dprintk("%s: done: %d\n", __func__, be32_to_cpu(nfserr)); - return nfserr; - + return 0; toosmall: dprintk("%s: maxcount too small\n", __func__); needed_len = xdr->buf->len + 4 /* notifications */; xdr_truncate_encode(xdr, starting_len); p = xdr_reserve_space(xdr, 4); - if (!p) { - nfserr = nfserr_resource; - } else { - *p++ = cpu_to_be32(needed_len); - nfserr = nfserr_toosmall; - } - goto out; + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(needed_len); + return nfserr_toosmall; } static __be32 @@ -4195,14 +4125,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, const struct nfsd4_layout_ops *ops; __be32 *p; - dprintk("%s: err %d\n", __func__, nfserr); - if (nfserr) - goto out; - - nfserr = nfserr_resource; p = xdr_reserve_space(xdr, 36 + sizeof(stateid_opaque_t)); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(1); /* we always set return-on-close */ *p++ = cpu_to_be32(lgp->lg_sid.si_generation); @@ -4216,10 +4141,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = cpu_to_be32(lgp->lg_layout_type); ops = nfsd4_layout_ops[lgp->lg_layout_type]; - nfserr = ops->encode_layoutget(xdr, lgp); -out: - kfree(lgp->lg_content); - return nfserr; + return ops->encode_layoutget(xdr, lgp); } static __be32 @@ -4229,9 +4151,6 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; @@ -4243,7 +4162,7 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_encode_hyper(p, lcp->lc_newsize); } - return nfs_ok; + return 0; } static __be32 @@ -4253,16 +4172,13 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; *p++ = cpu_to_be32(lrp->lrs_present); if (lrp->lrs_present) return nfsd4_encode_stateid(xdr, &lrp->lr_sid); - return nfs_ok; + return 0; } #endif /* CONFIG_NFSD_PNFS */ @@ -4289,16 +4205,14 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, { __be32 *p; - if (!nfserr) { - nfserr = nfsd42_encode_write_res(resp, ©->cp_res); - if (nfserr) - return nfserr; + nfserr = nfsd42_encode_write_res(resp, ©->cp_res); + if (nfserr) + return nfserr; - p = xdr_reserve_space(&resp->xdr, 4 + 4); - *p++ = cpu_to_be32(copy->cp_consecutive); - *p++ = cpu_to_be32(copy->cp_synchronous); - } - return nfserr; + p = xdr_reserve_space(&resp->xdr, 4 + 4); + *p++ = cpu_to_be32(copy->cp_consecutive); + *p++ = cpu_to_be32(copy->cp_synchronous); + return 0; } static __be32 @@ -4307,14 +4221,11 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, { __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(&resp->xdr, 4 + 8); *p++ = cpu_to_be32(seek->seek_eof); p = xdr_encode_hyper(p, seek->seek_pos); - return nfserr; + return 0; } static __be32 @@ -4330,7 +4241,7 @@ typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); * since we don't need to filter out obsolete ops as this is * done in the decoding phase. */ -static nfsd4_enc nfsd4_enc_ops[] = { +static const nfsd4_enc nfsd4_enc_ops[] = { [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit, @@ -4449,6 +4360,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) struct xdr_stream *xdr = &resp->xdr; struct nfs4_stateowner *so = resp->cstate.replay_owner; struct svc_rqst *rqstp = resp->rqstp; + const struct nfsd4_operation *opdesc = op->opdesc; int post_err_offset; nfsd4_enc encoder; __be32 *p; @@ -4463,10 +4375,15 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) if (op->opnum == OP_ILLEGAL) goto status; + if (op->status && opdesc && + !(opdesc->op_flags & OP_NONTRIVIAL_ERROR_ENCODE)) + goto status; BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || !nfsd4_enc_ops[op->opnum]); encoder = nfsd4_enc_ops[op->opnum]; op->status = encoder(resp, op->status, &op->u); + if (opdesc && opdesc->op_release) + opdesc->op_release(&op->u); xdr_commit_encode(xdr); /* nfsd4_check_resp_size guarantees enough room for error status */ @@ -4573,6 +4490,7 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p) args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; args->pagelist = rqstp->rq_arg.pages; args->pagelen = rqstp->rq_arg.page_len; + args->tail = false; args->tmpp = NULL; args->to_free = NULL; args->ops = args->iops; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 063ae7de2c12..7e3af3ef0917 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -475,7 +475,7 @@ static int nfsd_get_default_max_blksize(void) return ret; } -static struct svc_serv_ops nfsd_thread_sv_ops = { +static const struct svc_serv_ops nfsd_thread_sv_ops = { .svo_shutdown = nfsd_last_thread, .svo_function = nfsd, .svo_enqueue_xprt = svc_xprt_do_enqueue, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 72c6ad136107..1e4edbf70052 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -538,6 +538,7 @@ struct nfsd4_seek { struct nfsd4_op { int opnum; + const struct nfsd4_operation * opdesc; __be32 status; union nfsd4_op_u { struct nfsd4_access access; @@ -614,6 +615,7 @@ struct nfsd4_compoundargs { __be32 * end; struct page ** pagelist; int pagelen; + bool tail; __be32 tmp[8]; __be32 * tmpp; struct svcxdr_tmpbuf *to_free; @@ -661,6 +663,7 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp) return argp->opcnt == resp->opcnt; } +const struct nfsd4_operation *OPDESC(struct nfsd4_op *op); int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op); void warn_on_nonidempotent_op(struct nfsd4_op *op); @@ -748,6 +751,53 @@ extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *); extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr); +enum nfsd4_op_flags { + ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ + ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ + ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ + /* For rfc 5661 section 2.6.3.1.1: */ + OP_HANDLES_WRONGSEC = 1 << 3, + OP_IS_PUTFH_LIKE = 1 << 4, + /* + * These are the ops whose result size we estimate before + * encoding, to avoid performing an op then not being able to + * respond or cache a response. This includes writes and setattrs + * as well as the operations usually called "nonidempotent": + */ + OP_MODIFIES_SOMETHING = 1 << 5, + /* + * Cache compounds containing these ops in the xid-based drc: + * We use the DRC for compounds containing non-idempotent + * operations, *except* those that are 4.1-specific (since + * sessions provide their own EOS), and except for stateful + * operations other than setclientid and setclientid_confirm + * (since sequence numbers provide EOS for open, lock, etc in + * the v4.0 case). + */ + OP_CACHEME = 1 << 6, + /* + * These are ops which clear current state id. + */ + OP_CLEAR_STATEID = 1 << 7, + /* Most ops return only an error on failure; some may do more: */ + OP_NONTRIVIAL_ERROR_ENCODE = 1 << 8, +}; + +struct nfsd4_operation { + __be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *, + union nfsd4_op_u *); + void (*op_release)(union nfsd4_op_u *); + u32 op_flags; + char *op_name; + /* Try to get response size before operation */ + u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *); + void (*op_get_currentstateid)(struct nfsd4_compound_state *, + union nfsd4_op_u *); + void (*op_set_currentstateid)(struct nfsd4_compound_state *, + union nfsd4_op_u *); +}; + + #endif /* diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index a3f8af9bd543..38f561b2dda3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -99,7 +99,7 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ - struct svc_serv_ops *sv_ops; /* server operations */ + const struct svc_serv_ops *sv_ops; /* server operations */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same @@ -465,7 +465,7 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - struct svc_serv_ops *); + const struct svc_serv_ops *); struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, @@ -475,7 +475,7 @@ void svc_exit_thread(struct svc_rqst *); unsigned int svc_pool_map_get(void); void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - struct svc_serv_ops *); + const struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index ddb7f94a9d06..6a2ad38f5458 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -31,7 +31,7 @@ struct svc_xprt_ops { struct svc_xprt_class { const char *xcl_name; struct module *xcl_owner; - struct svc_xprt_ops *xcl_ops; + const struct svc_xprt_ops *xcl_ops; struct list_head xcl_list; u32 xcl_max_payload; int xcl_ident; @@ -49,7 +49,7 @@ struct svc_xpt_user { struct svc_xprt { struct svc_xprt_class *xpt_class; - struct svc_xprt_ops *xpt_ops; + const struct svc_xprt_ops *xpt_ops; struct kref xpt_ref; struct list_head xpt_list; struct list_head xpt_ready; diff --git a/include/rdma/rw.h b/include/rdma/rw.h index 377d865e506d..a3cbbc7b6417 100644 --- a/include/rdma/rw.h +++ b/include/rdma/rw.h @@ -81,6 +81,8 @@ struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr); +unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num, + unsigned int maxpages); void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr); int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr); void rdma_rw_cleanup_mrs(struct ib_qp *qp); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 85ce0db5b0a6..aa04666f929d 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -421,7 +421,7 @@ __svc_init_bc(struct svc_serv *serv) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - struct svc_serv_ops *ops) + const struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int vers; @@ -486,7 +486,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - struct svc_serv_ops *ops) + const struct svc_serv_ops *ops) { return __svc_create(prog, bufsize, /*npools*/1, ops); } @@ -494,7 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - struct svc_serv_ops *ops) + const struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e18500151236..399fab5d1936 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -693,7 +693,7 @@ static struct svc_xprt *svc_udp_create(struct svc_serv *serv, return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags); } -static struct svc_xprt_ops svc_udp_ops = { +static const struct svc_xprt_ops svc_udp_ops = { .xpo_create = svc_udp_create, .xpo_recvfrom = svc_udp_recvfrom, .xpo_sendto = svc_udp_sendto, @@ -1241,7 +1241,7 @@ static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt) { } -static struct svc_xprt_ops svc_tcp_bc_ops = { +static const struct svc_xprt_ops svc_tcp_bc_ops = { .xpo_create = svc_bc_tcp_create, .xpo_detach = svc_bc_tcp_sock_detach, .xpo_free = svc_bc_sock_free, @@ -1275,7 +1275,7 @@ static void svc_cleanup_bc_xprt_sock(void) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -static struct svc_xprt_ops svc_tcp_ops = { +static const struct svc_xprt_ops svc_tcp_ops = { .xpo_create = svc_tcp_create, .xpo_recvfrom = svc_tcp_recvfrom, .xpo_sendto = svc_tcp_sendto, diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 933f79bed270..7dcda4597057 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -660,19 +660,21 @@ out_initerr: return -EIO; } +/* Walk the segments in the Read chunk starting at @p and construct + * RDMA Read operations to pull the chunk to the server. + */ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, struct svc_rdma_read_info *info, __be32 *p) { int ret; + ret = -EINVAL; info->ri_chunklen = 0; - while (*p++ != xdr_zero) { + while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) { u32 rs_handle, rs_length; u64 rs_offset; - if (be32_to_cpup(p++) != info->ri_position) - break; rs_handle = be32_to_cpup(p++); rs_length = be32_to_cpup(p++); p = xdr_decode_hyper(p, &rs_offset); @@ -689,78 +691,6 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, return ret; } -/* If there is inline content following the Read chunk, append it to - * the page list immediately following the data payload. This has to - * be done after the reader function has determined how many pages - * were consumed for RDMA Read. - * - * On entry, ri_pageno and ri_pageoff point directly to the end of the - * page list. On exit, both have been updated to the new "next byte". - * - * Assumptions: - * - Inline content fits entirely in rq_pages[0] - * - Trailing content is only a handful of bytes - */ -static int svc_rdma_copy_tail(struct svc_rqst *rqstp, - struct svc_rdma_read_info *info) -{ - struct svc_rdma_op_ctxt *head = info->ri_readctxt; - unsigned int tail_length, remaining; - u8 *srcp, *destp; - - /* Assert that all inline content fits in page 0. This is an - * implementation limit, not a protocol limit. - */ - if (head->arg.head[0].iov_len > PAGE_SIZE) { - pr_warn_once("svcrdma: too much trailing inline content\n"); - return -EINVAL; - } - - srcp = head->arg.head[0].iov_base; - srcp += info->ri_position; - tail_length = head->arg.head[0].iov_len - info->ri_position; - remaining = tail_length; - - /* If there is room on the last page in the page list, try to - * fit the trailing content there. - */ - if (info->ri_pageoff > 0) { - unsigned int len; - - len = min_t(unsigned int, remaining, - PAGE_SIZE - info->ri_pageoff); - destp = page_address(rqstp->rq_pages[info->ri_pageno]); - destp += info->ri_pageoff; - - memcpy(destp, srcp, len); - srcp += len; - destp += len; - info->ri_pageoff += len; - remaining -= len; - - if (info->ri_pageoff == PAGE_SIZE) { - info->ri_pageno++; - info->ri_pageoff = 0; - } - } - - /* Otherwise, a fresh page is needed. */ - if (remaining) { - head->arg.pages[info->ri_pageno] = - rqstp->rq_pages[info->ri_pageno]; - head->count++; - - destp = page_address(rqstp->rq_pages[info->ri_pageno]); - memcpy(destp, srcp, remaining); - info->ri_pageoff += remaining; - } - - head->arg.page_len += tail_length; - head->arg.len += tail_length; - head->arg.buflen += tail_length; - return 0; -} - /* Construct RDMA Reads to pull over a normal Read chunk. The chunk * data lands in the page list of head->arg.pages. * @@ -785,34 +715,28 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp, if (ret < 0) goto out; - /* Read chunk may need XDR round-up (see RFC 5666, s. 3.7). + /* Split the Receive buffer between the head and tail + * buffers at Read chunk's position. XDR roundup of the + * chunk is not included in either the pagelist or in + * the tail. */ - if (info->ri_chunklen & 3) { - u32 padlen = 4 - (info->ri_chunklen & 3); - - info->ri_chunklen += padlen; + head->arg.tail[0].iov_base = + head->arg.head[0].iov_base + info->ri_position; + head->arg.tail[0].iov_len = + head->arg.head[0].iov_len - info->ri_position; + head->arg.head[0].iov_len = info->ri_position; - /* NB: data payload always starts on XDR alignment, - * thus the pad can never contain a page boundary. - */ - info->ri_pageoff += padlen; - if (info->ri_pageoff == PAGE_SIZE) { - info->ri_pageno++; - info->ri_pageoff = 0; - } - } + /* Read chunk may need XDR roundup (see RFC 5666, s. 3.7). + * + * NFSv2/3 write decoders need the length of the tail to + * contain the size of the roundup padding. + */ + head->arg.tail[0].iov_len += 4 - (info->ri_chunklen & 3); head->arg.page_len = info->ri_chunklen; head->arg.len += info->ri_chunklen; head->arg.buflen += info->ri_chunklen; - if (info->ri_position < head->arg.head[0].iov_len) { - ret = svc_rdma_copy_tail(rqstp, info); - if (ret < 0) - goto out; - } - head->arg.head[0].iov_len = info->ri_position; - out: return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index e660d4965b18..5caf8e722a11 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -51,6 +51,7 @@ #include <linux/workqueue.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> +#include <rdma/rw.h> #include <linux/sunrpc/svc_rdma.h> #include <linux/export.h> #include "xprt_rdma.h" @@ -70,7 +71,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt); static int svc_rdma_secure_port(struct svc_rqst *); static void svc_rdma_kill_temp_xprt(struct svc_xprt *); -static struct svc_xprt_ops svc_rdma_ops = { +static const struct svc_xprt_ops svc_rdma_ops = { .xpo_create = svc_rdma_create, .xpo_recvfrom = svc_rdma_recvfrom, .xpo_sendto = svc_rdma_sendto, @@ -98,7 +99,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *, static void svc_rdma_bc_detach(struct svc_xprt *); static void svc_rdma_bc_free(struct svc_xprt *); -static struct svc_xprt_ops svc_rdma_bc_ops = { +static const struct svc_xprt_ops svc_rdma_bc_ops = { .xpo_create = svc_rdma_bc_create, .xpo_detach = svc_rdma_bc_detach, .xpo_free = svc_rdma_bc_free, @@ -167,8 +168,8 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt) { unsigned int i; - /* Each RPC/RDMA credit can consume a number of send - * and receive WQEs. One ctxt is allocated for each. + /* Each RPC/RDMA credit can consume one Receive and + * one Send WQE at the same time. */ i = xprt->sc_sq_depth + xprt->sc_rq_depth; @@ -713,7 +714,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct ib_qp_init_attr qp_attr; struct ib_device *dev; struct sockaddr *sap; - unsigned int i; + unsigned int i, ctxts; int ret = 0; listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); @@ -742,14 +743,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge, (size_t)RPCSVC_MAXPAGES); newxprt->sc_max_req_size = svcrdma_max_req_size; - newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr, - svcrdma_max_requests); - newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); - newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr, - svcrdma_max_bc_requests); + newxprt->sc_max_requests = svcrdma_max_requests; + newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; newxprt->sc_rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests; - newxprt->sc_sq_depth = newxprt->sc_rq_depth; + if (newxprt->sc_rq_depth > dev->attrs.max_qp_wr) { + pr_warn("svcrdma: reducing receive depth to %d\n", + dev->attrs.max_qp_wr); + newxprt->sc_rq_depth = dev->attrs.max_qp_wr; + newxprt->sc_max_requests = newxprt->sc_rq_depth - 2; + newxprt->sc_max_bc_requests = 2; + } + newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); + ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES); + ctxts *= newxprt->sc_max_requests; + newxprt->sc_sq_depth = newxprt->sc_rq_depth + ctxts; + if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) { + pr_warn("svcrdma: reducing send depth to %d\n", + dev->attrs.max_qp_wr); + newxprt->sc_sq_depth = dev->attrs.max_qp_wr; + } atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth); if (!svc_rdma_prealloc_ctxts(newxprt)) @@ -784,8 +797,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) qp_attr.event_handler = qp_event_handler; qp_attr.qp_context = &newxprt->sc_xprt; qp_attr.port_num = newxprt->sc_port_num; - qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests; - qp_attr.cap.max_send_wr = newxprt->sc_sq_depth; + qp_attr.cap.max_rdma_ctxs = ctxts; + qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts; qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth; qp_attr.cap.max_send_sge = newxprt->sc_max_sge; qp_attr.cap.max_recv_sge = newxprt->sc_max_sge; @@ -853,6 +866,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap)); dprintk(" max_sge : %d\n", newxprt->sc_max_sge); dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); + dprintk(" rdma_rw_ctxs : %d\n", ctxts); dprintk(" max_requests : %d\n", newxprt->sc_max_requests); dprintk(" ord : %d\n", newxprt->sc_ord); |