From c1326210477ecc06c53221f0005c64419aba30d6 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:20 +0000 Subject: nfs,nfsd: NFSv4.2 extended attribute protocol definitions Add definitions for the new operations, errors and flags as defined in RFC 8276 (File System Extended Attributes in NFSv4). Signed-off-by: Frank van der Linden Signed-off-by: Chuck Lever --- include/linux/nfs4.h | 20 ++++++++++++++++++++ include/uapi/linux/nfs4.h | 3 +++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 4dba3c948932..e6ca9d1d2e76 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -150,6 +150,12 @@ enum nfs_opnum4 { OP_WRITE_SAME = 70, OP_CLONE = 71, + /* xattr support (RFC8726) */ + OP_GETXATTR = 72, + OP_SETXATTR = 73, + OP_LISTXATTRS = 74, + OP_REMOVEXATTR = 75, + OP_ILLEGAL = 10044, }; @@ -280,6 +286,10 @@ enum nfsstat4 { NFS4ERR_WRONG_LFS = 10092, NFS4ERR_BADLABEL = 10093, NFS4ERR_OFFLOAD_NO_REQS = 10094, + + /* xattr (RFC8276) */ + NFS4ERR_NOXATTR = 10095, + NFS4ERR_XATTR2BIG = 10096, }; static inline bool seqid_mutating_err(u32 err) @@ -452,6 +462,7 @@ enum change_attr_type4 { #define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) #define FATTR4_WORD2_MODE_UMASK (1UL << 17) +#define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18) /* MDS threshold bitmap bits */ #define THRESHOLD_RD (1UL << 0) @@ -700,4 +711,13 @@ struct nl4_server { struct nfs42_netaddr nl4_addr; /* NL4_NETADDR */ } u; }; + +/* + * Options for setxattr. These match the flags for setxattr(2). + */ +enum nfs4_setxattr_options { + SETXATTR4_EITHER = 0, + SETXATTR4_CREATE = 1, + SETXATTR4_REPLACE = 2, +}; #endif diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 8572930cf5b0..bf197e99b98f 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -33,6 +33,9 @@ #define NFS4_ACCESS_EXTEND 0x0008 #define NFS4_ACCESS_DELETE 0x0010 #define NFS4_ACCESS_EXECUTE 0x0020 +#define NFS4_ACCESS_XAREAD 0x0040 +#define NFS4_ACCESS_XAWRITE 0x0080 +#define NFS4_ACCESS_XALIST 0x0100 #define NFS4_FH_PERSISTENT 0x0000 #define NFS4_FH_NOEXPIRE_WITH_OPEN 0x0001 -- cgit v1.2.3 From 08b5d5014a27e717826999ad20e394a8811aae92 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:18 +0000 Subject: xattr: break delegations in {set,remove}xattr set/removexattr on an exported filesystem should break NFS delegations. This is true in general, but also for the upcoming support for RFC 8726 (NFSv4 extended attribute support). Make sure that they do. Additionally, they need to grow a _locked variant, since callers might call this with i_rwsem held (like the NFS server code). Cc: stable@vger.kernel.org # v4.9+ Cc: linux-fsdevel@vger.kernel.org Cc: Al Viro Signed-off-by: Frank van der Linden Signed-off-by: Chuck Lever --- fs/xattr.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++----- include/linux/xattr.h | 2 ++ 2 files changed, 79 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/xattr.c b/fs/xattr.c index 91608d9bfc6a..95f38f57347f 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -204,10 +204,22 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, return error; } - +/** + * __vfs_setxattr_locked: set an extended attribute while holding the inode + * lock + * + * @dentry - object to perform setxattr on + * @name - xattr name to set + * @value - value to set @name to + * @size - size of @value + * @flags - flags to pass into filesystem operations + * @delegated_inode - on return, will contain an inode pointer that + * a delegation was broken on, NULL if none. + */ int -vfs_setxattr(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags) +__vfs_setxattr_locked(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, + struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; int error; @@ -216,15 +228,40 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value, if (error) return error; - inode_lock(inode); error = security_inode_setxattr(dentry, name, value, size, flags); if (error) goto out; + error = try_break_deleg(inode, delegated_inode); + if (error) + goto out; + error = __vfs_setxattr_noperm(dentry, name, value, size, flags); out: + return error; +} +EXPORT_SYMBOL_GPL(__vfs_setxattr_locked); + +int +vfs_setxattr(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) +{ + struct inode *inode = dentry->d_inode; + struct inode *delegated_inode = NULL; + int error; + +retry_deleg: + inode_lock(inode); + error = __vfs_setxattr_locked(dentry, name, value, size, flags, + &delegated_inode); inode_unlock(inode); + + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } return error; } EXPORT_SYMBOL_GPL(vfs_setxattr); @@ -378,8 +415,18 @@ __vfs_removexattr(struct dentry *dentry, const char *name) } EXPORT_SYMBOL(__vfs_removexattr); +/** + * __vfs_removexattr_locked: set an extended attribute while holding the inode + * lock + * + * @dentry - object to perform setxattr on + * @name - name of xattr to remove + * @delegated_inode - on return, will contain an inode pointer that + * a delegation was broken on, NULL if none. + */ int -vfs_removexattr(struct dentry *dentry, const char *name) +__vfs_removexattr_locked(struct dentry *dentry, const char *name, + struct inode **delegated_inode) { struct inode *inode = dentry->d_inode; int error; @@ -388,11 +435,14 @@ vfs_removexattr(struct dentry *dentry, const char *name) if (error) return error; - inode_lock(inode); error = security_inode_removexattr(dentry, name); if (error) goto out; + error = try_break_deleg(inode, delegated_inode); + if (error) + goto out; + error = __vfs_removexattr(dentry, name); if (!error) { @@ -401,12 +451,32 @@ vfs_removexattr(struct dentry *dentry, const char *name) } out: + return error; +} +EXPORT_SYMBOL_GPL(__vfs_removexattr_locked); + +int +vfs_removexattr(struct dentry *dentry, const char *name) +{ + struct inode *inode = dentry->d_inode; + struct inode *delegated_inode = NULL; + int error; + +retry_deleg: + inode_lock(inode); + error = __vfs_removexattr_locked(dentry, name, &delegated_inode); inode_unlock(inode); + + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } + return error; } EXPORT_SYMBOL_GPL(vfs_removexattr); - /* * Extended attribute SET operations */ diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 47eaa34f8761..a2f3cd02653c 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -51,8 +51,10 @@ ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); int __vfs_setxattr(struct dentry *, struct inode *, const char *, const void *, size_t, int); int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int); +int __vfs_setxattr_locked(struct dentry *, const char *, const void *, size_t, int, struct inode **); int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); int __vfs_removexattr(struct dentry *, const char *); +int __vfs_removexattr_locked(struct dentry *, const char *, struct inode **); int vfs_removexattr(struct dentry *, const char *); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); -- cgit v1.2.3 From cab8d289c5ad541a5351a651d95c4086b7f84d7c Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:19 +0000 Subject: xattr: add a function to check if a namespace is supported Add a function that checks is an extended attribute namespace is supported for an inode, meaning that a handler must be present for either the whole namespace, or at least one synthetic xattr in the namespace. To be used by the nfs server code when being queried for extended attributes support. Cc: linux-fsdevel@vger.kernel.org Cc: Al Viro Signed-off-by: Frank van der Linden Signed-off-by: Chuck Lever --- fs/xattr.c | 27 +++++++++++++++++++++++++++ include/linux/xattr.h | 2 ++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/fs/xattr.c b/fs/xattr.c index 95f38f57347f..386b45676d7e 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -134,6 +134,33 @@ xattr_permission(struct inode *inode, const char *name, int mask) return inode_permission(inode, mask); } +/* + * Look for any handler that deals with the specified namespace. + */ +int +xattr_supported_namespace(struct inode *inode, const char *prefix) +{ + const struct xattr_handler **handlers = inode->i_sb->s_xattr; + const struct xattr_handler *handler; + size_t preflen; + + if (!(inode->i_opflags & IOP_XATTR)) { + if (unlikely(is_bad_inode(inode))) + return -EIO; + return -EOPNOTSUPP; + } + + preflen = strlen(prefix); + + for_each_xattr_handler(handlers, handler) { + if (!strncmp(xattr_prefix(handler), prefix, preflen)) + return 0; + } + + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(xattr_supported_namespace); + int __vfs_setxattr(struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags) diff --git a/include/linux/xattr.h b/include/linux/xattr.h index a2f3cd02653c..fac75810d9d3 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -61,6 +61,8 @@ ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_siz ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, size_t size, gfp_t flags); +int xattr_supported_namespace(struct inode *inode, const char *prefix); + static inline const char *xattr_prefix(const struct xattr_handler *handler) { return handler->prefix ?: handler->name; -- cgit v1.2.3 From 23e50fe3a5e6045a573c69d4b0e3d78aa6183323 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Tue, 23 Jun 2020 22:39:26 +0000 Subject: nfsd: implement the xattr functions and en/decode logic Implement the main entry points for the *XATTR operations. Add functions to calculate the reply size for the user extended attribute operations, and implement the XDR encode / decode logic for these operations. Add the user extended attributes operations to nfsd4_ops. Signed-off-by: Frank van der Linden Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 120 ++++++++++++++ fs/nfsd/nfs4xdr.c | 450 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs4.h | 2 +- 3 files changed, 571 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 841aad772798..a527da3d8052 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2097,6 +2097,68 @@ out: } #endif /* CONFIG_NFSD_PNFS */ +static __be32 +nfsd4_getxattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + struct nfsd4_getxattr *getxattr = &u->getxattr; + + return nfsd_getxattr(rqstp, &cstate->current_fh, + getxattr->getxa_name, &getxattr->getxa_buf, + &getxattr->getxa_len); +} + +static __be32 +nfsd4_setxattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + struct nfsd4_setxattr *setxattr = &u->setxattr; + __be32 ret; + + if (opens_in_grace(SVC_NET(rqstp))) + return nfserr_grace; + + ret = nfsd_setxattr(rqstp, &cstate->current_fh, setxattr->setxa_name, + setxattr->setxa_buf, setxattr->setxa_len, + setxattr->setxa_flags); + + if (!ret) + set_change_info(&setxattr->setxa_cinfo, &cstate->current_fh); + + return ret; +} + +static __be32 +nfsd4_listxattrs(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + /* + * Get the entire list, then copy out only the user attributes + * in the encode function. + */ + return nfsd_listxattr(rqstp, &cstate->current_fh, + &u->listxattrs.lsxa_buf, &u->listxattrs.lsxa_len); +} + +static __be32 +nfsd4_removexattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + struct nfsd4_removexattr *removexattr = &u->removexattr; + __be32 ret; + + if (opens_in_grace(SVC_NET(rqstp))) + return nfserr_grace; + + ret = nfsd_removexattr(rqstp, &cstate->current_fh, + removexattr->rmxa_name); + + if (!ret) + set_change_info(&removexattr->rmxa_cinfo, &cstate->current_fh); + + return ret; +} + /* * NULL call. */ @@ -2706,6 +2768,42 @@ static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) return (op_encode_hdr_size + 3) * sizeof(__be32); } +static inline u32 nfsd4_getxattr_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + u32 maxcount, rlen; + + maxcount = svc_max_payload(rqstp); + rlen = min_t(u32, XATTR_SIZE_MAX, maxcount); + + return (op_encode_hdr_size + 1 + XDR_QUADLEN(rlen)) * sizeof(__be32); +} + +static inline u32 nfsd4_setxattr_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz) + * sizeof(__be32); +} +static inline u32 nfsd4_listxattrs_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + u32 maxcount, rlen; + + maxcount = svc_max_payload(rqstp); + rlen = min(op->u.listxattrs.lsxa_maxcount, maxcount); + + return (op_encode_hdr_size + 4 + XDR_QUADLEN(rlen)) * sizeof(__be32); +} + +static inline u32 nfsd4_removexattr_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz) + * sizeof(__be32); +} + + static const struct nfsd4_operation nfsd4_ops[] = { [OP_ACCESS] = { .op_func = nfsd4_access, @@ -3087,6 +3185,28 @@ static const struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_COPY_NOTIFY", .op_rsize_bop = nfsd4_copy_notify_rsize, }, + [OP_GETXATTR] = { + .op_func = nfsd4_getxattr, + .op_name = "OP_GETXATTR", + .op_rsize_bop = nfsd4_getxattr_rsize, + }, + [OP_SETXATTR] = { + .op_func = nfsd4_setxattr, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_SETXATTR", + .op_rsize_bop = nfsd4_setxattr_rsize, + }, + [OP_LISTXATTRS] = { + .op_func = nfsd4_listxattrs, + .op_name = "OP_LISTXATTRS", + .op_rsize_bop = nfsd4_listxattrs_rsize, + }, + [OP_REMOVEXATTR] = { + .op_func = nfsd4_removexattr, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_REMOVEXATTR", + .op_rsize_bop = nfsd4_removexattr_rsize, + }, }; /** diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 48806b493eba..8bacc0ceae19 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -41,6 +41,8 @@ #include #include #include +#include +#include #include "idmap.h" #include "acl.h" @@ -1877,6 +1879,208 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) DECODE_TAIL; } +/* + * XDR data that is more than PAGE_SIZE in size is normally part of a + * read or write. However, the size of extended attributes is limited + * by the maximum request size, and then further limited by the underlying + * filesystem limits. This can exceed PAGE_SIZE (currently, XATTR_SIZE_MAX + * is 64k). Since there is no kvec- or page-based interface to xattrs, + * and we're not dealing with contiguous pages, we need to do some copying. + */ + +/* + * Decode data into buffer. Uses head and pages constructed by + * svcxdr_construct_vector. + */ +static __be32 +nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct kvec *head, + struct page **pages, char **bufp, u32 buflen) +{ + char *tmp, *dp; + u32 len; + + if (buflen <= head->iov_len) { + /* + * We're in luck, the head has enough space. Just return + * the head, no need for copying. + */ + *bufp = head->iov_base; + return 0; + } + + tmp = svcxdr_tmpalloc(argp, buflen); + if (tmp == NULL) + return nfserr_jukebox; + + dp = tmp; + memcpy(dp, head->iov_base, head->iov_len); + buflen -= head->iov_len; + dp += head->iov_len; + + while (buflen > 0) { + len = min_t(u32, buflen, PAGE_SIZE); + memcpy(dp, page_address(*pages), len); + + buflen -= len; + dp += len; + pages++; + } + + *bufp = tmp; + return 0; +} + +/* + * Get a user extended attribute name from the XDR buffer. + * It will not have the "user." prefix, so prepend it. + * Lastly, check for nul characters in the name. + */ +static __be32 +nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) +{ + DECODE_HEAD; + char *name, *sp, *dp; + u32 namelen, cnt; + + READ_BUF(4); + namelen = be32_to_cpup(p++); + + if (namelen > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) + return nfserr_nametoolong; + + if (namelen == 0) + goto xdr_error; + + READ_BUF(namelen); + + name = svcxdr_tmpalloc(argp, namelen + XATTR_USER_PREFIX_LEN + 1); + if (!name) + return nfserr_jukebox; + + memcpy(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); + + /* + * Copy the extended attribute name over while checking for 0 + * characters. + */ + sp = (char *)p; + dp = name + XATTR_USER_PREFIX_LEN; + cnt = namelen; + + while (cnt-- > 0) { + if (*sp == '\0') + goto xdr_error; + *dp++ = *sp++; + } + *dp = '\0'; + + *namep = name; + + DECODE_TAIL; +} + +/* + * A GETXATTR op request comes without a length specifier. We just set the + * maximum length for the reply based on XATTR_SIZE_MAX and the maximum + * channel reply size. nfsd_getxattr will probe the length of the xattr, + * check it against getxa_len, and allocate + return the value. + */ +static __be32 +nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, + struct nfsd4_getxattr *getxattr) +{ + __be32 status; + u32 maxcount; + + status = nfsd4_decode_xattr_name(argp, &getxattr->getxa_name); + if (status) + return status; + + maxcount = svc_max_payload(argp->rqstp); + maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount); + + getxattr->getxa_len = maxcount; + + return status; +} + +static __be32 +nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, + struct nfsd4_setxattr *setxattr) +{ + DECODE_HEAD; + u32 flags, maxcount, size; + struct kvec head; + struct page **pagelist; + + READ_BUF(4); + flags = be32_to_cpup(p++); + + if (flags > SETXATTR4_REPLACE) + return nfserr_inval; + setxattr->setxa_flags = flags; + + status = nfsd4_decode_xattr_name(argp, &setxattr->setxa_name); + if (status) + return status; + + maxcount = svc_max_payload(argp->rqstp); + maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount); + + READ_BUF(4); + size = be32_to_cpup(p++); + if (size > maxcount) + return nfserr_xattr2big; + + setxattr->setxa_len = size; + if (size > 0) { + status = svcxdr_construct_vector(argp, &head, &pagelist, size); + if (status) + return status; + + status = nfsd4_vbuf_from_vector(argp, &head, pagelist, + &setxattr->setxa_buf, size); + } + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, + struct nfsd4_listxattrs *listxattrs) +{ + DECODE_HEAD; + u32 maxcount; + + READ_BUF(12); + p = xdr_decode_hyper(p, &listxattrs->lsxa_cookie); + + /* + * If the cookie is too large to have even one user.x attribute + * plus trailing '\0' left in a maximum size buffer, it's invalid. + */ + if (listxattrs->lsxa_cookie >= + (XATTR_LIST_MAX / (XATTR_USER_PREFIX_LEN + 2))) + return nfserr_badcookie; + + maxcount = be32_to_cpup(p++); + if (maxcount < 8) + /* Always need at least 2 words (length and one character) */ + return nfserr_inval; + + maxcount = min(maxcount, svc_max_payload(argp->rqstp)); + listxattrs->lsxa_maxcount = maxcount; + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp, + struct nfsd4_removexattr *removexattr) +{ + return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name); +} + static __be32 nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) { @@ -1973,6 +2177,11 @@ static const nfsd4_dec nfsd4_dec_ops[] = { [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, + /* RFC 8276 extended atributes operations */ + [OP_GETXATTR] = (nfsd4_dec)nfsd4_decode_getxattr, + [OP_SETXATTR] = (nfsd4_dec)nfsd4_decode_setxattr, + [OP_LISTXATTRS] = (nfsd4_dec)nfsd4_decode_listxattrs, + [OP_REMOVEXATTR] = (nfsd4_dec)nfsd4_decode_removexattr, }; static inline bool @@ -4458,6 +4667,241 @@ nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) return nfserr; } +/* + * Encode kmalloc-ed buffer in to XDR stream. + */ +static int +nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen) +{ + u32 cplen; + __be32 *p; + + cplen = min_t(unsigned long, buflen, + ((void *)xdr->end - (void *)xdr->p)); + p = xdr_reserve_space(xdr, cplen); + if (!p) + return nfserr_resource; + + memcpy(p, buf, cplen); + buf += cplen; + buflen -= cplen; + + while (buflen) { + cplen = min_t(u32, buflen, PAGE_SIZE); + p = xdr_reserve_space(xdr, cplen); + if (!p) + return nfserr_resource; + + memcpy(p, buf, cplen); + + if (cplen < PAGE_SIZE) { + /* + * We're done, with a length that wasn't page + * aligned, so possibly not word aligned. Pad + * any trailing bytes with 0. + */ + xdr_encode_opaque_fixed(p, NULL, cplen); + break; + } + + buflen -= PAGE_SIZE; + buf += PAGE_SIZE; + } + + return 0; +} + +static __be32 +nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_getxattr *getxattr) +{ + struct xdr_stream *xdr = &resp->xdr; + __be32 *p, err; + + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + + *p = cpu_to_be32(getxattr->getxa_len); + + if (getxattr->getxa_len == 0) + return 0; + + err = nfsd4_vbuf_to_stream(xdr, getxattr->getxa_buf, + getxattr->getxa_len); + + kvfree(getxattr->getxa_buf); + + return err; +} + +static __be32 +nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_setxattr *setxattr) +{ + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; + + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + + encode_cinfo(p, &setxattr->setxa_cinfo); + + return 0; +} + +/* + * See if there are cookie values that can be rejected outright. + */ +static __be32 +nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs, + u32 *offsetp) +{ + u64 cookie = listxattrs->lsxa_cookie; + + /* + * If the cookie is larger than the maximum number we can fit + * in either the buffer we just got back from vfs_listxattr, or, + * XDR-encoded, in the return buffer, it's invalid. + */ + if (cookie > (listxattrs->lsxa_len) / (XATTR_USER_PREFIX_LEN + 2)) + return nfserr_badcookie; + + if (cookie > (listxattrs->lsxa_maxcount / + (XDR_QUADLEN(XATTR_USER_PREFIX_LEN + 2) + 4))) + return nfserr_badcookie; + + *offsetp = (u32)cookie; + return 0; +} + +static __be32 +nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_listxattrs *listxattrs) +{ + struct xdr_stream *xdr = &resp->xdr; + u32 cookie_offset, count_offset, eof; + u32 left, xdrleft, slen, count; + u32 xdrlen, offset; + u64 cookie; + char *sp; + __be32 status; + __be32 *p; + u32 nuser; + + eof = 1; + + status = nfsd4_listxattr_validate_cookie(listxattrs, &offset); + if (status) + goto out; + + /* + * Reserve space for the cookie and the name array count. Record + * the offsets to save them later. + */ + cookie_offset = xdr->buf->len; + count_offset = cookie_offset + 8; + p = xdr_reserve_space(xdr, 12); + if (!p) { + status = nfserr_resource; + goto out; + } + + count = 0; + left = listxattrs->lsxa_len; + sp = listxattrs->lsxa_buf; + nuser = 0; + + xdrleft = listxattrs->lsxa_maxcount; + + while (left > 0 && xdrleft > 0) { + slen = strlen(sp); + + /* + * Check if this a user. attribute, skip it if not. + */ + if (strncmp(sp, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) + goto contloop; + + slen -= XATTR_USER_PREFIX_LEN; + xdrlen = 4 + ((slen + 3) & ~3); + if (xdrlen > xdrleft) { + if (count == 0) { + /* + * Can't even fit the first attribute name. + */ + status = nfserr_toosmall; + goto out; + } + eof = 0; + goto wreof; + } + + left -= XATTR_USER_PREFIX_LEN; + sp += XATTR_USER_PREFIX_LEN; + if (nuser++ < offset) + goto contloop; + + + p = xdr_reserve_space(xdr, xdrlen); + if (!p) { + status = nfserr_resource; + goto out; + } + + p = xdr_encode_opaque(p, sp, slen); + + xdrleft -= xdrlen; + count++; +contloop: + sp += slen + 1; + left -= slen + 1; + } + + /* + * If there were user attributes to copy, but we didn't copy + * any, the offset was too large (e.g. the cookie was invalid). + */ + if (nuser > 0 && count == 0) { + status = nfserr_badcookie; + goto out; + } + +wreof: + p = xdr_reserve_space(xdr, 4); + if (!p) { + status = nfserr_resource; + goto out; + } + *p = cpu_to_be32(eof); + + cookie = offset + count; + + write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &cookie, 8); + count = htonl(count); + write_bytes_to_xdr_buf(xdr->buf, count_offset, &count, 4); +out: + if (listxattrs->lsxa_len) + kvfree(listxattrs->lsxa_buf); + return status; +} + +static __be32 +nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_removexattr *removexattr) +{ + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; + + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + + p = encode_cinfo(p, &removexattr->rmxa_cinfo); + return 0; +} + typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); /* @@ -4547,6 +4991,12 @@ static const nfsd4_enc nfsd4_enc_ops[] = { [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, + + /* RFC 8276 extended atributes operations */ + [OP_GETXATTR] = (nfsd4_enc)nfsd4_encode_getxattr, + [OP_SETXATTR] = (nfsd4_enc)nfsd4_encode_setxattr, + [OP_LISTXATTRS] = (nfsd4_enc)nfsd4_encode_listxattrs, + [OP_REMOVEXATTR] = (nfsd4_enc)nfsd4_encode_removexattr, }; /* diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index e6ca9d1d2e76..33ebe476428e 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -165,7 +165,7 @@ Needs to be updated if more operations are defined in future.*/ #define FIRST_NFS4_OP OP_ACCESS #define LAST_NFS40_OP OP_RELEASE_LOCKOWNER #define LAST_NFS41_OP OP_RECLAIM_COMPLETE -#define LAST_NFS42_OP OP_CLONE +#define LAST_NFS42_OP OP_REMOVEXATTR #define LAST_NFS4_OP LAST_NFS42_OP enum nfsstat4 { -- cgit v1.2.3 From 10b9d99a3dbbf5e9af838d1887a1047875dcafd9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 18 Apr 2020 18:30:42 -0400 Subject: SUNRPC: Augment server-side rpcgss tracepoints Add similar tracepoints to those that were recently added on the client side to track failures in the integ and priv unwrap paths. And, let's collect the seqno-specific tracepoints together with a common naming convention. Regarding the gss_check_seq_num() changes: everywhere else treats the GSS sequence number as an unsigned 32-bit integer. As far back as 2.6.12, I couldn't find a compelling reason to do things differently here. As a defensive change it's better to eliminate needless implicit sign conversions. Signed-off-by: Chuck Lever --- include/trace/events/rpcgss.h | 168 ++++++++++++++++++++++++++++++++------ net/sunrpc/auth_gss/svcauth_gss.c | 117 +++++++++++++++++--------- net/sunrpc/auth_gss/trace.c | 3 + 3 files changed, 224 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index b9b51a4b1db1..ffdbe6f85da8 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -170,55 +170,144 @@ DECLARE_EVENT_CLASS(rpcgss_ctx_class, DEFINE_CTX_EVENT(init); DEFINE_CTX_EVENT(destroy); +DECLARE_EVENT_CLASS(rpcgss_svc_gssapi_class, + TP_PROTO( + const struct svc_rqst *rqstp, + u32 maj_stat + ), + + TP_ARGS(rqstp, maj_stat), + + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, maj_stat) + __string(addr, rqstp->rq_xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->xid = __be32_to_cpu(rqstp->rq_xid); + __entry->maj_stat = maj_stat; + __assign_str(addr, rqstp->rq_xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s xid=0x%08x maj_stat=%s", + __get_str(addr), __entry->xid, + __entry->maj_stat == 0 ? + "GSS_S_COMPLETE" : show_gss_status(__entry->maj_stat)) +); + +#define DEFINE_SVC_GSSAPI_EVENT(name) \ + DEFINE_EVENT(rpcgss_svc_gssapi_class, rpcgss_svc_##name, \ + TP_PROTO( \ + const struct svc_rqst *rqstp, \ + u32 maj_stat \ + ), \ + TP_ARGS(rqstp, maj_stat)) + +DEFINE_SVC_GSSAPI_EVENT(unwrap); +DEFINE_SVC_GSSAPI_EVENT(mic); + +TRACE_EVENT(rpcgss_svc_unwrap_failed, + TP_PROTO( + const struct svc_rqst *rqstp + ), + + TP_ARGS(rqstp), + + TP_STRUCT__entry( + __field(u32, xid) + __string(addr, rqstp->rq_xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __assign_str(addr, rqstp->rq_xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s xid=0x%08x", __get_str(addr), __entry->xid) +); + +TRACE_EVENT(rpcgss_svc_seqno_bad, + TP_PROTO( + const struct svc_rqst *rqstp, + u32 expected, + u32 received + ), + + TP_ARGS(rqstp, expected, received), + + TP_STRUCT__entry( + __field(u32, expected) + __field(u32, received) + __field(u32, xid) + __string(addr, rqstp->rq_xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->expected = expected; + __entry->received = received; + __entry->xid = __be32_to_cpu(rqstp->rq_xid); + __assign_str(addr, rqstp->rq_xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s xid=0x%08x expected seqno %u, received seqno %u", + __get_str(addr), __entry->xid, + __entry->expected, __entry->received) +); + TRACE_EVENT(rpcgss_svc_accept_upcall, TP_PROTO( - __be32 xid, + const struct svc_rqst *rqstp, u32 major_status, u32 minor_status ), - TP_ARGS(xid, major_status, minor_status), + TP_ARGS(rqstp, major_status, minor_status), TP_STRUCT__entry( - __field(u32, xid) __field(u32, minor_status) __field(unsigned long, major_status) + __field(u32, xid) + __string(addr, rqstp->rq_xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xid = be32_to_cpu(xid); __entry->minor_status = minor_status; __entry->major_status = major_status; + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __assign_str(addr, rqstp->rq_xprt->xpt_remotebuf); ), - TP_printk("xid=0x%08x major_status=%s (0x%08lx) minor_status=%u", - __entry->xid, __entry->major_status == 0 ? "GSS_S_COMPLETE" : - show_gss_status(__entry->major_status), + TP_printk("addr=%s xid=0x%08x major_status=%s (0x%08lx) minor_status=%u", + __get_str(addr), __entry->xid, + (__entry->major_status == 0) ? "GSS_S_COMPLETE" : + show_gss_status(__entry->major_status), __entry->major_status, __entry->minor_status ) ); -TRACE_EVENT(rpcgss_svc_accept, +TRACE_EVENT(rpcgss_svc_authenticate, TP_PROTO( - __be32 xid, - size_t len + const struct svc_rqst *rqstp, + const struct rpc_gss_wire_cred *gc ), - TP_ARGS(xid, len), + TP_ARGS(rqstp, gc), TP_STRUCT__entry( + __field(u32, seqno) __field(u32, xid) - __field(size_t, len) + __string(addr, rqstp->rq_xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xid = be32_to_cpu(xid); - __entry->len = len; + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->seqno = gc->gc_seq; + __assign_str(addr, rqstp->rq_xprt->xpt_remotebuf); ), - TP_printk("xid=0x%08x len=%zu", - __entry->xid, __entry->len - ) + TP_printk("addr=%s xid=0x%08x seqno=%u", __get_str(addr), + __entry->xid, __entry->seqno) ); @@ -371,11 +460,11 @@ TRACE_EVENT(rpcgss_update_slack, DECLARE_EVENT_CLASS(rpcgss_svc_seqno_class, TP_PROTO( - __be32 xid, + const struct svc_rqst *rqstp, u32 seqno ), - TP_ARGS(xid, seqno), + TP_ARGS(rqstp, seqno), TP_STRUCT__entry( __field(u32, xid) @@ -383,25 +472,52 @@ DECLARE_EVENT_CLASS(rpcgss_svc_seqno_class, ), TP_fast_assign( - __entry->xid = be32_to_cpu(xid); + __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->seqno = seqno; ), - TP_printk("xid=0x%08x seqno=%u, request discarded", + TP_printk("xid=0x%08x seqno=%u", __entry->xid, __entry->seqno) ); #define DEFINE_SVC_SEQNO_EVENT(name) \ - DEFINE_EVENT(rpcgss_svc_seqno_class, rpcgss_svc_##name, \ + DEFINE_EVENT(rpcgss_svc_seqno_class, rpcgss_svc_seqno_##name, \ TP_PROTO( \ - __be32 xid, \ + const struct svc_rqst *rqstp, \ u32 seqno \ ), \ - TP_ARGS(xid, seqno)) + TP_ARGS(rqstp, seqno)) -DEFINE_SVC_SEQNO_EVENT(large_seqno); -DEFINE_SVC_SEQNO_EVENT(old_seqno); +DEFINE_SVC_SEQNO_EVENT(large); +DEFINE_SVC_SEQNO_EVENT(seen); +TRACE_EVENT(rpcgss_svc_seqno_low, + TP_PROTO( + const struct svc_rqst *rqstp, + u32 seqno, + u32 min, + u32 max + ), + + TP_ARGS(rqstp, seqno, min, max), + + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, seqno) + __field(u32, min) + __field(u32, max) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->seqno = seqno; + __entry->min = min; + __entry->max = max; + ), + + TP_printk("xid=0x%08x seqno=%u window=[%u..%u]", + __entry->xid, __entry->seqno, __entry->min, __entry->max) +); /** ** gssd upcall related trace events diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 46027d0c903f..7d83f54aaaa6 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -332,7 +332,7 @@ static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct r struct gss_svc_seq_data { /* highest seq number seen so far: */ - int sd_max; + u32 sd_max; /* for i such that sd_max-GSS_SEQ_WIN < i <= sd_max, the i-th bit of * sd_win is nonzero iff sequence number i has been seen already: */ unsigned long sd_win[GSS_SEQ_WIN/BITS_PER_LONG]; @@ -613,16 +613,29 @@ gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle) return found; } -/* Implements sequence number algorithm as specified in RFC 2203. */ -static int -gss_check_seq_num(struct rsc *rsci, int seq_num) +/** + * gss_check_seq_num - GSS sequence number window check + * @rqstp: RPC Call to use when reporting errors + * @rsci: cached GSS context state (updated on return) + * @seq_num: sequence number to check + * + * Implements sequence number algorithm as specified in + * RFC 2203, Section 5.3.3.1. "Context Management". + * + * Return values: + * %true: @rqstp's GSS sequence number is inside the window + * %false: @rqstp's GSS sequence number is outside the window + */ +static bool gss_check_seq_num(const struct svc_rqst *rqstp, struct rsc *rsci, + u32 seq_num) { struct gss_svc_seq_data *sd = &rsci->seqdata; + bool result = false; spin_lock(&sd->sd_lock); if (seq_num > sd->sd_max) { if (seq_num >= sd->sd_max + GSS_SEQ_WIN) { - memset(sd->sd_win,0,sizeof(sd->sd_win)); + memset(sd->sd_win, 0, sizeof(sd->sd_win)); sd->sd_max = seq_num; } else while (sd->sd_max < seq_num) { sd->sd_max++; @@ -631,17 +644,25 @@ gss_check_seq_num(struct rsc *rsci, int seq_num) __set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win); goto ok; } else if (seq_num <= sd->sd_max - GSS_SEQ_WIN) { - goto drop; + goto toolow; } - /* sd_max - GSS_SEQ_WIN < seq_num <= sd_max */ if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win)) - goto drop; + goto alreadyseen; + ok: + result = true; +out: spin_unlock(&sd->sd_lock); - return 1; -drop: - spin_unlock(&sd->sd_lock); - return 0; + return result; + +toolow: + trace_rpcgss_svc_seqno_low(rqstp, seq_num, + sd->sd_max - GSS_SEQ_WIN, + sd->sd_max); + goto out; +alreadyseen: + trace_rpcgss_svc_seqno_seen(rqstp, seq_num); + goto out; } static inline u32 round_up_to_quad(u32 i) @@ -721,14 +742,12 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, } if (gc->gc_seq > MAXSEQ) { - trace_rpcgss_svc_large_seqno(rqstp->rq_xid, gc->gc_seq); + trace_rpcgss_svc_seqno_large(rqstp, gc->gc_seq); *authp = rpcsec_gsserr_ctxproblem; return SVC_DENIED; } - if (!gss_check_seq_num(rsci, gc->gc_seq)) { - trace_rpcgss_svc_old_seqno(rqstp->rq_xid, gc->gc_seq); + if (!gss_check_seq_num(rqstp, rsci, gc->gc_seq)) return SVC_DROP; - } return SVC_OK; } @@ -866,11 +885,13 @@ read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) static int unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) { + u32 integ_len, rseqno, maj_stat; int stat = -EINVAL; - u32 integ_len, maj_stat; struct xdr_netobj mic; struct xdr_buf integ_buf; + mic.data = NULL; + /* NFS READ normally uses splice to send data in-place. However * the data in cache can change after the reply's MIC is computed * but before the RPC reply is sent. To prevent the client from @@ -885,34 +906,44 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g integ_len = svc_getnl(&buf->head[0]); if (integ_len & 3) - return stat; + goto unwrap_failed; if (integ_len > buf->len) - return stat; - if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) { - WARN_ON_ONCE(1); - return stat; - } + goto unwrap_failed; + if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) + goto unwrap_failed; + /* copy out mic... */ if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) - return stat; + goto unwrap_failed; if (mic.len > RPC_MAX_AUTH_SIZE) - return stat; + goto unwrap_failed; mic.data = kmalloc(mic.len, GFP_KERNEL); if (!mic.data) - return stat; + goto unwrap_failed; if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) - goto out; + goto unwrap_failed; maj_stat = gss_verify_mic(ctx, &integ_buf, &mic); if (maj_stat != GSS_S_COMPLETE) - goto out; - if (svc_getnl(&buf->head[0]) != seq) - goto out; + goto bad_mic; + rseqno = svc_getnl(&buf->head[0]); + if (rseqno != seq) + goto bad_seqno; /* trim off the mic and padding at the end before returning */ xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4); stat = 0; out: kfree(mic.data); return stat; + +unwrap_failed: + trace_rpcgss_svc_unwrap_failed(rqstp); + goto out; +bad_seqno: + trace_rpcgss_svc_seqno_bad(rqstp, seq, rseqno); + goto out; +bad_mic: + trace_rpcgss_svc_mic(rqstp, maj_stat); + goto out; } static inline int @@ -937,6 +968,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs { u32 priv_len, maj_stat; int pad, remaining_len, offset; + u32 rseqno; clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); @@ -951,7 +983,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs * not yet read from the head, so these two values are different: */ remaining_len = total_buf_len(buf); if (priv_len > remaining_len) - return -EINVAL; + goto unwrap_failed; pad = remaining_len - priv_len; buf->len -= pad; fix_priv_head(buf, pad); @@ -972,11 +1004,22 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs fix_priv_head(buf, pad); } if (maj_stat != GSS_S_COMPLETE) - return -EINVAL; + goto bad_unwrap; out_seq: - if (svc_getnl(&buf->head[0]) != seq) - return -EINVAL; + rseqno = svc_getnl(&buf->head[0]); + if (rseqno != seq) + goto bad_seqno; return 0; + +unwrap_failed: + trace_rpcgss_svc_unwrap_failed(rqstp); + return -EINVAL; +bad_seqno: + trace_rpcgss_svc_seqno_bad(rqstp, seq, rseqno); + return -EINVAL; +bad_unwrap: + trace_rpcgss_svc_unwrap(rqstp, maj_stat); + return -EINVAL; } struct gss_svc_data { @@ -1314,8 +1357,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp, if (status) goto out; - trace_rpcgss_svc_accept_upcall(rqstp->rq_xid, ud.major_status, - ud.minor_status); + trace_rpcgss_svc_accept_upcall(rqstp, ud.major_status, ud.minor_status); switch (ud.major_status) { case GSS_S_CONTINUE_NEEDED: @@ -1490,8 +1532,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) int ret; struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); - trace_rpcgss_svc_accept(rqstp->rq_xid, argv->iov_len); - *authp = rpc_autherr_badcred; if (!svcdata) svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL); @@ -1608,6 +1648,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) GSS_C_QOP_DEFAULT, gc->gc_svc); ret = SVC_OK; + trace_rpcgss_svc_authenticate(rqstp, gc); goto out; } garbage_args: diff --git a/net/sunrpc/auth_gss/trace.c b/net/sunrpc/auth_gss/trace.c index 49fa583d7f91..d26036a57443 100644 --- a/net/sunrpc/auth_gss/trace.c +++ b/net/sunrpc/auth_gss/trace.c @@ -5,6 +5,9 @@ #include #include +#include +#include +#include #include #include -- cgit v1.2.3 From c65b326b1eb983bca35ed43d0e453d1b15705f10 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 25 Mar 2020 14:41:46 -0400 Subject: svcrdma: Make svc_rdma_send_error_msg() a global function Prepare for svc_rdma_send_error_msg() to be invoked from another source file. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 4 ++++ net/sunrpc/xprtrdma/svc_rdma_sendto.c | 28 +++++++++++++++++++--------- 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7ed82625dc0b..1579f7a14ab4 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -195,6 +195,10 @@ extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, const struct svc_rdma_recv_ctxt *rctxt, struct xdr_buf *xdr); +extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *sctxt, + struct svc_rdma_recv_ctxt *rctxt, + int status); extern int svc_rdma_sendto(struct svc_rqst *); extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index fb548b548c4b..57041298fe4f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -804,16 +804,25 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, return svc_rdma_send(rdma, &sctxt->sc_send_wr); } -/* Given the client-provided Write and Reply chunks, the server was not - * able to form a complete reply. Return an RDMA_ERROR message so the - * client can retire this RPC transaction. - * - * Remote Invalidation is skipped for simplicity. +/** + * svc_rdma_send_error_msg - Send an RPC/RDMA v1 error response + * @rdma: controlling transport context + * @sctxt: Send context for the response + * @rctxt: Receive context for incoming bad message + * @status: negative errno indicating error that occurred + * + * Given the client-provided Read, Write, and Reply chunks, the + * server was not able to parse the Call or form a complete Reply. + * Return an RDMA_ERROR message so the client can retire the RPC + * transaction. + * + * The caller does not have to release @sctxt. It is released by + * Send completion, or by this function on error. */ -static void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *sctxt, - struct svc_rdma_recv_ctxt *rctxt, - int status) +void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *sctxt, + struct svc_rdma_recv_ctxt *rctxt, + int status) { __be32 *rdma_argp = rctxt->rc_recv_buf; __be32 *p; @@ -852,6 +861,7 @@ static void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, trace_svcrdma_err_chunk(*rdma_argp); } + /* Remote Invalidation is skipped for simplicity. */ sctxt->sc_send_wr.num_sge = 1; sctxt->sc_send_wr.opcode = IB_WR_SEND; sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; -- cgit v1.2.3 From 3f8f25c696f9c4e352a4d705ba767af676421564 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 30 Apr 2020 14:17:40 -0400 Subject: svcrdma: Clean up trace_svcrdma_send_failed() tracepoint - Use the _err naming convention instead - Remove display of kernel memory address of the controlling xprt Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 7 ++----- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 0f05a6e2b9cb..0eff80dee066 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1716,7 +1716,7 @@ TRACE_EVENT(svcrdma_send_pullup, TP_printk("len=%u", __entry->len) ); -TRACE_EVENT(svcrdma_send_failed, +TRACE_EVENT(svcrdma_send_err, TP_PROTO( const struct svc_rqst *rqst, int status @@ -1727,19 +1727,16 @@ TRACE_EVENT(svcrdma_send_failed, TP_STRUCT__entry( __field(int, status) __field(u32, xid) - __field(const void *, xprt) __string(addr, rqst->rq_xprt->xpt_remotebuf) ), TP_fast_assign( __entry->status = status; __entry->xid = __be32_to_cpu(rqst->rq_xid); - __entry->xprt = rqst->rq_xprt; __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("xprt=%p addr=%s xid=0x%08x status=%d", - __entry->xprt, __get_str(addr), + TP_printk("addr=%s xid=0x%08x status=%d", __get_str(addr), __entry->xid, __entry->status ) ); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 57041298fe4f..f985f548346a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -971,7 +971,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) err1: svc_rdma_send_ctxt_put(rdma, sctxt); err0: - trace_svcrdma_send_failed(rqstp, ret); + trace_svcrdma_send_err(rqstp, ret); set_bit(XPT_CLOSE, &xprt->xpt_flags); return -ENOTCONN; } -- cgit v1.2.3 From 0b8dc1b69995cbd81c2c9a2f1730c46cce085f62 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 18 May 2020 11:34:47 -0400 Subject: svcrdma: Remove declarations for functions long removed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pavane pour une infante défunte. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 1579f7a14ab4..d28ca1b6f2eb 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -204,10 +204,6 @@ extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length); /* svc_rdma_transport.c */ -extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); -extern void svc_sq_reap(struct svcxprt_rdma *); -extern void svc_rq_reap(struct svcxprt_rdma *); - extern struct svc_xprt_class svc_rdma_class; #ifdef CONFIG_SUNRPC_BACKCHANNEL extern struct svc_xprt_class svc_rdma_bc_class; -- cgit v1.2.3 From 07e9a6325a35fb9655f7b52e2b9dc632da6eef51 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 28 Mar 2020 13:43:22 -0400 Subject: SUNRPC: Add helpers for decoding list discriminators symbolically Use these helpers in a few spots to demonstrate their use. The remaining open-coded discriminator checks in rpcrdma will be addressed in subsequent patches. Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 26 ++++++++++++++++++++++++++ net/sunrpc/xprtrdma/rpc_rdma.c | 12 ++++++------ net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 17 ++++++++--------- 3 files changed, 40 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 22c207b2425f..5a6a81b7cd9f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -474,6 +474,32 @@ xdr_stream_encode_uint32_array(struct xdr_stream *xdr, return ret; } +/** + * xdr_item_is_absent - symbolically handle XDR discriminators + * @p: pointer to undecoded discriminator + * + * Return values: + * %true if the following XDR item is absent + * %false if the following XDR item is present + */ +static inline bool xdr_item_is_absent(const __be32 *p) +{ + return *p == xdr_zero; +} + +/** + * xdr_item_is_present - symbolically handle XDR discriminators + * @p: pointer to undecoded discriminator + * + * Return values: + * %true if the following XDR item is present + * %false if the following XDR item is absent + */ +static inline bool xdr_item_is_present(const __be32 *p) +{ + return *p != xdr_zero; +} + /** * xdr_stream_decode_u32 - Decode a 32-bit integer * @xdr: pointer to xdr_stream diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 935bbef2f7be..feecd1f55f18 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1133,11 +1133,11 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) p = xdr_inline_decode(xdr, 0); /* Chunk lists */ - if (*p++ != xdr_zero) + if (xdr_item_is_present(p++)) return false; - if (*p++ != xdr_zero) + if (xdr_item_is_present(p++)) return false; - if (*p++ != xdr_zero) + if (xdr_item_is_present(p++)) return false; /* RPC header */ @@ -1215,7 +1215,7 @@ static int decode_read_list(struct xdr_stream *xdr) p = xdr_inline_decode(xdr, sizeof(*p)); if (unlikely(!p)) return -EIO; - if (unlikely(*p != xdr_zero)) + if (unlikely(xdr_item_is_present(p))) return -EIO; return 0; } @@ -1234,7 +1234,7 @@ static int decode_write_list(struct xdr_stream *xdr, u32 *length) p = xdr_inline_decode(xdr, sizeof(*p)); if (unlikely(!p)) return -EIO; - if (*p == xdr_zero) + if (xdr_item_is_absent(p)) break; if (!first) return -EIO; @@ -1256,7 +1256,7 @@ static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length) return -EIO; *length = 0; - if (*p != xdr_zero) + if (xdr_item_is_present(p)) if (decode_write_chunk(xdr, length)) return -EIO; return 0; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index c072ce61b393..5e78067889f3 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -419,7 +419,7 @@ static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt) len = 0; first = true; - while (*p != xdr_zero) { + while (xdr_item_is_present(p)) { p = xdr_inline_decode(&rctxt->rc_stream, rpcrdma_readseg_maxsz * sizeof(*p)); if (!p) @@ -500,7 +500,7 @@ static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt) if (!p) return false; rctxt->rc_write_list = p; - while (*p != xdr_zero) { + while (xdr_item_is_present(p)) { if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK)) return false; ++chcount; @@ -532,12 +532,11 @@ static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt) p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); if (!p) return false; - rctxt->rc_reply_chunk = p; - if (*p != xdr_zero) { + rctxt->rc_reply_chunk = NULL; + if (xdr_item_is_present(p)) { if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK)) return false; - } else { - rctxt->rc_reply_chunk = NULL; + rctxt->rc_reply_chunk = p; } return true; } @@ -568,7 +567,7 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma, p += rpcrdma_fixed_maxsz; /* Read list */ - while (*p++ != xdr_zero) { + while (xdr_item_is_present(p++)) { p++; /* position */ if (inv_rkey == xdr_zero) inv_rkey = *p; @@ -578,7 +577,7 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma, } /* Write list */ - while (*p++ != xdr_zero) { + while (xdr_item_is_present(p++)) { segcount = be32_to_cpup(p++); for (i = 0; i < segcount; i++) { if (inv_rkey == xdr_zero) @@ -590,7 +589,7 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma, } /* Reply chunk */ - if (*p++ != xdr_zero) { + if (xdr_item_is_present(p++)) { segcount = be32_to_cpup(p++); for (i = 0; i < segcount; i++) { if (inv_rkey == xdr_zero) -- cgit v1.2.3 From f60a08697d28b138c73b14c3204947bc8e637197 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 29 Mar 2020 16:44:13 -0400 Subject: svcrdma: Add common XDR decoders for RDMA and Read segments Clean up: De-duplicate some code. Signed-off-by: Chuck Lever --- include/linux/sunrpc/rpc_rdma.h | 37 ++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/frwr_ops.c | 1 - net/sunrpc/xprtrdma/rpc_rdma.c | 5 +---- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 4 +--- net/sunrpc/xprtrdma/svc_rdma_rw.c | 37 ++++++++++++++------------------ net/sunrpc/xprtrdma/svc_rdma_sendto.c | 5 +---- net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 - 7 files changed, 56 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 320c672d84de..db50380f64f4 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -124,4 +124,41 @@ rpcrdma_decode_buffer_size(u8 val) return ((unsigned int)val + 1) << 10; } +/** + * xdr_decode_rdma_segment - Decode contents of an RDMA segment + * @p: Pointer to the undecoded RDMA segment + * @handle: Upon return, the RDMA handle + * @length: Upon return, the RDMA length + * @offset: Upon return, the RDMA offset + * + * Return value: + * Pointer to the XDR item that follows the RDMA segment + */ +static inline __be32 *xdr_decode_rdma_segment(__be32 *p, u32 *handle, + u32 *length, u64 *offset) +{ + *handle = be32_to_cpup(p++); + *length = be32_to_cpup(p++); + return xdr_decode_hyper(p, offset); +} + +/** + * xdr_decode_read_segment - Decode contents of a Read segment + * @p: Pointer to the undecoded Read segment + * @position: Upon return, the segment's position + * @handle: Upon return, the RDMA handle + * @length: Upon return, the RDMA length + * @offset: Upon return, the RDMA offset + * + * Return value: + * Pointer to the XDR item that follows the Read segment + */ +static inline __be32 *xdr_decode_read_segment(__be32 *p, u32 *position, + u32 *handle, u32 *length, + u64 *offset) +{ + *position = be32_to_cpup(p++); + return xdr_decode_rdma_segment(p, handle, length, offset); +} + #endif /* _LINUX_SUNRPC_RPC_RDMA_H */ diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index b647562a26dd..7f94c9a19fd3 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -40,7 +40,6 @@ * New MRs are created on demand. */ -#include #include #include "xprt_rdma.h" diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index feecd1f55f18..5461f01eeca6 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1176,10 +1176,7 @@ static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length) if (unlikely(!p)) return -EIO; - handle = be32_to_cpup(p++); - *length = be32_to_cpup(p++); - xdr_decode_hyper(p, &offset); - + xdr_decode_rdma_segment(p, &handle, length, &offset); trace_xprtrdma_decode_seg(handle, *length, offset); return 0; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 5e78067889f3..c0587d3cd389 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -466,9 +466,7 @@ static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen) if (!p) return false; - handle = be32_to_cpup(p++); - length = be32_to_cpup(p++); - xdr_decode_hyper(p, &offset); + xdr_decode_rdma_segment(p, &handle, &length, &offset); trace_svcrdma_decode_wseg(handle, length, offset); total += length; diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 83806fa94def..2038b1b286dd 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -7,6 +7,7 @@ #include +#include #include #include @@ -441,34 +442,32 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, seg = info->wi_segs + info->wi_seg_no * rpcrdma_segment_maxsz; do { unsigned int write_len; - u32 seg_length, seg_handle; - u64 seg_offset; + u32 handle, length; + u64 offset; if (info->wi_seg_no >= info->wi_nsegs) goto out_overflow; - seg_handle = be32_to_cpup(seg); - seg_length = be32_to_cpup(seg + 1); - xdr_decode_hyper(seg + 2, &seg_offset); - seg_offset += info->wi_seg_off; + xdr_decode_rdma_segment(seg, &handle, &length, &offset); + offset += info->wi_seg_off; - write_len = min(remaining, seg_length - info->wi_seg_off); + write_len = min(remaining, length - info->wi_seg_off); ctxt = svc_rdma_get_rw_ctxt(rdma, (write_len >> PAGE_SHIFT) + 2); if (!ctxt) return -ENOMEM; constructor(info, write_len, ctxt); - ret = svc_rdma_rw_ctx_init(rdma, ctxt, seg_offset, seg_handle, + ret = svc_rdma_rw_ctx_init(rdma, ctxt, offset, handle, DMA_TO_DEVICE); if (ret < 0) return -EIO; - trace_svcrdma_send_wseg(seg_handle, write_len, seg_offset); + trace_svcrdma_send_wseg(handle, write_len, offset); list_add(&ctxt->rw_list, &cc->cc_rwctxts); cc->cc_sqecount += ret; - if (write_len == seg_length - info->wi_seg_off) { + if (write_len == length - info->wi_seg_off) { seg += 4; info->wi_seg_no++; info->wi_seg_off = 0; @@ -689,21 +688,17 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, ret = -EINVAL; info->ri_chunklen = 0; while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) { - u32 rs_handle, rs_length; - u64 rs_offset; + u32 handle, length; + u64 offset; - rs_handle = be32_to_cpup(p++); - rs_length = be32_to_cpup(p++); - p = xdr_decode_hyper(p, &rs_offset); - - ret = svc_rdma_build_read_segment(info, rqstp, - rs_handle, rs_length, - rs_offset); + p = xdr_decode_rdma_segment(p, &handle, &length, &offset); + ret = svc_rdma_build_read_segment(info, rqstp, handle, length, + offset); if (ret < 0) break; - trace_svcrdma_send_rseg(rs_handle, rs_length, rs_offset); - info->ri_chunklen += rs_length; + trace_svcrdma_send_rseg(handle, length, offset); + info->ri_chunklen += length; } return ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index f985f548346a..a78f1d22e9bb 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -106,7 +106,6 @@ #include #include -#include #include #include "xprt_rdma.h" @@ -375,9 +374,7 @@ static ssize_t svc_rdma_encode_write_segment(__be32 *src, if (!p) return -EMSGSIZE; - handle = be32_to_cpup(src++); - length = be32_to_cpup(src++); - xdr_decode_hyper(src, &offset); + xdr_decode_rdma_segment(src, &handle, &length, &offset); *p++ = cpu_to_be32(handle); if (*remaining < length) { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index d38be57b00ed..3da7901a49e6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -55,7 +55,6 @@ #include #include -#include #include #include -- cgit v1.2.3 From 379c3bc6b4eb989ee37c4ce8ab403719e06fe35f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Apr 2020 15:32:14 -0400 Subject: svcrdma: Add common XDR encoders for RDMA and Read segments Clean up: De-duplicate some code. Signed-off-by: Chuck Lever --- include/linux/sunrpc/rpc_rdma.h | 37 +++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/rpc_rdma.c | 14 +++---------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 4 +--- 3 files changed, 41 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index db50380f64f4..4af31bbc8802 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -124,6 +124,43 @@ rpcrdma_decode_buffer_size(u8 val) return ((unsigned int)val + 1) << 10; } +/** + * xdr_encode_rdma_segment - Encode contents of an RDMA segment + * @p: Pointer into a send buffer + * @handle: The RDMA handle to encode + * @length: The RDMA length to encode + * @offset: The RDMA offset to encode + * + * Return value: + * Pointer to the XDR position that follows the encoded RDMA segment + */ +static inline __be32 *xdr_encode_rdma_segment(__be32 *p, u32 handle, + u32 length, u64 offset) +{ + *p++ = cpu_to_be32(handle); + *p++ = cpu_to_be32(length); + return xdr_encode_hyper(p, offset); +} + +/** + * xdr_encode_read_segment - Encode contents of a Read segment + * @p: Pointer into a send buffer + * @position: The position to encode + * @handle: The RDMA handle to encode + * @length: The RDMA length to encode + * @offset: The RDMA offset to encode + * + * Return value: + * Pointer to the XDR position that follows the encoded Read segment + */ +static inline __be32 *xdr_encode_read_segment(__be32 *p, u32 position, + u32 handle, u32 length, + u64 offset) +{ + *p++ = cpu_to_be32(position); + return xdr_encode_rdma_segment(p, handle, length, offset); +} + /** * xdr_decode_rdma_segment - Decode contents of an RDMA segment * @p: Pointer to the undecoded RDMA segment diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 5461f01eeca6..73ed51893175 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -275,14 +275,6 @@ out: return n; } -static void -xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr) -{ - *iptr++ = cpu_to_be32(mr->mr_handle); - *iptr++ = cpu_to_be32(mr->mr_length); - xdr_encode_hyper(iptr, mr->mr_offset); -} - static int encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr) { @@ -292,7 +284,7 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr) if (unlikely(!p)) return -EMSGSIZE; - xdr_encode_rdma_segment(p, mr); + xdr_encode_rdma_segment(p, mr->mr_handle, mr->mr_length, mr->mr_offset); return 0; } @@ -307,8 +299,8 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr, return -EMSGSIZE; *p++ = xdr_one; /* Item present */ - *p++ = cpu_to_be32(position); - xdr_encode_rdma_segment(p, mr); + xdr_encode_read_segment(p, position, mr->mr_handle, mr->mr_length, + mr->mr_offset); return 0; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index a78f1d22e9bb..38d8f0ee35ec 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -376,7 +376,6 @@ static ssize_t svc_rdma_encode_write_segment(__be32 *src, xdr_decode_rdma_segment(src, &handle, &length, &offset); - *p++ = cpu_to_be32(handle); if (*remaining < length) { /* segment only partly filled */ length = *remaining; @@ -385,8 +384,7 @@ static ssize_t svc_rdma_encode_write_segment(__be32 *src, /* entire segment was consumed */ *remaining -= length; } - *p++ = cpu_to_be32(length); - xdr_encode_hyper(p, offset); + xdr_encode_rdma_segment(p, handle, length, offset); trace_svcrdma_encode_wseg(handle, length, offset); return len; -- cgit v1.2.3 From f7bd657b55e3484cadc37a6439de23d2fd703bd6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 19 May 2020 09:30:32 -0400 Subject: svcrdma: Introduce infrastructure to support completion IDs The goal is to replace CQE kernel memory addresses in completion- related tracepoints. Each completion ID matches an incoming Send or Receive completion to a Completion Queue and to a previous ib_post_*(). The ID can then be displayed in an error message or recorded in a trace record. Signed-off-by: Chuck Lever --- include/linux/sunrpc/rpc_rdma_cid.h | 24 +++++++++++++++++++++ include/trace/events/rpcrdma.h | 43 +++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 include/linux/sunrpc/rpc_rdma_cid.h (limited to 'include') diff --git a/include/linux/sunrpc/rpc_rdma_cid.h b/include/linux/sunrpc/rpc_rdma_cid.h new file mode 100644 index 000000000000..be24ab2baa6a --- /dev/null +++ b/include/linux/sunrpc/rpc_rdma_cid.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * * Copyright (c) 2020, Oracle and/or its affiliates. + */ + +#ifndef RPC_RDMA_CID_H +#define RPC_RDMA_CID_H + +/* + * The rpc_rdma_cid struct records completion ID information. A + * completion ID matches an incoming Send or Receive completion + * to a Completion Queue and to a previous ib_post_*(). The ID + * can then be displayed in an error message or recorded in a + * trace record. + * + * This struct is shared between the server and client RPC/RDMA + * transport implementations. + */ +struct rpc_rdma_cid { + u32 ci_queue_id; + int ci_completion_id; +}; + +#endif /* RPC_RDMA_CID_H */ diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 0eff80dee066..70ab989aa3b7 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -11,6 +11,7 @@ #define _TRACE_RPCRDMA_H #include +#include #include #include @@ -18,6 +19,48 @@ ** Event classes **/ +DECLARE_EVENT_CLASS(rpcrdma_completion_class, + TP_PROTO( + const struct ib_wc *wc, + const struct rpc_rdma_cid *cid + ), + + TP_ARGS(wc, cid), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) + __field(unsigned long, status) + __field(unsigned int, vendor_err) + ), + + TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; + __entry->status = wc->status; + if (wc->status) + __entry->vendor_err = wc->vendor_err; + else + __entry->vendor_err = 0; + ), + + TP_printk("cq.id=%u cid=%d status=%s (%lu/0x%x)", + __entry->cq_id, __entry->completion_id, + rdma_show_wc_status(__entry->status), + __entry->status, __entry->vendor_err + ) +); + +#define DEFINE_COMPLETION_EVENT(name) \ + DEFINE_EVENT(rpcrdma_completion_class, name, \ + TP_PROTO( \ + const struct ib_wc *wc, \ + const struct rpc_rdma_cid *cid \ + ), \ + TP_ARGS(wc, cid)) + +DEFINE_COMPLETION_EVENT(dummy); + DECLARE_EVENT_CLASS(xprtrdma_reply_event, TP_PROTO( const struct rpcrdma_rep *rep -- cgit v1.2.3 From 9b3bcf8c5c134038e30624db5b57992ae50b80a9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 29 Apr 2020 16:22:26 -0400 Subject: svcrdma: Introduce Receive completion IDs Set up a completion ID in each svc_rdma_recv_ctxt. The ID is used to match an incoming Receive completion to a transport and to a previous ib_post_recv(). Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 4 +++ include/trace/events/rpcrdma.h | 51 +++++++++++++-------------------- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 15 ++++++++-- 3 files changed, 36 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d28ca1b6f2eb..c3c1e46f510f 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -109,6 +110,8 @@ struct svcxprt_rdma { struct work_struct sc_work; struct llist_head sc_recv_ctxts; + + atomic_t sc_completion_ids; }; /* sc_flags */ #define RDMAXPRT_CONN_PENDING 3 @@ -129,6 +132,7 @@ struct svc_rdma_recv_ctxt { struct list_head rc_list; struct ib_recv_wr rc_recv_wr; struct ib_cqe rc_cqe; + struct rpc_rdma_cid rc_cid; struct ib_sge rc_recv_sge; void *rc_recv_buf; struct xdr_buf rc_arg; diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 70ab989aa3b7..a0330a557e34 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -59,8 +59,6 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class, ), \ TP_ARGS(wc, cid)) -DEFINE_COMPLETION_EVENT(dummy); - DECLARE_EVENT_CLASS(xprtrdma_reply_event, TP_PROTO( const struct rpcrdma_rep *rep @@ -1849,57 +1847,48 @@ DEFINE_SENDCOMP_EVENT(send); TRACE_EVENT(svcrdma_post_recv, TP_PROTO( - const struct ib_recv_wr *wr, - int status + const struct svc_rdma_recv_ctxt *ctxt ), - TP_ARGS(wr, status), + TP_ARGS(ctxt), TP_STRUCT__entry( - __field(const void *, cqe) - __field(int, status) + __field(u32, cq_id) + __field(int, completion_id) ), TP_fast_assign( - __entry->cqe = wr->wr_cqe; - __entry->status = status; + __entry->cq_id = ctxt->rc_cid.ci_queue_id; + __entry->completion_id = ctxt->rc_cid.ci_completion_id; ), - TP_printk("cqe=%p status=%d", - __entry->cqe, __entry->status + TP_printk("cq.id=%d cid=%d", + __entry->cq_id, __entry->completion_id ) ); -TRACE_EVENT(svcrdma_wc_receive, +DEFINE_COMPLETION_EVENT(svcrdma_wc_receive); + +TRACE_EVENT(svcrdma_rq_post_err, TP_PROTO( - const struct ib_wc *wc + const struct svcxprt_rdma *rdma, + int status ), - TP_ARGS(wc), + TP_ARGS(rdma, status), TP_STRUCT__entry( - __field(const void *, cqe) - __field(u32, byte_len) - __field(unsigned int, status) - __field(u32, vendor_err) + __field(int, status) + __string(addr, rdma->sc_xprt.xpt_remotebuf) ), TP_fast_assign( - __entry->cqe = wc->wr_cqe; - __entry->status = wc->status; - if (wc->status) { - __entry->byte_len = 0; - __entry->vendor_err = wc->vendor_err; - } else { - __entry->byte_len = wc->byte_len; - __entry->vendor_err = 0; - } + __entry->status = status; + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); ), - TP_printk("cqe=%p byte_len=%u status=%s (%u/0x%x)", - __entry->cqe, __entry->byte_len, - rdma_show_wc_status(__entry->status), - __entry->status, __entry->vendor_err + TP_printk("addr=%s status=%d", + __get_str(addr), __entry->status ) ); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index c0587d3cd389..e6d7401232d2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -117,6 +117,13 @@ svc_rdma_next_recv_ctxt(struct list_head *list) rc_list); } +static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, + struct rpc_rdma_cid *cid) +{ + cid->ci_queue_id = rdma->sc_rq_cq->res.id; + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); +} + static struct svc_rdma_recv_ctxt * svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) { @@ -135,6 +142,8 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) if (ib_dma_mapping_error(rdma->sc_pd->device, addr)) goto fail2; + svc_rdma_recv_cid_init(rdma, &ctxt->rc_cid); + ctxt->rc_recv_wr.next = NULL; ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe; ctxt->rc_recv_wr.sg_list = &ctxt->rc_recv_sge; @@ -249,13 +258,14 @@ static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma, int ret; svc_xprt_get(&rdma->sc_xprt); + trace_svcrdma_post_recv(ctxt); ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL); - trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret); if (ret) goto err_post; return 0; err_post: + trace_svcrdma_rq_post_err(rdma, ret); svc_rdma_recv_ctxt_put(rdma, ctxt); svc_xprt_put(&rdma->sc_xprt); return ret; @@ -309,11 +319,10 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_recv_ctxt *ctxt; - trace_svcrdma_wc_receive(wc); - /* WARNING: Only wc->wr_cqe and wc->status are reliable */ ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe); + trace_svcrdma_wc_receive(wc, &ctxt->rc_cid); if (wc->status != IB_WC_SUCCESS) goto flushed; -- cgit v1.2.3 From 007140ee9b4fc4e59538677799c916890a2f13e2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 29 Apr 2020 17:16:31 -0400 Subject: svcrdma: Record Receive completion ID in svc_rdma_decode_rqst When recording a trace event in the Receive path, tie decoding results and errors to an incoming Receive completion. Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 34 ++++++++++++++++++++++++++------- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 12 ++++++------ 2 files changed, 33 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index a0330a557e34..df49ae5d447b 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1369,13 +1369,16 @@ TRACE_DEFINE_ENUM(RDMA_ERROR); TRACE_EVENT(svcrdma_decode_rqst, TP_PROTO( + const struct svc_rdma_recv_ctxt *ctxt, __be32 *p, unsigned int hdrlen ), - TP_ARGS(p, hdrlen), + TP_ARGS(ctxt, p, hdrlen), TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) __field(u32, xid) __field(u32, vers) __field(u32, proc) @@ -1384,6 +1387,8 @@ TRACE_EVENT(svcrdma_decode_rqst, ), TP_fast_assign( + __entry->cq_id = ctxt->rc_cid.ci_queue_id; + __entry->completion_id = ctxt->rc_cid.ci_completion_id; __entry->xid = be32_to_cpup(p++); __entry->vers = be32_to_cpup(p++); __entry->credits = be32_to_cpup(p++); @@ -1391,37 +1396,48 @@ TRACE_EVENT(svcrdma_decode_rqst, __entry->hdrlen = hdrlen; ), - TP_printk("xid=0x%08x vers=%u credits=%u proc=%s hdrlen=%u", + TP_printk("cq.id=%u cid=%d xid=0x%08x vers=%u credits=%u proc=%s hdrlen=%u", + __entry->cq_id, __entry->completion_id, __entry->xid, __entry->vers, __entry->credits, show_rpcrdma_proc(__entry->proc), __entry->hdrlen) ); TRACE_EVENT(svcrdma_decode_short_err, TP_PROTO( + const struct svc_rdma_recv_ctxt *ctxt, unsigned int hdrlen ), - TP_ARGS(hdrlen), + TP_ARGS(ctxt, hdrlen), TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) __field(unsigned int, hdrlen) ), TP_fast_assign( + __entry->cq_id = ctxt->rc_cid.ci_queue_id; + __entry->completion_id = ctxt->rc_cid.ci_completion_id; __entry->hdrlen = hdrlen; ), - TP_printk("hdrlen=%u", __entry->hdrlen) + TP_printk("cq.id=%u cid=%d hdrlen=%u", + __entry->cq_id, __entry->completion_id, + __entry->hdrlen) ); DECLARE_EVENT_CLASS(svcrdma_badreq_event, TP_PROTO( + const struct svc_rdma_recv_ctxt *ctxt, __be32 *p ), - TP_ARGS(p), + TP_ARGS(ctxt, p), TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) __field(u32, xid) __field(u32, vers) __field(u32, proc) @@ -1429,13 +1445,16 @@ DECLARE_EVENT_CLASS(svcrdma_badreq_event, ), TP_fast_assign( + __entry->cq_id = ctxt->rc_cid.ci_queue_id; + __entry->completion_id = ctxt->rc_cid.ci_completion_id; __entry->xid = be32_to_cpup(p++); __entry->vers = be32_to_cpup(p++); __entry->credits = be32_to_cpup(p++); __entry->proc = be32_to_cpup(p); ), - TP_printk("xid=0x%08x vers=%u credits=%u proc=%u", + TP_printk("cq.id=%u cid=%d xid=0x%08x vers=%u credits=%u proc=%u", + __entry->cq_id, __entry->completion_id, __entry->xid, __entry->vers, __entry->credits, __entry->proc) ); @@ -1443,9 +1462,10 @@ DECLARE_EVENT_CLASS(svcrdma_badreq_event, DEFINE_EVENT(svcrdma_badreq_event, \ svcrdma_decode_##name##_err, \ TP_PROTO( \ + const struct svc_rdma_recv_ctxt *ctxt, \ __be32 *p \ ), \ - TP_ARGS(p)) + TP_ARGS(ctxt, p)) DEFINE_BADREQ_EVENT(badvers); DEFINE_BADREQ_EVENT(drop); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index e6d7401232d2..d5ec85cb652c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -667,27 +667,27 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg, hdr_len = xdr_stream_pos(&rctxt->rc_stream); rq_arg->head[0].iov_len -= hdr_len; rq_arg->len -= hdr_len; - trace_svcrdma_decode_rqst(rdma_argp, hdr_len); + trace_svcrdma_decode_rqst(rctxt, rdma_argp, hdr_len); return hdr_len; out_short: - trace_svcrdma_decode_short_err(rq_arg->len); + trace_svcrdma_decode_short_err(rctxt, rq_arg->len); return -EINVAL; out_version: - trace_svcrdma_decode_badvers_err(rdma_argp); + trace_svcrdma_decode_badvers_err(rctxt, rdma_argp); return -EPROTONOSUPPORT; out_drop: - trace_svcrdma_decode_drop_err(rdma_argp); + trace_svcrdma_decode_drop_err(rctxt, rdma_argp); return 0; out_proc: - trace_svcrdma_decode_badproc_err(rdma_argp); + trace_svcrdma_decode_badproc_err(rctxt, rdma_argp); return -EINVAL; out_inval: - trace_svcrdma_decode_parse_err(rdma_argp); + trace_svcrdma_decode_parse_err(rctxt, rdma_argp); return -EINVAL; } -- cgit v1.2.3 From 3ac56c2fb166fea25974d8c48bb4a72ee298361b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 30 Apr 2020 13:47:07 -0400 Subject: svcrdma: Introduce Send completion IDs Set up a completion ID in each svc_rdma_send_ctxt. The ID is used to match an incoming Send completion to a transport and to a previous ib_post_send(). Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 2 ++ include/trace/events/rpcrdma.h | 2 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 15 ++++++++++++--- 3 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index c3c1e46f510f..c91e00bc937e 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -151,6 +151,8 @@ struct svc_rdma_recv_ctxt { struct svc_rdma_send_ctxt { struct list_head sc_list; + struct rpc_rdma_cid sc_cid; + struct ib_send_wr sc_send_wr; struct ib_cqe sc_cqe; struct xdr_buf sc_hdrbuf; diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index df49ae5d447b..782a4d826a4b 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1863,7 +1863,7 @@ TRACE_EVENT(svcrdma_post_send, ) ); -DEFINE_SENDCOMP_EVENT(send); +DEFINE_COMPLETION_EVENT(svcrdma_wc_send); TRACE_EVENT(svcrdma_post_recv, TP_PROTO( diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 38d8f0ee35ec..c720dcf56231 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -122,6 +122,13 @@ svc_rdma_next_send_ctxt(struct list_head *list) sc_list); } +static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma, + struct rpc_rdma_cid *cid) +{ + cid->ci_queue_id = rdma->sc_sq_cq->res.id; + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); +} + static struct svc_rdma_send_ctxt * svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) { @@ -144,6 +151,8 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) if (ib_dma_mapping_error(rdma->sc_pd->device, addr)) goto fail2; + svc_rdma_send_cid_init(rdma, &ctxt->sc_cid); + ctxt->sc_send_wr.next = NULL; ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe; ctxt->sc_send_wr.sg_list = ctxt->sc_sges; @@ -268,14 +277,14 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) { struct svcxprt_rdma *rdma = cq->cq_context; struct ib_cqe *cqe = wc->wr_cqe; - struct svc_rdma_send_ctxt *ctxt; + struct svc_rdma_send_ctxt *ctxt = + container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe); - trace_svcrdma_wc_send(wc); + trace_svcrdma_wc_send(wc, &ctxt->sc_cid); atomic_inc(&rdma->sc_sq_avail); wake_up(&rdma->sc_send_wait); - ctxt = container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe); svc_rdma_send_ctxt_put(rdma, ctxt); if (unlikely(wc->status != IB_WC_SUCCESS)) { -- cgit v1.2.3 From 17f70f8dd52be3723250d21093403bb3a9f2162f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 29 Apr 2020 11:05:33 -0400 Subject: svcrdma: Record send_ctxt completion ID in trace_svcrdma_post_send() First, refactor: Dereference the svc_rdma_send_ctxt inside svc_rdma_send() instead of at every call site. Then, it can be passed into trace_svcrdma_post_send() to get the proper completion ID. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 3 ++- include/trace/events/rpcrdma.h | 18 +++++++++++------- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 11 ++++++----- 4 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index c91e00bc937e..9dc3a3b88391 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -196,7 +196,8 @@ extern struct svc_rdma_send_ctxt * svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt); -extern int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr); +extern int svc_rdma_send(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, const struct svc_rdma_recv_ctxt *rctxt, diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 782a4d826a4b..aeeba9188ed5 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1839,27 +1839,31 @@ DECLARE_EVENT_CLASS(svcrdma_sendcomp_event, TRACE_EVENT(svcrdma_post_send, TP_PROTO( - const struct ib_send_wr *wr + const struct svc_rdma_send_ctxt *ctxt ), - TP_ARGS(wr), + TP_ARGS(ctxt), TP_STRUCT__entry( - __field(const void *, cqe) + __field(u32, cq_id) + __field(int, completion_id) __field(unsigned int, num_sge) __field(u32, inv_rkey) ), TP_fast_assign( - __entry->cqe = wr->wr_cqe; + const struct ib_send_wr *wr = &ctxt->sc_send_wr; + + __entry->cq_id = ctxt->sc_cid.ci_queue_id; + __entry->completion_id = ctxt->sc_cid.ci_completion_id; __entry->num_sge = wr->num_sge; __entry->inv_rkey = (wr->opcode == IB_WR_SEND_WITH_INV) ? wr->ex.invalidate_rkey : 0; ), - TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x", - __entry->cqe, __entry->num_sge, - __entry->inv_rkey + TP_printk("cq_id=%u cid=%d num_sge=%u inv_rkey=0x%08x", + __entry->cq_id, __entry->completion_id, + __entry->num_sge, __entry->inv_rkey ) ); diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 1ee73f7cf931..5e7c4ba9e147 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -87,7 +87,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, */ get_page(virt_to_page(rqst->rq_buffer)); ctxt->sc_send_wr.opcode = IB_WR_SEND; - return svc_rdma_send(rdma, &ctxt->sc_send_wr); + return svc_rdma_send(rdma, ctxt); } /* Server-side transport endpoint wants a whole page for its send diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index c720dcf56231..73d46e8cdc16 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -298,13 +298,14 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) /** * svc_rdma_send - Post a single Send WR * @rdma: transport on which to post the WR - * @wr: prepared Send WR to post + * @ctxt: send ctxt with a Send WR ready to post * * Returns zero the Send WR was posted successfully. Otherwise, a * negative errno is returned. */ -int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr) +int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) { + struct ib_send_wr *wr = &ctxt->sc_send_wr; int ret; might_sleep(); @@ -330,7 +331,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr) } svc_xprt_get(&rdma->sc_xprt); - trace_svcrdma_post_send(wr); + trace_svcrdma_post_send(ctxt); ret = ib_post_send(rdma->sc_qp, wr, NULL); if (ret) break; @@ -805,7 +806,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, } else { sctxt->sc_send_wr.opcode = IB_WR_SEND; } - return svc_rdma_send(rdma, &sctxt->sc_send_wr); + return svc_rdma_send(rdma, sctxt); } /** @@ -869,7 +870,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, sctxt->sc_send_wr.num_sge = 1; sctxt->sc_send_wr.opcode = IB_WR_SEND; sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; - if (svc_rdma_send(rdma, &sctxt->sc_send_wr)) + if (svc_rdma_send(rdma, sctxt)) goto put_ctxt; return; -- cgit v1.2.3 From 6787f0bea27a24e4c306616565b02234ee558cfb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 29 Apr 2020 17:25:36 -0400 Subject: svcrdma: Display chunk completion ID when posting a rw_ctxt Re-use the post_rw tracepoint (safely) to trace cc_info lifetime events, including completion IDs. Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 56 +++++++++------------------------------ net/sunrpc/xprtrdma/svc_rdma_rw.c | 14 ++++++++-- 2 files changed, 24 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index aeeba9188ed5..abe942225637 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1802,41 +1802,6 @@ TRACE_EVENT(svcrdma_send_err, ) ); -DECLARE_EVENT_CLASS(svcrdma_sendcomp_event, - TP_PROTO( - const struct ib_wc *wc - ), - - TP_ARGS(wc), - - TP_STRUCT__entry( - __field(const void *, cqe) - __field(unsigned int, status) - __field(unsigned int, vendor_err) - ), - - TP_fast_assign( - __entry->cqe = wc->wr_cqe; - __entry->status = wc->status; - if (wc->status) - __entry->vendor_err = wc->vendor_err; - else - __entry->vendor_err = 0; - ), - - TP_printk("cqe=%p status=%s (%u/0x%x)", - __entry->cqe, rdma_show_wc_status(__entry->status), - __entry->status, __entry->vendor_err - ) -); - -#define DEFINE_SENDCOMP_EVENT(name) \ - DEFINE_EVENT(svcrdma_sendcomp_event, svcrdma_wc_##name, \ - TP_PROTO( \ - const struct ib_wc *wc \ - ), \ - TP_ARGS(wc)) - TRACE_EVENT(svcrdma_post_send, TP_PROTO( const struct svc_rdma_send_ctxt *ctxt @@ -1916,31 +1881,34 @@ TRACE_EVENT(svcrdma_rq_post_err, ) ); -TRACE_EVENT(svcrdma_post_rw, +TRACE_EVENT(svcrdma_post_chunk, TP_PROTO( - const void *cqe, + const struct rpc_rdma_cid *cid, int sqecount ), - TP_ARGS(cqe, sqecount), + TP_ARGS(cid, sqecount), TP_STRUCT__entry( - __field(const void *, cqe) + __field(u32, cq_id) + __field(int, completion_id) __field(int, sqecount) ), TP_fast_assign( - __entry->cqe = cqe; + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; __entry->sqecount = sqecount; ), - TP_printk("cqe=%p sqecount=%d", - __entry->cqe, __entry->sqecount + TP_printk("cq.id=%u cid=%d sqecount=%d", + __entry->cq_id, __entry->completion_id, + __entry->sqecount ) ); -DEFINE_SENDCOMP_EVENT(read); -DEFINE_SENDCOMP_EVENT(write); +DEFINE_COMPLETION_EVENT(svcrdma_wc_read); +DEFINE_COMPLETION_EVENT(svcrdma_wc_write); TRACE_EVENT(svcrdma_qp_error, TP_PROTO( diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 2038b1b286dd..c16d10601d65 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -145,15 +145,24 @@ static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma, * demand, and not cached. */ struct svc_rdma_chunk_ctxt { + struct rpc_rdma_cid cc_cid; struct ib_cqe cc_cqe; struct svcxprt_rdma *cc_rdma; struct list_head cc_rwctxts; int cc_sqecount; }; +static void svc_rdma_cc_cid_init(struct svcxprt_rdma *rdma, + struct rpc_rdma_cid *cid) +{ + cid->ci_queue_id = rdma->sc_sq_cq->res.id; + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); +} + static void svc_rdma_cc_init(struct svcxprt_rdma *rdma, struct svc_rdma_chunk_ctxt *cc) { + svc_rdma_cc_cid_init(rdma, &cc->cc_cid); cc->cc_rdma = rdma; svc_xprt_get(&rdma->sc_xprt); @@ -237,7 +246,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) struct svc_rdma_write_info *info = container_of(cc, struct svc_rdma_write_info, wi_cc); - trace_svcrdma_wc_write(wc); + trace_svcrdma_wc_write(wc, &cc->cc_cid); atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); wake_up(&rdma->sc_send_wait); @@ -295,7 +304,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) struct svc_rdma_read_info *info = container_of(cc, struct svc_rdma_read_info, ri_cc); - trace_svcrdma_wc_read(wc); + trace_svcrdma_wc_read(wc, &cc->cc_cid); atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); wake_up(&rdma->sc_send_wait); @@ -351,6 +360,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) do { if (atomic_sub_return(cc->cc_sqecount, &rdma->sc_sq_avail) > 0) { + trace_svcrdma_post_chunk(&cc->cc_cid, cc->cc_sqecount); ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); if (ret) break; -- cgit v1.2.3 From a68d5a502bbacfbd31f98371f777d574b3a91baf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 24 Jul 2020 15:26:47 -0400 Subject: SUNRPC: Refresh the show_rqstp_flags() macro Ensure that show_rqstp_flags() can recognize and display the RQ_AUTHERR flag, added in commit 83dd59a0b9af ("SUNRPC/nfs: Fix return value for nfs4_callback_compound()") and the RQ_DATA flag, added in commit ff3ac5c3dc23 ("SUNRPC: Add a server side per-connection limit"). Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 6a12935b8b14..65d7dfbbc9cd 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1250,15 +1250,34 @@ DECLARE_EVENT_CLASS(svc_xdr_buf_class, DEFINE_SVCXDRBUF_EVENT(recvfrom); DEFINE_SVCXDRBUF_EVENT(sendto); +/* + * from include/linux/sunrpc/svc.h + */ +#define SVC_RQST_FLAG_LIST \ + svc_rqst_flag(SECURE) \ + svc_rqst_flag(LOCAL) \ + svc_rqst_flag(USEDEFERRAL) \ + svc_rqst_flag(DROPME) \ + svc_rqst_flag(SPLICE_OK) \ + svc_rqst_flag(VICTIM) \ + svc_rqst_flag(BUSY) \ + svc_rqst_flag(DATA) \ + svc_rqst_flag_end(AUTHERR) + +#undef svc_rqst_flag +#undef svc_rqst_flag_end +#define svc_rqst_flag(x) TRACE_DEFINE_ENUM(RQ_##x); +#define svc_rqst_flag_end(x) TRACE_DEFINE_ENUM(RQ_##x); + +SVC_RQST_FLAG_LIST + +#undef svc_rqst_flag +#undef svc_rqst_flag_end +#define svc_rqst_flag(x) { BIT(RQ_##x), #x }, +#define svc_rqst_flag_end(x) { BIT(RQ_##x), #x } + #define show_rqstp_flags(flags) \ - __print_flags(flags, "|", \ - { (1UL << RQ_SECURE), "RQ_SECURE"}, \ - { (1UL << RQ_LOCAL), "RQ_LOCAL"}, \ - { (1UL << RQ_USEDEFERRAL), "RQ_USEDEFERRAL"}, \ - { (1UL << RQ_DROPME), "RQ_DROPME"}, \ - { (1UL << RQ_SPLICE_OK), "RQ_SPLICE_OK"}, \ - { (1UL << RQ_VICTIM), "RQ_VICTIM"}, \ - { (1UL << RQ_BUSY), "RQ_BUSY"}) + __print_flags(flags, "|", SVC_RQST_FLAG_LIST) TRACE_EVENT(svc_recv, TP_PROTO(struct svc_rqst *rqst, int len), -- cgit v1.2.3