diff options
Diffstat (limited to 'net/sunrpc')
25 files changed, 259 insertions, 301 deletions
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 3dfd769dc5b5..cdeb1d814833 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -50,7 +50,7 @@ #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/gss_api.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/hashtable.h> #include "../netns.h" @@ -541,9 +541,13 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred) return gss_new; gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { - int res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); + int res; + atomic_inc(&gss_msg->count); + res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); if (res) { gss_unhash_msg(gss_new); + atomic_dec(&gss_msg->count); + gss_release_msg(gss_new); gss_msg = ERR_PTR(res); } } else @@ -836,6 +840,7 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) warn_gssd(); gss_release_msg(gss_msg); } + gss_release_msg(gss_msg); } static void gss_pipe_dentry_destroy(struct dentry *dir, diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 90115ceefd49..fb39284ec174 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -200,7 +200,7 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, if (IS_ERR(hmac_md5)) goto out_free_md5; - req = ahash_request_alloc(md5, GFP_KERNEL); + req = ahash_request_alloc(md5, GFP_NOFS); if (!req) goto out_free_hmac_md5; @@ -230,7 +230,7 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, goto out; ahash_request_free(req); - req = ahash_request_alloc(hmac_md5, GFP_KERNEL); + req = ahash_request_alloc(hmac_md5, GFP_NOFS); if (!req) goto out_free_hmac_md5; @@ -299,7 +299,7 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, if (IS_ERR(tfm)) goto out_free_cksum; - req = ahash_request_alloc(tfm, GFP_KERNEL); + req = ahash_request_alloc(tfm, 
GFP_NOFS); if (!req) goto out_free_ahash; @@ -397,7 +397,7 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, goto out_free_cksum; checksumlen = crypto_ahash_digestsize(tfm); - req = ahash_request_alloc(tfm, GFP_KERNEL); + req = ahash_request_alloc(tfm, GFP_NOFS); if (!req) goto out_free_ahash; @@ -963,7 +963,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, } desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), - GFP_KERNEL); + GFP_NOFS); if (!desc) { dprintk("%s: failed to allocate shash descriptor for '%s'\n", __func__, kctx->gk5e->cksum_name); @@ -1030,7 +1030,7 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher, } desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), - GFP_KERNEL); + GFP_NOFS); if (!desc) { dprintk("%s: failed to allocate shash descriptor for '%s'\n", __func__, kctx->gk5e->cksum_name); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 60595835317a..7bb2514aadd9 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -451,8 +451,7 @@ context_derive_keys_rc4(struct krb5_ctx *ctx) goto out_err_free_hmac; - desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), - GFP_KERNEL); + desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), GFP_NOFS); if (!desc) { dprintk("%s: failed to allocate hash descriptor for '%s'\n", __func__, ctx->gk5e->cksum_name); diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index dc6fb79a361f..25d9a9cf7b66 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -260,7 +260,7 @@ static int gssx_dec_option_array(struct xdr_stream *xdr, if (!oa->data) return -ENOMEM; - creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL); + creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL); if (!creds) { kfree(oa->data); return -ENOMEM; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c 
b/net/sunrpc/auth_gss/svcauth_gss.c index 45662d7f0943..153082598522 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1489,7 +1489,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_PROC_DESTROY: if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; - rsci->h.expiry_time = get_seconds(); + rsci->h.expiry_time = seconds_since_boot(); set_bit(CACHE_NEGATIVE, &rsci->h.flags); if (resv->iov_len + 4 > PAGE_SIZE) goto drop; @@ -1548,7 +1548,7 @@ complete: ret = SVC_COMPLETE; goto out; drop: - ret = SVC_DROP; + ret = SVC_CLOSE; out: if (rsci) cache_put(&rsci->h, sn->rsc_cache); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 8aabe12201f8..8147e8d56eb2 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -21,7 +21,7 @@ #include <linux/module.h> #include <linux/ctype.h> #include <linux/string_helpers.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/poll.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 62a482790937..1dc9f3bac099 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -336,6 +336,11 @@ out: static DEFINE_IDA(rpc_clids); +void rpc_cleanup_clids(void) +{ + ida_destroy(&rpc_clids); +} + static int rpc_alloc_clid(struct rpc_clnt *clnt) { int clid; @@ -1926,6 +1931,8 @@ call_connect_status(struct rpc_task *task) case -EADDRINUSE: case -ENOBUFS: case -EPIPE: + xprt_conditional_disconnect(task->tk_rqstp->rq_xprt, + task->tk_rqstp->rq_connect_cookie); if (RPC_IS_SOFTCONN(task)) break; /* retry with existing socket, after a delay */ diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 2ecb994314c1..caeb01ad2b5a 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -157,15 +157,17 @@ void rpc_count_iostats_metrics(const struct rpc_task *task, spin_lock(&op_metrics->om_lock); op_metrics->om_ops++; - op_metrics->om_ntrans += req->rq_ntrans; + /* kernel API: om_ops must 
never become larger than om_ntrans */ + op_metrics->om_ntrans += max(req->rq_ntrans, 1); op_metrics->om_timeouts += task->tk_timeouts; op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent; op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd; - delta = ktime_sub(req->rq_xtime, task->tk_start); - op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta); - + if (ktime_to_ns(req->rq_xtime)) { + delta = ktime_sub(req->rq_xtime, task->tk_start); + op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta); + } op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt); delta = ktime_sub(now, task->tk_start); diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index d1c330a7953a..c73de181467a 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -119,6 +119,7 @@ out: static void __exit cleanup_sunrpc(void) { + rpc_cleanup_clids(); rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 7c8070ec93c8..75f290bddca1 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1155,8 +1155,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) case SVC_DENIED: goto err_bad_auth; case SVC_CLOSE: - if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) - svc_close_xprt(rqstp->rq_xprt); + goto close; case SVC_DROP: goto dropit; case SVC_COMPLETE: @@ -1246,7 +1245,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) sendit: if (svc_authorise(rqstp)) - goto dropit; + goto close; return 1; /* Caller can now send it */ dropit: @@ -1254,11 +1253,16 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) dprintk("svc: svc_process dropit\n"); return 0; + close: + if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) + svc_close_xprt(rqstp->rq_xprt); + dprintk("svc: svc_process close\n"); + return 0; + err_short_len: svc_printk(rqstp, "short len %Zd, dropping request\n", argv->iov_len); - 
- goto dropit; /* drop request */ + goto close; err_bad_rpc: serv->sv_stats->rpcbadfmt++; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 3bc1d61694cb..9c9db55a0c1e 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -799,6 +799,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { dprintk("svc_recv: found XPT_CLOSE\n"); + if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags)) + xprt->xpt_ops->xpo_kill_temp_xprt(xprt); svc_delete_xprt(xprt); /* Leave XPT_BUSY set on the dead xprt: */ goto out; @@ -1020,9 +1022,11 @@ void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) le = to_be_closed.next; list_del_init(le); xprt = list_entry(le, struct svc_xprt, xpt_list); - dprintk("svc_age_temp_xprts_now: closing %p\n", xprt); - xprt->xpt_ops->xpo_kill_temp_xprt(xprt); - svc_close_xprt(xprt); + set_bit(XPT_CLOSE, &xprt->xpt_flags); + set_bit(XPT_KILL_TEMP, &xprt->xpt_flags); + dprintk("svc_age_temp_xprts_now: queuing xprt %p for closing\n", + xprt); + svc_xprt_enqueue(xprt); } } EXPORT_SYMBOL_GPL(svc_age_temp_xprts_now); diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 69841db1f533..e112da8005b5 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -124,8 +124,7 @@ EXPORT_SYMBOL_GPL(svc_auth_unregister); #define DN_HASHMAX (1<<DN_HASHBITS) static struct hlist_head auth_domain_table[DN_HASHMAX]; -static spinlock_t auth_domain_lock = - __SPIN_LOCK_UNLOCKED(auth_domain_lock); +static DEFINE_SPINLOCK(auth_domain_lock); void auth_domain_put(struct auth_domain *dom) { diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 135ec2c11b3b..de066acdb34e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -42,7 +42,7 @@ #include <net/udp.h> #include <net/tcp.h> #include <net/tcp_states.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/ioctls.h> #include <trace/events/skb.h> @@ -574,7 +574,7 @@ 
static int svc_udp_recvfrom(struct svc_rqst *rqstp) } len = svc_addr_len(svc_addr(rqstp)); rqstp->rq_addrlen = len; - if (skb->tstamp.tv64 == 0) { + if (skb->tstamp == 0) { skb->tstamp = ktime_get_real(); /* Don't enable netstamp, sunrpc doesn't need that much accuracy */ diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index c88d9bc06f5c..8c3936403fea 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -14,7 +14,7 @@ #include <linux/sysctl.h> #include <linux/module.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/stats.h> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 685e6d225414..9a6be030ca7d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -669,7 +669,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) spin_lock_bh(&xprt->transport_lock); if (cookie != xprt->connect_cookie) goto out; - if (test_bit(XPRT_CLOSING, &xprt->state) || !xprt_connected(xprt)) + if (test_bit(XPRT_CLOSING, &xprt->state)) goto out; set_bit(XPRT_CLOSE_WAIT, &xprt->state); /* Try to schedule an autoclose RPC call */ @@ -772,6 +772,7 @@ void xprt_connect(struct rpc_task *task) if (!xprt_connected(xprt)) { task->tk_rqstp->rq_bytes_sent = 0; task->tk_timeout = task->tk_rqstp->rq_timeout; + task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; rpc_sleep_on(&xprt->pending, task, xprt_connect_status); if (test_bit(XPRT_CLOSING, &xprt->state)) diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 2c472e1b4827..24fedd4b117e 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -55,7 +55,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, if (IS_ERR(rb)) goto out_fail; req->rl_sendbuf = rb; - xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, size); + xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, + min_t(size_t, size, PAGE_SIZE)); rpcrdma_set_xprtdata(rqst, 
req); return 0; @@ -191,6 +192,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) size_t maxmsg; maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize); + maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE); return maxmsg - RPCRDMA_HDRLEN_MIN; } diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 26b26beef2d4..47bed5333c7f 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -101,7 +101,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) struct rpcrdma_frmr *f = &r->frmr; int rc; - f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, depth); + f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); if (IS_ERR(f->fr_mr)) goto out_mr_err; @@ -157,7 +157,7 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) return rc; } - f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, + f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, ia->ri_max_frmr_depth); if (IS_ERR(f->fr_mr)) { pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", @@ -171,10 +171,6 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) } /* Reset of a single FRMR. Generate a fresh rkey by replacing the MR. - * - * There's no recovery if this fails. The FRMR is abandoned, but - * remains in rb_all. It will be cleaned up when the transport is - * destroyed. 
*/ static void frwr_op_recover_mr(struct rpcrdma_mw *mw) @@ -210,11 +206,16 @@ static int frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata) { + struct ib_device_attr *attrs = &ia->ri_device->attrs; int depth, delta; + ia->ri_mrtype = IB_MR_TYPE_MEM_REG; + if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) + ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; + ia->ri_max_frmr_depth = min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, - ia->ri_device->attrs.max_fast_reg_page_list_len); + attrs->max_fast_reg_page_list_len); dprintk("RPC: %s: device's max FR page list len = %u\n", __func__, ia->ri_max_frmr_depth); @@ -241,8 +242,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, } ep->rep_attr.cap.max_send_wr *= depth; - if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) { - cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth; + if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) { + cdata->max_requests = attrs->max_qp_wr / depth; if (!cdata->max_requests) return -EINVAL; ep->rep_attr.cap.max_send_wr = cdata->max_requests * @@ -348,6 +349,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, int nsegs, bool writing, struct rpcrdma_mw **out) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; + bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; struct rpcrdma_mw *mw; struct rpcrdma_frmr *frmr; struct ib_mr *mr; @@ -383,8 +385,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ++seg; ++i; - - /* Check for holes */ + if (holes_ok) + continue; if ((i < nsegs && offset_in_page(seg->mr_offset)) || offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) break; @@ -421,7 +423,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; - DECR_CQCOUNT(&r_xprt->rx_ep); + rpcrdma_set_signaled(&r_xprt->rx_ep, &reg_wr->wr); rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr); if (rc) goto
out_senderr; @@ -451,26 +453,6 @@ out_senderr: return -ENOTCONN; } -static struct ib_send_wr * -__frwr_prepare_linv_wr(struct rpcrdma_mw *mw) -{ - struct rpcrdma_frmr *f = &mw->frmr; - struct ib_send_wr *invalidate_wr; - - dprintk("RPC: %s: invalidating frmr %p\n", __func__, f); - - f->fr_state = FRMR_IS_INVALID; - invalidate_wr = &f->fr_invwr; - - memset(invalidate_wr, 0, sizeof(*invalidate_wr)); - f->fr_cqe.done = frwr_wc_localinv; - invalidate_wr->wr_cqe = &f->fr_cqe; - invalidate_wr->opcode = IB_WR_LOCAL_INV; - invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey; - - return invalidate_wr; -} - /* Invalidate all memory regions that were registered for "req". * * Sleeps until it is safe for the host CPU to access the @@ -481,12 +463,12 @@ __frwr_prepare_linv_wr(struct rpcrdma_mw *mw) static void frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) { - struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr; + struct ib_send_wr *first, **prev, *last, *bad_wr; struct rpcrdma_rep *rep = req->rl_reply; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mw *mw, *tmp; struct rpcrdma_frmr *f; - int rc; + int count, rc; dprintk("RPC: %s: req %p\n", __func__, req); @@ -496,22 +478,29 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * a single ib_post_send() call. 
*/ f = NULL; - invalidate_wrs = pos = prev = NULL; + count = 0; + prev = &first; list_for_each_entry(mw, &req->rl_registered, mw_list) { + mw->frmr.fr_state = FRMR_IS_INVALID; + if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) && - (mw->mw_handle == rep->rr_inv_rkey)) { - mw->frmr.fr_state = FRMR_IS_INVALID; + (mw->mw_handle == rep->rr_inv_rkey)) continue; - } - - pos = __frwr_prepare_linv_wr(mw); - if (!invalidate_wrs) - invalidate_wrs = pos; - else - prev->next = pos; - prev = pos; f = &mw->frmr; + dprintk("RPC: %s: invalidating frmr %p\n", + __func__, f); + + f->fr_cqe.done = frwr_wc_localinv; + last = &f->fr_invwr; + memset(last, 0, sizeof(*last)); + last->wr_cqe = &f->fr_cqe; + last->opcode = IB_WR_LOCAL_INV; + last->ex.invalidate_rkey = mw->mw_handle; + count++; + + *prev = last; + prev = &last->next; } if (!f) goto unmap; @@ -520,17 +509,22 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * last WR in the chain completes, all WRs in the chain * are complete. */ - f->fr_invwr.send_flags = IB_SEND_SIGNALED; + last->send_flags = IB_SEND_SIGNALED; f->fr_cqe.done = frwr_wc_localinv_wake; reinit_completion(&f->fr_linv_done); - INIT_CQCOUNT(&r_xprt->rx_ep); + + /* Initialize CQ count, since there is always a signaled + * WR being posted here. The new cqcount depends on how + * many SQEs are about to be consumed. + */ + rpcrdma_init_cqcount(&r_xprt->rx_ep, count); /* Transport disconnect drains the receive CQ before it * replaces the QP. The RPC reply handler won't call us * unless ri_id->qp is a valid pointer. 
*/ r_xprt->rx_stats.local_inv_needed++; - rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr); + rc = ib_post_send(ia->ri_id->qp, first, &bad_wr); if (rc) goto reset_mrs; @@ -541,7 +535,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) */ unmap: list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) { - dprintk("RPC: %s: unmapping frmr %p\n", + dprintk("RPC: %s: DMA unmapping frmr %p\n", __func__, &mw->frmr); list_del_init(&mw->mw_list); ib_dma_unmap_sg(ia->ri_device, @@ -559,7 +553,7 @@ reset_mrs: */ list_for_each_entry(mw, &req->rl_registered, mw_list) { f = &mw->frmr; - if (mw->frmr.fr_mr->rkey == bad_wr->ex.invalidate_rkey) { + if (mw->mw_handle == bad_wr->ex.invalidate_rkey) { __frwr_reset_mr(ia, mw); bad_wr = bad_wr->next; } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index d987c2d3dd6e..c52e0f2ffe52 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -786,7 +786,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, int wrchunk, __be32 **iptrp) ifdebug(FACILITY) { u64 off; xdr_decode_hyper((__be32 *)&seg->rs_offset, &off); - dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n", + dprintk("RPC: %s: chunk %d@0x%016llx:0x%08x\n", __func__, be32_to_cpu(seg->rs_length), (unsigned long long)off, @@ -906,28 +906,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) return fixup_copy_count; } -void -rpcrdma_connect_worker(struct work_struct *work) -{ - struct rpcrdma_ep *ep = - container_of(work, struct rpcrdma_ep, rep_connect_worker.work); - struct rpcrdma_xprt *r_xprt = - container_of(ep, struct rpcrdma_xprt, rx_ep); - struct rpc_xprt *xprt = &r_xprt->rx_xprt; - - spin_lock_bh(&xprt->transport_lock); - if (++xprt->connect_cookie == 0) /* maintain a reserved value */ - ++xprt->connect_cookie; - if (ep->rep_connected > 0) { - if (!xprt_test_and_set_connected(xprt)) - xprt_wake_pending_tasks(xprt, 0); - } else { - if 
(xprt_test_and_clear_connected(xprt)) - xprt_wake_pending_tasks(xprt, -ENOTCONN); - } - spin_unlock_bh(&xprt->transport_lock); -} - #if defined(CONFIG_SUNRPC_BACKCHANNEL) /* By convention, backchannel calls arrive via rdma_msg type * messages, and never populate the chunk lists. This makes @@ -959,18 +937,6 @@ rpcrdma_is_bcall(struct rpcrdma_msg *headerp) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -/* - * This function is called when an async event is posted to - * the connection which changes the connection state. All it - * does at this point is mark the connection up/down, the rpc - * timers do the rest. - */ -void -rpcrdma_conn_func(struct rpcrdma_ep *ep) -{ - schedule_delayed_work(&ep->rep_connect_worker, 0); -} - /* Process received RPC/RDMA messages. * * Errors must result in the RPC task either being awakened, or diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 20027f8de129..288e35c2d8f4 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -164,13 +164,9 @@ static int xprt_rdma_bc_allocate(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; - struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; size_t size = rqst->rq_callsize; - struct svcxprt_rdma *rdma; struct page *page; - rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); - if (size > PAGE_SIZE) { WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n", size); @@ -359,6 +355,7 @@ xprt_setup_rdma_bc(struct xprt_create *args) out_fail: xprt_rdma_free_addresses(xprt); args->bc_xprt->xpt_bc_xprt = NULL; + args->bc_xprt->xpt_bc_xps = NULL; xprt_put(xprt); xprt_free(xprt); return ERR_PTR(-EINVAL); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index ad1df979b3f0..172b537f8cfc 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -279,7 +279,6 @@ int rdma_read_chunk_frmr(struct 
svcxprt_rdma *xprt, frmr->sg); return -ENOMEM; } - atomic_inc(&xprt->sc_dma_used); n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE); if (unlikely(n != frmr->sg_nents)) { @@ -348,8 +347,6 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, atomic_inc(&rdma_stat_read); return ret; err: - ib_dma_unmap_sg(xprt->sc_cm_id->device, - frmr->sg, frmr->sg_nents, frmr->direction); svc_rdma_put_context(ctxt, 0); svc_rdma_put_frmr(xprt, frmr); return ret; @@ -374,9 +371,7 @@ rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, u32 position, u32 byte_count, u32 page_offset, int page_no) { char *srcp, *destp; - int ret; - ret = 0; srcp = head->arg.head[0].iov_base + position; byte_count = head->arg.head[0].iov_len - position; if (byte_count > PAGE_SIZE) { @@ -415,6 +410,20 @@ done: return 1; } +/* Returns the address of the first read chunk or <nul> if no read chunk + * is present + */ +static struct rpcrdma_read_chunk * +svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) +{ + struct rpcrdma_read_chunk *ch = + (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + + if (ch->rc_discrim == xdr_zero) + return NULL; + return ch; +} + static int rdma_read_chunks(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp, @@ -627,8 +636,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto defer; goto out; } - dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", - ctxt, rdma_xprt, rqstp, ctxt->wc_status); + dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p\n", + ctxt, rdma_xprt, rqstp); atomic_inc(&rdma_stat_recv); /* Build up the XDR from the receive buffers. 
*/ diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index f5a91edcd233..ad4d286a83c5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -153,76 +153,35 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, return dma_addr; } -/* Returns the address of the first read chunk or <nul> if no read chunk - * is present +/* Parse the RPC Call's transport header. */ -struct rpcrdma_read_chunk * -svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) +static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp, + struct rpcrdma_write_array **write, + struct rpcrdma_write_array **reply) { - struct rpcrdma_read_chunk *ch = - (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + __be32 *p; - if (ch->rc_discrim == xdr_zero) - return NULL; - return ch; -} + p = (__be32 *)&rmsgp->rm_body.rm_chunks[0]; -/* Returns the address of the first read write array element or <nul> - * if no write array list is present - */ -static struct rpcrdma_write_array * -svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) -{ - if (rmsgp->rm_body.rm_chunks[0] != xdr_zero || - rmsgp->rm_body.rm_chunks[1] == xdr_zero) - return NULL; - return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; -} + /* Read list */ + while (*p++ != xdr_zero) + p += 5; -/* Returns the address of the first reply array element or <nul> if no - * reply array is present - */ -static struct rpcrdma_write_array * -svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp, - struct rpcrdma_write_array *wr_ary) -{ - struct rpcrdma_read_chunk *rch; - struct rpcrdma_write_array *rp_ary; - - /* XXX: Need to fix when reply chunk may occur with read list - * and/or write list. 
- */ - if (rmsgp->rm_body.rm_chunks[0] != xdr_zero || - rmsgp->rm_body.rm_chunks[1] != xdr_zero) - return NULL; - - rch = svc_rdma_get_read_chunk(rmsgp); - if (rch) { - while (rch->rc_discrim != xdr_zero) - rch++; - - /* The reply chunk follows an empty write array located - * at 'rc_position' here. The reply array is at rc_target. - */ - rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; - goto found_it; - } - - if (wr_ary) { - int chunk = be32_to_cpu(wr_ary->wc_nchunks); - - rp_ary = (struct rpcrdma_write_array *) - &wr_ary->wc_array[chunk].wc_target.rs_length; - goto found_it; + /* Write list */ + if (*p != xdr_zero) { + *write = (struct rpcrdma_write_array *)p; + while (*p++ != xdr_zero) + p += 1 + be32_to_cpu(*p) * 4; + } else { + *write = NULL; + p++; } - /* No read list, no write list */ - rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2]; - - found_it: - if (rp_ary->wc_discrim == xdr_zero) - return NULL; - return rp_ary; + /* Reply chunk */ + if (*p != xdr_zero) + *reply = (struct rpcrdma_write_array *)p; + else + *reply = NULL; } /* RPC-over-RDMA Version One private extension: Remote Invalidation. 
@@ -240,31 +199,22 @@ static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp, { struct rpcrdma_read_chunk *rd_ary; struct rpcrdma_segment *arg_ch; - u32 inv_rkey; - inv_rkey = 0; - - rd_ary = svc_rdma_get_read_chunk(rdma_argp); - if (rd_ary) { - inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle); - goto out; - } + rd_ary = (struct rpcrdma_read_chunk *)&rdma_argp->rm_body.rm_chunks[0]; + if (rd_ary->rc_discrim != xdr_zero) + return be32_to_cpu(rd_ary->rc_target.rs_handle); if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) { arg_ch = &wr_ary->wc_array[0].wc_target; - inv_rkey = be32_to_cpu(arg_ch->rs_handle); - goto out; + return be32_to_cpu(arg_ch->rs_handle); } if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) { arg_ch = &rp_ary->wc_array[0].wc_target; - inv_rkey = be32_to_cpu(arg_ch->rs_handle); - goto out; + return be32_to_cpu(arg_ch->rs_handle); } -out: - dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey); - return inv_rkey; + return 0; } /* Assumptions: @@ -622,8 +572,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) * places this at the start of page 0. */ rdma_argp = page_address(rqstp->rq_pages[0]); - wr_ary = svc_rdma_get_write_array(rdma_argp); - rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary); + svc_rdma_get_write_arrays(rdma_argp, &wr_ary, &rp_ary); inv_rkey = 0; if (rdma->sc_snd_w_inv) @@ -636,7 +585,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) goto err0; inline_bytes = rqstp->rq_res.len; - /* Create the RDMA response header */ + /* Create the RDMA response header. xprt->xpt_mutex, + * acquired in svc_send(), serializes RPC replies. The + * code path below that inserts the credit grant value + * into each transport header runs only inside this + * critical section. 
+ */ ret = -ENOMEM; res_page = alloc_page(GFP_KERNEL); if (!res_page) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 1334de2715c2..ca2799af05a6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -41,6 +41,7 @@ */ #include <linux/sunrpc/svc_xprt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/debug.h> #include <linux/sunrpc/rpc_rdma.h> #include <linux/interrupt.h> @@ -226,25 +227,22 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) struct svcxprt_rdma *xprt = ctxt->xprt; struct ib_device *device = xprt->sc_cm_id->device; u32 lkey = xprt->sc_pd->local_dma_lkey; - unsigned int i, count; + unsigned int i; - for (count = 0, i = 0; i < ctxt->mapped_sges; i++) { + for (i = 0; i < ctxt->mapped_sges; i++) { /* * Unmap the DMA addr in the SGE if the lkey matches * the local_dma_lkey, otherwise, ignore it since it is * an FRMR lkey and will be unmapped later when the * last WR that uses it completes. 
*/ - if (ctxt->sge[i].lkey == lkey) { - count++; + if (ctxt->sge[i].lkey == lkey) ib_dma_unmap_page(device, ctxt->sge[i].addr, ctxt->sge[i].length, ctxt->direction); - } } ctxt->mapped_sges = 0; - atomic_sub(count, &xprt->sc_dma_used); } void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) @@ -398,7 +396,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) /* WARNING: Only wc->wr_cqe and wc->status are reliable */ ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe); - ctxt->wc_status = wc->status; svc_rdma_unmap_dma(ctxt); if (wc->status != IB_WC_SUCCESS) @@ -436,7 +433,7 @@ static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt, goto err; out: - atomic_dec(&xprt->sc_sq_count); + atomic_inc(&xprt->sc_sq_avail); wake_up(&xprt->sc_send_wait); return; @@ -946,7 +943,6 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, if (frmr) { ib_dma_unmap_sg(rdma->sc_cm_id->device, frmr->sg, frmr->sg_nents, frmr->direction); - atomic_dec(&rdma->sc_dma_used); spin_lock_bh(&rdma->sc_frmr_q_lock); WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); list_add(&frmr->frmr_list, &rdma->sc_frmr_q); @@ -973,6 +969,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct rpcrdma_connect_private pmsg; struct ib_qp_init_attr qp_attr; struct ib_device *dev; + struct sockaddr *sap; unsigned int i; int ret = 0; @@ -1010,6 +1007,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests; newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth; + atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth); if (!svc_rdma_prealloc_ctxts(newxprt)) goto errout; @@ -1052,18 +1050,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = newxprt->sc_sq_cq; qp_attr.recv_cq = newxprt->sc_rq_cq; - dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n" - " cm_id->device=%p, 
sc_pd->device=%p\n" - " cap.max_send_wr = %d\n" - " cap.max_recv_wr = %d\n" - " cap.max_send_sge = %d\n" - " cap.max_recv_sge = %d\n", - newxprt->sc_cm_id, newxprt->sc_pd, - dev, newxprt->sc_pd->device, - qp_attr.cap.max_send_wr, - qp_attr.cap.max_recv_wr, - qp_attr.cap.max_send_sge, - qp_attr.cap.max_recv_sge); + dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n", + newxprt->sc_cm_id, newxprt->sc_pd); + dprintk(" cap.max_send_wr = %d, cap.max_recv_wr = %d\n", + qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr); + dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n", + qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge); ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr); if (ret) { @@ -1146,31 +1138,16 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) goto errout; } - dprintk("svcrdma: new connection %p accepted with the following " - "attributes:\n" - " local_ip : %pI4\n" - " local_port : %d\n" - " remote_ip : %pI4\n" - " remote_port : %d\n" - " max_sge : %d\n" - " max_sge_rd : %d\n" - " sq_depth : %d\n" - " max_requests : %d\n" - " ord : %d\n", - newxprt, - &((struct sockaddr_in *)&newxprt->sc_cm_id-> - route.addr.src_addr)->sin_addr.s_addr, - ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> - route.addr.src_addr)->sin_port), - &((struct sockaddr_in *)&newxprt->sc_cm_id-> - route.addr.dst_addr)->sin_addr.s_addr, - ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> - route.addr.dst_addr)->sin_port), - newxprt->sc_max_sge, - newxprt->sc_max_sge_rd, - newxprt->sc_sq_depth, - newxprt->sc_max_requests, - newxprt->sc_ord); + dprintk("svcrdma: new connection %p accepted:\n", newxprt); + sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; + dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap)); + sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; + dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap)); + dprintk(" max_sge : %d\n", newxprt->sc_max_sge); + dprintk(" max_sge_rd : 
%d\n", newxprt->sc_max_sge_rd); + dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); + dprintk(" max_requests : %d\n", newxprt->sc_max_requests); + dprintk(" ord : %d\n", newxprt->sc_ord); return &newxprt->sc_xprt; @@ -1257,9 +1234,6 @@ static void __svc_rdma_free(struct work_struct *work) if (rdma->sc_ctxt_used != 0) pr_err("svcrdma: ctxt still in use? (%d)\n", rdma->sc_ctxt_used); - if (atomic_read(&rdma->sc_dma_used) != 0) - pr_err("svcrdma: dma still in use? (%d)\n", - atomic_read(&rdma->sc_dma_used)); /* Final put of backchannel client transport */ if (xprt->xpt_bc_xprt) { @@ -1339,15 +1313,13 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) /* If the SQ is full, wait until an SQ entry is available */ while (1) { - spin_lock_bh(&xprt->sc_lock); - if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) { - spin_unlock_bh(&xprt->sc_lock); + if ((atomic_sub_return(wr_count, &xprt->sc_sq_avail) < 0)) { atomic_inc(&rdma_stat_sq_starve); /* Wait until SQ WR available if SQ still full */ + atomic_add(wr_count, &xprt->sc_sq_avail); wait_event(xprt->sc_send_wait, - atomic_read(&xprt->sc_sq_count) < - xprt->sc_sq_depth); + atomic_read(&xprt->sc_sq_avail) > wr_count); if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) return -ENOTCONN; continue; @@ -1357,21 +1329,17 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) svc_xprt_get(&xprt->sc_xprt); /* Bump used SQ WR count and post */ - atomic_add(wr_count, &xprt->sc_sq_count); ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); if (ret) { set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); - atomic_sub(wr_count, &xprt->sc_sq_count); for (i = 0; i < wr_count; i ++) svc_xprt_put(&xprt->sc_xprt); - dprintk("svcrdma: failed to post SQ WR rc=%d, " - "sc_sq_count=%d, sc_sq_depth=%d\n", - ret, atomic_read(&xprt->sc_sq_count), - xprt->sc_sq_depth); - } - spin_unlock_bh(&xprt->sc_lock); - if (ret) + dprintk("svcrdma: failed to post SQ WR rc=%d\n", ret); + dprintk(" sc_sq_avail=%d, 
sc_sq_depth=%d\n", + atomic_read(&xprt->sc_sq_avail), + xprt->sc_sq_depth); wake_up(&xprt->sc_send_wait); + } break; } return ret; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ed5e285fd2ea..534c178d2a7e 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -219,6 +219,34 @@ xprt_rdma_free_addresses(struct rpc_xprt *xprt) } } +void +rpcrdma_conn_func(struct rpcrdma_ep *ep) +{ + schedule_delayed_work(&ep->rep_connect_worker, 0); +} + +void +rpcrdma_connect_worker(struct work_struct *work) +{ + struct rpcrdma_ep *ep = + container_of(work, struct rpcrdma_ep, rep_connect_worker.work); + struct rpcrdma_xprt *r_xprt = + container_of(ep, struct rpcrdma_xprt, rx_ep); + struct rpc_xprt *xprt = &r_xprt->rx_xprt; + + spin_lock_bh(&xprt->transport_lock); + if (++xprt->connect_cookie == 0) /* maintain a reserved value */ + ++xprt->connect_cookie; + if (ep->rep_connected > 0) { + if (!xprt_test_and_set_connected(xprt)) + xprt_wake_pending_tasks(xprt, 0); + } else { + if (xprt_test_and_clear_connected(xprt)) + xprt_wake_pending_tasks(xprt, -ENOTCONN); + } + spin_unlock_bh(&xprt->transport_lock); +} + static void xprt_rdma_connect_worker(struct work_struct *work) { @@ -621,7 +649,8 @@ xprt_rdma_free(struct rpc_task *task) dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); - ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task)); + if (unlikely(!list_empty(&req->rl_registered))) + ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task)); rpcrdma_unmap_sges(ia, req); rpcrdma_buffer_put(req); } @@ -657,7 +686,8 @@ xprt_rdma_send_request(struct rpc_task *task) int rc = 0; /* On retransmit, remove any previously registered chunks */ - r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); + if (unlikely(!list_empty(&req->rl_registered))) + r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); rc = rpcrdma_marshal_req(rqst); if (rc < 0) diff --git a/net/sunrpc/xprtrdma/verbs.c 
b/net/sunrpc/xprtrdma/verbs.c index ec74289af7ec..11d07748f699 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -103,9 +103,9 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) { struct rpcrdma_ep *ep = context; - pr_err("RPC: %s: %s on device %s ep %p\n", - __func__, ib_event_msg(event->event), - event->device->name, context); + pr_err("rpcrdma: %s on device %s ep %p\n", + ib_event_msg(event->event), event->device->name, context); + if (ep->rep_connected == 1) { ep->rep_connected = -EIO; rpcrdma_conn_func(ep); @@ -223,8 +223,8 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, cdata->inline_rsize = rsize; if (wsize < cdata->inline_wsize) cdata->inline_wsize = wsize; - pr_info("rpcrdma: max send %u, max recv %u\n", - cdata->inline_wsize, cdata->inline_rsize); + dprintk("RPC: %s: max send %u, max recv %u\n", + __func__, cdata->inline_wsize, cdata->inline_rsize); rpcrdma_set_max_header_sizes(r_xprt); } @@ -331,6 +331,7 @@ static struct rdma_cm_id * rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia, struct sockaddr *addr) { + unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; struct rdma_cm_id *id; int rc; @@ -352,8 +353,12 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, __func__, rc); goto out; } - wait_for_completion_interruptible_timeout(&ia->ri_done, - msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); + rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); + if (rc < 0) { + dprintk("RPC: %s: wait() exited: %i\n", + __func__, rc); + goto out; + } /* FIXME: * Until xprtrdma supports DEVICE_REMOVAL, the provider must @@ -376,8 +381,12 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, __func__, rc); goto put; } - wait_for_completion_interruptible_timeout(&ia->ri_done, - msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); + rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); + if (rc < 0) { + dprintk("RPC: %s: wait() exited: %i\n", + __func__, rc); + 
goto put; + } rc = ia->ri_async_rc; if (rc) goto put; @@ -532,7 +541,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; /* always signal? */ - INIT_CQCOUNT(ep); + rpcrdma_init_cqcount(ep, 0); init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); @@ -1311,13 +1320,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, dprintk("RPC: %s: posting %d s/g entries\n", __func__, send_wr->num_sge); - if (DECR_CQCOUNT(ep) > 0) - send_wr->send_flags = 0; - else { /* Provider must take a send completion every now and then */ - INIT_CQCOUNT(ep); - send_wr->send_flags = IB_SEND_SIGNALED; - } - + rpcrdma_set_signaled(ep, send_wr); rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); if (rc) goto out_postsend_err; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 6e1bba358203..e35efd4ac1e4 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -75,6 +75,7 @@ struct rpcrdma_ia { unsigned int ri_max_inline_write; unsigned int ri_max_inline_read; bool ri_reminv_expected; + enum ib_mr_type ri_mrtype; struct ib_qp_attr ri_qp_attr; struct ib_qp_init_attr ri_qp_init_attr; }; @@ -95,8 +96,24 @@ struct rpcrdma_ep { struct delayed_work rep_connect_worker; }; -#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) -#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) +static inline void +rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count) +{ + atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count); +} + +/* To update send queue accounting, provider must take a + * send completion every now and then. 
+ */ +static inline void +rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr) +{ + send_wr->send_flags = 0; + if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) { + rpcrdma_init_cqcount(ep, 0); + send_wr->send_flags = IB_SEND_SIGNALED; + } +} /* Pre-allocate extra Work Requests for handling backward receives * and sends. This is a fixed value because the Work Queues are @@ -473,6 +490,7 @@ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, struct rpcrdma_create_data_internal *); void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); +void rpcrdma_conn_func(struct rpcrdma_ep *ep); void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, @@ -532,13 +550,6 @@ rpcrdma_data_dir(bool writing) } /* - * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c - */ -void rpcrdma_connect_worker(struct work_struct *); -void rpcrdma_conn_func(struct rpcrdma_ep *); -void rpcrdma_reply_handler(struct work_struct *); - -/* * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c */ @@ -555,12 +566,14 @@ bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *, void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *); int rpcrdma_marshal_req(struct rpc_rqst *); void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); +void rpcrdma_reply_handler(struct work_struct *work); /* RPC/RDMA module init - xprtrdma/transport.c */ extern unsigned int xprt_rdma_max_inline_read; void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); void xprt_rdma_free_addresses(struct rpc_xprt *xprt); +void rpcrdma_connect_worker(struct work_struct *work); void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq); int xprt_rdma_init(void); void xprt_rdma_cleanup(void); |