diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-19 02:17:30 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-19 02:17:30 +0200 |
commit | 4f40c636b291deeae7d1f4c9fb5db5f0aac54267 (patch) | |
tree | 3f6ad7456586ae41db514dfc649b3d79fae51597 /net | |
parent | Merge tag 'ext4_for_linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/ker... (diff) | |
parent | SUNRPC: Fixup gss_status tracepoint error output (diff) | |
download | linux-4f40c636b291deeae7d1f4c9fb5db5f0aac54267.tar.xz linux-4f40c636b291deeae7d1f4c9fb5db5f0aac54267.zip |
Merge tag 'nfs-for-6.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker:
"New Features:
- Add support for large folios
- Implement rpcrdma generic device removal notification
- Add client support for attribute delegations
- Use a LAYOUTRETURN during reboot recovery to report layoutstats
and errors
- Improve throughput for random buffered writes
- Add NVMe support to pnfs/blocklayout
Bugfixes:
- Fix rpcrdma_reqs_reset()
- Avoid soft lockups when using UDP
- Fix an nfs/blocklayout premature PR key unregistration
- Another fix for EXCHGID4_FLAG_USE_PNFS_DS for DS server
- Do not extend writes to the entire folio
- Pass explicit offset and count values to tracepoints
- Fix a race to wake up sleeping SUNRPC sync tasks
- Fix gss_status tracepoint output
Cleanups:
- Add missing MODULE_DESCRIPTION() macros
- Add blocklayout / SCSI layout tracepoints
- Remove asm-generic headers from xprtrdma verbs.c
- Remove unused 'struct mnt_fhstatus'
- Other delegation related cleanups
- Other folio related cleanups
- Other pNFS related cleanups
- Other xprtrdma cleanups"
* tag 'nfs-for-6.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (63 commits)
SUNRPC: Fixup gss_status tracepoint error output
SUNRPC: Fix a race to wake a sync task
nfs: split nfs_read_folio
nfs: pass explicit offset/count to trace events
nfs: do not extend writes to the entire folio
nfs/blocklayout: add support for NVMe
nfs: remove nfs_page_length
nfs: remove the unused max_deviceinfo_size field from struct pnfs_layoutdriver_type
nfs: don't reuse partially completed requests in nfs_lock_and_join_requests
nfs: move nfs_wait_on_request to write.c
nfs: fold nfs_page_group_lock_subrequests into nfs_lock_and_join_requests
nfs: fold nfs_folio_find_and_lock_request into nfs_lock_and_join_requests
nfs: simplify nfs_folio_find_and_lock_request
nfs: remove nfs_folio_private_request
nfs: remove dead code for the old swap over NFS implementation
NFSv4.1 another fix for EXCHGID4_FLAG_USE_PNFS_DS for DS server
nfs: Block on write congestion
nfs: Properly initialize server->writeback
nfs: Drop pointless check from nfs_commit_release_pages()
nfs/blocklayout: SCSI layout trace points for reservation key reg/unreg
...
Diffstat (limited to 'net')
-rw-r--r-- | net/sunrpc/clnt.c | 3 | ||||
-rw-r--r-- | net/sunrpc/sched.c | 4 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/Makefile | 2 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/frwr_ops.c | 9 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/ib_client.c | 181 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/module.c | 18 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 3 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 100 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/xprt_rdma.h | 5 |
9 files changed, 266 insertions, 59 deletions
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cfd1b1bf7e35..09f29a95f2bc 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2326,12 +2326,13 @@ call_transmit_status(struct rpc_task *task) task->tk_action = call_transmit; task->tk_status = 0; break; - case -ECONNREFUSED: case -EHOSTDOWN: case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -EPERM: + break; + case -ECONNREFUSED: if (RPC_IS_SOFTCONN(task)) { if (!task->tk_msg.rpc_proc->p_proc) trace_xprt_ping(task->tk_xprt, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6debf4fd42d4..cef623ea1506 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -369,8 +369,10 @@ static void rpc_make_runnable(struct workqueue_struct *wq, if (RPC_IS_ASYNC(task)) { INIT_WORK(&task->u.tk_work, rpc_async_schedule); queue_work(wq, &task->u.tk_work); - } else + } else { + smp_mb__after_atomic(); wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED); + } } /* diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 55b21bae866d..3232aa23cdb4 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o -rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \ +rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o ib_client.o \ svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \ svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \ svc_rdma_pcl.o module.o diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index ffbf99894970..31434aeb8e29 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -54,7 +54,7 @@ static void frwr_cid_init(struct rpcrdma_ep *ep, cid->ci_completion_id = mr->mr_ibmr->res.id; } -static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) +static void frwr_mr_unmap(struct rpcrdma_mr *mr) { if (mr->mr_device) { trace_xprtrdma_mr_unmap(mr); @@ -73,7 +73,7 @@ void 
frwr_mr_release(struct rpcrdma_mr *mr) { int rc; - frwr_mr_unmap(mr->mr_xprt, mr); + frwr_mr_unmap(mr); rc = ib_dereg_mr(mr->mr_ibmr); if (rc) @@ -84,7 +84,7 @@ void frwr_mr_release(struct rpcrdma_mr *mr) static void frwr_mr_put(struct rpcrdma_mr *mr) { - frwr_mr_unmap(mr->mr_xprt, mr); + frwr_mr_unmap(mr); /* The MR is returned to the req's MR free list instead * of to the xprt's MR free list. No spinlock is needed. @@ -92,7 +92,8 @@ static void frwr_mr_put(struct rpcrdma_mr *mr) rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs); } -/* frwr_reset - Place MRs back on the free list +/** + * frwr_reset - Place MRs back on @req's free list * @req: request to reset * * Used after a failed marshal. For FRWR, this means the MRs diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c new file mode 100644 index 000000000000..a938c19c3490 --- /dev/null +++ b/net/sunrpc/xprtrdma/ib_client.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* + * Copyright (c) 2024 Oracle. All rights reserved. + */ + +/* #include <linux/module.h> +#include <linux/slab.h> */ +#include <linux/xarray.h> +#include <linux/types.h> +#include <linux/kref.h> +#include <linux/completion.h> + +#include <linux/sunrpc/svc_rdma.h> +#include <linux/sunrpc/rdma_rn.h> + +#include "xprt_rdma.h" +#include <trace/events/rpcrdma.h> + +/* Per-ib_device private data for rpcrdma */ +struct rpcrdma_device { + struct kref rd_kref; + unsigned long rd_flags; + struct ib_device *rd_device; + struct xarray rd_xa; + struct completion rd_done; +}; + +#define RPCRDMA_RD_F_REMOVING (0) + +static struct ib_client rpcrdma_ib_client; + +/* + * Listeners have no associated device, so we never register them. + * Note that ib_get_client_data() does not check if @device is + * NULL for us. 
+ */ +static struct rpcrdma_device *rpcrdma_get_client_data(struct ib_device *device) +{ + if (!device) + return NULL; + return ib_get_client_data(device, &rpcrdma_ib_client); +} + +/** + * rpcrdma_rn_register - register to get device removal notifications + * @device: device to monitor + * @rn: notification object that wishes to be notified + * @done: callback to notify caller of device removal + * + * Returns zero on success. The callback in rn_done is guaranteed + * to be invoked when the device is removed, unless this notification + * is unregistered first. + * + * On failure, a negative errno is returned. + */ +int rpcrdma_rn_register(struct ib_device *device, + struct rpcrdma_notification *rn, + void (*done)(struct rpcrdma_notification *rn)) +{ + struct rpcrdma_device *rd = rpcrdma_get_client_data(device); + + if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags)) + return -ENETUNREACH; + + kref_get(&rd->rd_kref); + if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b, GFP_KERNEL) < 0) + return -ENOMEM; + rn->rn_done = done; + return 0; +} + +static void rpcrdma_rn_release(struct kref *kref) +{ + struct rpcrdma_device *rd = container_of(kref, struct rpcrdma_device, + rd_kref); + + trace_rpcrdma_client_completion(rd->rd_device); + complete(&rd->rd_done); +} + +/** + * rpcrdma_rn_unregister - stop device removal notifications + * @device: monitored device + * @rn: notification object that no longer wishes to be notified + */ +void rpcrdma_rn_unregister(struct ib_device *device, + struct rpcrdma_notification *rn) +{ + struct rpcrdma_device *rd = rpcrdma_get_client_data(device); + + if (!rd) + return; + + xa_erase(&rd->rd_xa, rn->rn_index); + kref_put(&rd->rd_kref, rpcrdma_rn_release); +} + +/** + * rpcrdma_add_one - ib_client device insertion callback + * @device: device about to be inserted + * + * Returns zero on success. xprtrdma private data has been allocated + * for this device. On failure, a negative errno is returned. 
+ */ +static int rpcrdma_add_one(struct ib_device *device) +{ + struct rpcrdma_device *rd; + + rd = kzalloc(sizeof(*rd), GFP_KERNEL); + if (!rd) + return -ENOMEM; + + kref_init(&rd->rd_kref); + xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC1); + rd->rd_device = device; + init_completion(&rd->rd_done); + ib_set_client_data(device, &rpcrdma_ib_client, rd); + + trace_rpcrdma_client_add_one(device); + return 0; +} + +/** + * rpcrdma_remove_one - ib_client device removal callback + * @device: device about to be removed + * @client_data: this module's private per-device data + * + * Upon return, all transports associated with @device have divested + * themselves from IB hardware resources. + */ +static void rpcrdma_remove_one(struct ib_device *device, + void *client_data) +{ + struct rpcrdma_device *rd = client_data; + struct rpcrdma_notification *rn; + unsigned long index; + + trace_rpcrdma_client_remove_one(device); + + set_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags); + xa_for_each(&rd->rd_xa, index, rn) + rn->rn_done(rn); + + /* + * Wait only if there are still outstanding notification + * registrants for this device. + */ + if (!refcount_dec_and_test(&rd->rd_kref.refcount)) { + trace_rpcrdma_client_wait_on(device); + wait_for_completion(&rd->rd_done); + } + + trace_rpcrdma_client_remove_one_done(device); + kfree(rd); +} + +static struct ib_client rpcrdma_ib_client = { + .name = "rpcrdma", + .add = rpcrdma_add_one, + .remove = rpcrdma_remove_one, +}; + +/** + * rpcrdma_ib_client_unregister - unregister ib_client for xprtrdma + * + * cel: watch for orphaned rpcrdma_device objects on module unload + */ +void rpcrdma_ib_client_unregister(void) +{ + ib_unregister_client(&rpcrdma_ib_client); +} + +/** + * rpcrdma_ib_client_register - register ib_client for rpcrdma + * + * Returns zero on success, or a negative errno. 
+ */ +int rpcrdma_ib_client_register(void) +{ + return ib_register_client(&rpcrdma_ib_client); +} diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c index 45c5b41ac8dc..697f571d4c01 100644 --- a/net/sunrpc/xprtrdma/module.c +++ b/net/sunrpc/xprtrdma/module.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/sunrpc/svc_rdma.h> +#include <linux/sunrpc/rdma_rn.h> #include <asm/swab.h> @@ -30,21 +31,32 @@ static void __exit rpc_rdma_cleanup(void) { xprt_rdma_cleanup(); svc_rdma_cleanup(); + rpcrdma_ib_client_unregister(); } static int __init rpc_rdma_init(void) { int rc; + rc = rpcrdma_ib_client_register(); + if (rc) + goto out_rc; + rc = svc_rdma_init(); if (rc) - goto out; + goto out_ib_client; rc = xprt_rdma_init(); if (rc) - svc_rdma_cleanup(); + goto out_svc_rdma; -out: + return 0; + +out_svc_rdma: + svc_rdma_cleanup(); +out_ib_client: + rpcrdma_ib_client_unregister(); +out_rc: return rc; } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 190a4de239c8..1478c41c7e9d 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1471,8 +1471,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) credits = 1; /* don't deadlock */ else if (credits > r_xprt->rx_ep->re_max_requests) credits = r_xprt->rx_ep->re_max_requests; - rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1), - false); + rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1)); if (buf->rb_credits != credits) rpcrdma_update_cwnd(r_xprt, credits); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 432557a553e7..63262ef0c2e3 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -49,14 +49,14 @@ * o buffer memory */ +#include <linux/bitops.h> #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/sunrpc/addr.h> #include <linux/sunrpc/svc_rdma.h> #include <linux/log2.h> -#include 
<asm-generic/barrier.h> -#include <asm/bitops.h> +#include <asm/barrier.h> #include <rdma/ib_cm.h> @@ -69,13 +69,15 @@ static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, struct rpcrdma_sendctx *sc); static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt); static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); -static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); static void rpcrdma_ep_get(struct rpcrdma_ep *ep); static int rpcrdma_ep_put(struct rpcrdma_ep *ep); static struct rpcrdma_regbuf * +rpcrdma_regbuf_alloc_node(size_t size, enum dma_data_direction direction, + int node); +static struct rpcrdma_regbuf * rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction); static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); @@ -222,7 +224,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep, static int rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { - struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr; struct rpcrdma_ep *ep = id->context; might_sleep(); @@ -241,14 +242,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ep->re_async_rc = -ENETUNREACH; complete(&ep->re_done); return 0; - case RDMA_CM_EVENT_DEVICE_REMOVAL: - pr_info("rpcrdma: removing device %s for %pISpc\n", - ep->re_id->device->name, sap); - switch (xchg(&ep->re_connect_status, -ENODEV)) { - case 0: goto wake_connect_worker; - case 1: goto disconnected; - } - return 0; case RDMA_CM_EVENT_ADDR_CHANGE: ep->re_connect_status = -ENODEV; goto disconnected; @@ -284,6 +277,14 @@ disconnected: return 0; } +static void rpcrdma_ep_removal_done(struct rpcrdma_notification *rn) +{ + struct rpcrdma_ep *ep = container_of(rn, struct 
rpcrdma_ep, re_rn); + + trace_xprtrdma_device_removal(ep->re_id); + xprt_force_disconnect(ep->re_xprt); +} + static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep) { @@ -323,6 +324,10 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, if (rc) goto out; + rc = rpcrdma_rn_register(id->device, &ep->re_rn, rpcrdma_ep_removal_done); + if (rc) + goto out; + return id; out: @@ -350,6 +355,8 @@ static void rpcrdma_ep_destroy(struct kref *kref) ib_dealloc_pd(ep->re_pd); ep->re_pd = NULL; + rpcrdma_rn_unregister(ep->re_id->device, &ep->re_rn); + kfree(ep); module_put(THIS_MODULE); } @@ -505,7 +512,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) * outstanding Receives. */ rpcrdma_ep_get(ep); - rpcrdma_post_recvs(r_xprt, 1, true); + rpcrdma_post_recvs(r_xprt, 1); rc = rdma_connect(ep->re_id, &ep->re_remote_cma); if (rc) @@ -897,6 +904,8 @@ static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt) static void rpcrdma_req_reset(struct rpcrdma_req *req) { + struct rpcrdma_mr *mr; + /* Credits are valid for only one connection */ req->rl_slot.rq_cong = 0; @@ -906,7 +915,19 @@ static void rpcrdma_req_reset(struct rpcrdma_req *req) rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); - frwr_reset(req); + /* The verbs consumer can't know the state of an MR on the + * req->rl_registered list unless a successful completion + * has occurred, so they cannot be re-used. 
+ */ + while ((mr = rpcrdma_mr_pop(&req->rl_registered))) { + struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf; + + spin_lock(&buf->rb_lock); + list_del(&mr->mr_all); + spin_unlock(&buf->rb_lock); + + frwr_mr_release(mr); + } } /* ASSUMPTION: the rb_allreqs list is stable for the duration, @@ -924,18 +945,20 @@ static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) } static noinline -struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, - bool temp) +struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_ep *ep = r_xprt->rx_ep; + struct ib_device *device = ep->re_id->device; struct rpcrdma_rep *rep; rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS); if (rep == NULL) goto out; - rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv, - DMA_FROM_DEVICE); + rep->rr_rdmabuf = rpcrdma_regbuf_alloc_node(ep->re_inline_recv, + DMA_FROM_DEVICE, + ibdev_to_node(device)); if (!rep->rr_rdmabuf) goto out_free; @@ -950,7 +973,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; rep->rr_recv_wr.num_sge = 1; - rep->rr_temp = temp; spin_lock(&buf->rb_lock); list_add(&rep->rr_all, &buf->rb_all_reps); @@ -969,17 +991,6 @@ static void rpcrdma_rep_free(struct rpcrdma_rep *rep) kfree(rep); } -static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) -{ - struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf; - - spin_lock(&buf->rb_lock); - list_del(&rep->rr_all); - spin_unlock(&buf->rb_lock); - - rpcrdma_rep_free(rep); -} - static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf) { struct llist_node *node; @@ -1011,10 +1022,8 @@ static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_rep *rep; - list_for_each_entry(rep, &buf->rb_all_reps, rr_all) { + list_for_each_entry(rep, 
&buf->rb_all_reps, rr_all) rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf); - rep->rr_temp = true; /* Mark this rep for destruction */ - } } static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf) @@ -1231,14 +1240,15 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) * or Replies they may be registered externally via frwr_map. */ static struct rpcrdma_regbuf * -rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction) +rpcrdma_regbuf_alloc_node(size_t size, enum dma_data_direction direction, + int node) { struct rpcrdma_regbuf *rb; - rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS); + rb = kmalloc_node(sizeof(*rb), XPRTRDMA_GFP_FLAGS, node); if (!rb) return NULL; - rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS); + rb->rg_data = kmalloc_node(size, XPRTRDMA_GFP_FLAGS, node); if (!rb->rg_data) { kfree(rb); return NULL; @@ -1250,6 +1260,12 @@ rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction) return rb; } +static struct rpcrdma_regbuf * +rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction) +{ + return rpcrdma_regbuf_alloc_node(size, direction, NUMA_NO_NODE); +} + /** * rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer * @rb: regbuf to reallocate @@ -1327,10 +1343,9 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb) * rpcrdma_post_recvs - Refill the Receive Queue * @r_xprt: controlling transport instance * @needed: current credit grant - * @temp: mark Receive buffers to be deleted after one use * */ -void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) +void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_ep *ep = r_xprt->rx_ep; @@ -1344,8 +1359,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) if (likely(ep->re_receive_count > needed)) goto out; needed -= ep->re_receive_count; - if (!temp) - needed += RPCRDMA_MAX_RECV_BATCH; + needed += 
RPCRDMA_MAX_RECV_BATCH; if (atomic_inc_return(&ep->re_receiving) > 1) goto out; @@ -1354,12 +1368,8 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) wr = NULL; while (needed) { rep = rpcrdma_rep_get_locked(buf); - if (rep && rep->rr_temp) { - rpcrdma_rep_destroy(rep); - continue; - } if (!rep) - rep = rpcrdma_rep_create(r_xprt, temp); + rep = rpcrdma_rep_create(r_xprt); if (!rep) break; if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) { diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index da409450dfc0..8147d2b41494 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -56,6 +56,7 @@ #include <linux/sunrpc/rpc_rdma_cid.h> /* completion IDs */ #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ +#include <linux/sunrpc/rdma_rn.h> /* removal notifications */ #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ @@ -92,6 +93,7 @@ struct rpcrdma_ep { struct rpcrdma_connect_private re_cm_private; struct rdma_conn_param re_remote_cma; + struct rpcrdma_notification re_rn; int re_receive_count; unsigned int re_max_requests; /* depends on device */ unsigned int re_inline_send; /* negotiated */ @@ -198,7 +200,6 @@ struct rpcrdma_rep { __be32 rr_proc; int rr_wc_flags; u32 rr_inv_rkey; - bool rr_temp; struct rpcrdma_regbuf *rr_rdmabuf; struct rpcrdma_xprt *rr_rxprt; struct rpc_rqst *rr_rqst; @@ -466,7 +467,7 @@ void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc); int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt); void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt); -void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp); +void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed); /* * Buffer calls - xprtrdma/verbs.c |