author		Linus Torvalds <torvalds@linux-foundation.org>	2019-03-10 00:53:03 +0100
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-03-10 00:53:03 +0100
commit		a50243b1ddcdd766d0d17fbfeeb1a22e62fdc461
tree		3dbf847105558eaac3658a46c4934df503c866a2 /drivers/infiniband/hw/cxgb4
parent		Merge tag 'pci-v5.1-changes' of git://git.kernel.org/pub/scm/linux/kernel/git...
parent		net/mlx5: ODP support for XRC transport is not enabled by default in FW
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"This has been a slightly more active cycle than normal with ongoing
core changes and quite a lot of collected driver updates.
- Various driver fixes for bnxt_re, cxgb4, hns, mlx5, pvrdma, rxe
- A new data transfer mode for HFI1 giving higher performance
- Significant functional and bug fix update to the mlx5
On-Demand-Paging MR feature
- A chip hang reset recovery system for hns
- Change mm->pinned_vm to an atomic64
- Update bnxt_re to support a new 57500 chip
- A sane netlink 'rdma link add' method for creating rxe devices, and
  fixes for the various unregistration race conditions in rxe's
  unregister flow
- Allow looking up objects by an ID over netlink
- Various reworking of the core to driver interface (a sketch of the
  new pattern follows this list):
- drivers should not assume umem SGLs are in PAGE_SIZE chunks
- ucontext is accessed via udata not other means
- start to make the core code responsible for object memory
allocation
- drivers should convert struct device to struct ib_device via a
helper
- drivers have more tools to avoid use after unregister problems"
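The "core to driver interface" bullets above are easiest to see in the cxgb4 hunks further down this page. A minimal sketch of the new allocation pattern, assuming a hypothetical driver (struct my_pd, my_alloc_pd and my_dev_ops are illustrative names; the INIT_RDMA_OBJ_SIZE macro and the alloc_pd signature are the ones the provider.c hunks convert cxgb4 to):

#include <rdma/ib_verbs.h>

/* Driver object embeds the core object; IB/core now allocates the whole
 * structure and passes the embedded ib_pd to the driver callback. */
struct my_pd {
	struct ib_pd ibpd;
	u32 pdid;
};

/* alloc_pd no longer returns a pointer: the memory already exists and the
 * callback only does device-specific setup, returning 0 or an errno. */
static int my_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
		       struct ib_udata *udata)
{
	struct my_pd *mpd = container_of(pd, struct my_pd, ibpd);

	mpd->pdid = 0;	/* reserve a hardware PD id here */
	return 0;	/* on error, the core frees the object, not the driver */
}

static const struct ib_device_ops my_dev_ops = {
	.alloc_pd = my_alloc_pd,
	/* tells the core how much memory to allocate per object */
	INIT_RDMA_OBJ_SIZE(ib_pd, my_pd, ibpd),
};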
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (280 commits)
net/mlx5: ODP support for XRC transport is not enabled by default in FW
IB/hfi1: Close race condition on user context disable and close
RDMA/umem: Revert broken 'off by one' fix
RDMA/umem: minor bug fix in error handling path
RDMA/hns: Use GFP_ATOMIC in hns_roce_v2_modify_qp
cxgb4: kfree mhp after the debug print
IB/rdmavt: Fix concurrency panics in QP post_send and modify to error
IB/rdmavt: Fix loopback send with invalidate ordering
IB/iser: Fix dma_nents type definition
IB/mlx5: Set correct write permissions for implicit ODP MR
bnxt_re: Clean cq for kernel consumers only
RDMA/uverbs: Don't do double free of allocated PD
RDMA: Handle ucontext allocations by IB/core
RDMA/core: Fix a WARN() message
bnxt_re: fix the regression due to changes in alloc_pbl
IB/mlx4: Increase the timeout for CM cache
IB/core: Abort page fault handler silently during owning process exit
IB/mlx5: Validate correct PD before prefetch MR
IB/mlx5: Protect against prefetch of invalid MR
RDMA/uverbs: Store PD pointer before it is overwritten
...
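The umem rework summarized above ("drivers should not assume umem SGLs are in PAGE_SIZE chunks") shows up in the mem.c hunk of this diff as a switch to the DMA-page iterator. A hedged sketch of that iteration style, with fill_page_list as an illustrative name:

#include <linux/types.h>
#include <linux/scatterlist.h>
#include <rdma/ib_umem.h>

/* Walk a registered umem one DMA page at a time. Each iteration yields a
 * PAGE_SIZE-aligned DMA address even when a single SGL entry covers many
 * pages, so the driver no longer cares how the core coalesced the SGL. */
static void fill_page_list(struct ib_umem *umem, __be64 *pages)
{
	struct sg_dma_page_iter sg_iter;
	int i = 0;

	for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0)
		pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
}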
Diffstat (limited to 'drivers/infiniband/hw/cxgb4')
-rw-r--r--	drivers/infiniband/hw/cxgb4/Makefile	4
-rw-r--r--	drivers/infiniband/hw/cxgb4/cm.c	199
-rw-r--r--	drivers/infiniband/hw/cxgb4/device.c	10
-rw-r--r--	drivers/infiniband/hw/cxgb4/iw_cxgb4.h	16
-rw-r--r--	drivers/infiniband/hw/cxgb4/mem.c	36
-rw-r--r--	drivers/infiniband/hw/cxgb4/provider.c	85
-rw-r--r--	drivers/infiniband/hw/cxgb4/qp.c	33
-rw-r--r--	drivers/infiniband/hw/cxgb4/t4.h	1
8 files changed, 251 insertions, 133 deletions
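Another conversion repeated in the qp.c and provider.c hunks below is fetching the driver's ucontext from the udata instead of dereferencing pd->uobject->context. A small sketch under the same assumptions as above (struct my_ucontext and my_get_uctx are illustrative; the macro is the one used in the diff):

#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

struct my_ucontext {
	struct ib_ucontext ibucontext;
	/* driver-private mmap bookkeeping would live here */
};

/* Inside any verb that receives a struct ib_udata *udata, the driver
 * ucontext is recovered straight from the udata; a NULL result means the
 * call came from a kernel consumer with no user context. */
static struct my_ucontext *my_get_uctx(struct ib_udata *udata)
{
	return rdma_udata_to_drv_context(udata, struct my_ucontext,
					 ibucontext);
}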
diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile
index 9edd92023e18..31a87d90a40b 100644
--- a/drivers/infiniband/hw/cxgb4/Makefile
+++ b/drivers/infiniband/hw/cxgb4/Makefile
@@ -1,5 +1,5 @@
-ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
-ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb
+ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4
+ccflags-y += -I $(srctree)/drivers/net/ethernet/chelsio/libcxgb
 
 obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o
 
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 8221813219e5..4d232bdf9e97 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -655,7 +655,33 @@ static int send_halfclose(struct c4iw_ep *ep)
 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
-static int send_abort(struct c4iw_ep *ep)
+static void read_tcb(struct c4iw_ep *ep)
+{
+	struct sk_buff *skb;
+	struct cpl_get_tcb *req;
+	int wrlen = roundup(sizeof(*req), 16);
+
+	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+	if (WARN_ON(!skb))
+		return;
+
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
+	req = (struct cpl_get_tcb *) skb_put(skb, wrlen);
+	memset(req, 0, wrlen);
+	INIT_TP_WR(req, ep->hwtid);
+	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid));
+	req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid));
+
+	/*
+	 * keep a ref on the ep so the tcb is not unlocked before this
+	 * cpl completes. The ref is released in read_tcb_rpl().
+	 */
+	c4iw_get_ep(&ep->com);
+	if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
+		c4iw_put_ep(&ep->com);
+}
+
+static int send_abort_req(struct c4iw_ep *ep)
 {
 	u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
 	struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
@@ -670,6 +696,17 @@ static int send_abort(struct c4iw_ep *ep)
 	return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
 }
 
+static int send_abort(struct c4iw_ep *ep)
+{
+	if (!ep->com.qp || !ep->com.qp->srq) {
+		send_abort_req(ep);
+		return 0;
+	}
+	set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags);
+	read_tcb(ep);
+	return 0;
+}
+
 static int send_connect(struct c4iw_ep *ep)
 {
 	struct cpl_act_open_req *req = NULL;
@@ -1851,14 +1888,11 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
 	return 0;
 }
 
-static void complete_cached_srq_buffers(struct c4iw_ep *ep,
-					__be32 srqidx_status)
+static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx)
 {
 	enum chip_type adapter_type;
-	u32 srqidx;
 
 	adapter_type = ep->com.dev->rdev.lldi.adapter_type;
-	srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status));
 
 	/*
 	 * If this TCB had a srq buffer cached, then we must complete
@@ -1876,6 +1910,7 @@ static void complete_cached_srq_buffers(struct c4iw_ep *ep,
 
 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 {
+	u32 srqidx;
 	struct c4iw_ep *ep;
 	struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
 	int release = 0;
@@ -1887,7 +1922,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 		return 0;
 	}
 
-	complete_cached_srq_buffers(ep, rpl->srqidx_status);
+	if (ep->com.qp && ep->com.qp->srq) {
+		srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));
+		complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx);
+	}
 
 	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 	mutex_lock(&ep->com.mutex);
@@ -1903,8 +1941,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 	}
 	mutex_unlock(&ep->com.mutex);
 
-	if (release)
+	if (release) {
+		close_complete_upcall(ep, -ECONNRESET);
 		release_ep_resources(ep);
+	}
 	c4iw_put_ep(&ep->com);
 	return 0;
 }
@@ -2072,7 +2112,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
 	} else {
 		pdev = get_real_dev(n->dev);
 		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
-					n, pdev, 0);
+					n, pdev, rt_tos2priority(tos));
 		if (!ep->l2t)
 			goto out;
 		ep->mtu = dst_mtu(dst);
@@ -2161,7 +2201,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
 					   laddr6->sin6_addr.s6_addr,
 					   raddr6->sin6_addr.s6_addr,
 					   laddr6->sin6_port,
-					   raddr6->sin6_port, 0,
+					   raddr6->sin6_port,
+					   ep->com.cm_id->tos,
 					   raddr6->sin6_scope_id);
 		iptype = 6;
 		ra = (__u8 *)&raddr6->sin6_addr;
@@ -2476,7 +2517,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 	u16 peer_mss = ntohs(req->tcpopt.mss);
 	int iptype;
 	unsigned short hdrs;
-	u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
+	u8 tos;
 
 	parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
 	if (!parent_ep) {
@@ -2490,6 +2531,11 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 		goto reject;
 	}
 
+	if (parent_ep->com.cm_id->tos_set)
+		tos = parent_ep->com.cm_id->tos;
+	else
+		tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
+
 	cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
 			&iptype, local_ip, peer_ip, &local_port, &peer_port);
 
@@ -2509,7 +2555,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 			 ntohs(peer_port), peer_mss);
 		dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
 				local_ip, peer_ip, local_port, peer_port,
-				PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
+				tos,
 				((struct sockaddr_in6 *)
 				 &parent_ep->com.local_addr)->sin6_scope_id);
 	}
@@ -2740,6 +2786,21 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
 	return 0;
 }
 
+static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep)
+{
+	complete_cached_srq_buffers(ep, ep->srqe_idx);
+	if (ep->com.cm_id && ep->com.qp) {
+		struct c4iw_qp_attributes attrs;
+
+		attrs.next_state = C4IW_QP_STATE_ERROR;
+		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+	}
+	peer_abort_upcall(ep);
+	release_ep_resources(ep);
+	c4iw_put_ep(&ep->com);
+}
+
 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct cpl_abort_req_rss6 *req = cplhdr(skb);
@@ -2750,6 +2811,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 	int release = 0;
 	unsigned int tid = GET_TID(req);
 	u8 status;
+	u32 srqidx;
 
 	u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
 
@@ -2769,8 +2831,6 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 		goto deref_ep;
 	}
 
-	complete_cached_srq_buffers(ep, req->srqidx_status);
-
 	pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state);
 	set_bit(PEER_ABORT, &ep->com.history);
 
@@ -2819,6 +2879,23 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 		stop_ep_timer(ep);
 		/*FALLTHROUGH*/
 	case FPDU_MODE:
+		if (ep->com.qp && ep->com.qp->srq) {
+			srqidx = ABORT_RSS_SRQIDX_G(
+					be32_to_cpu(req->srqidx_status));
+			if (srqidx) {
+				complete_cached_srq_buffers(ep,
+							    req->srqidx_status);
+			} else {
+				/* Hold ep ref until finish_peer_abort() */
+				c4iw_get_ep(&ep->com);
+				__state_set(&ep->com, ABORTING);
+				set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags);
+				read_tcb(ep);
+				break;
+
+			}
+		}
+
 		if (ep->com.cm_id && ep->com.qp) {
 			attrs.next_state = C4IW_QP_STATE_ERROR;
 			ret = c4iw_modify_qp(ep->com.qp->rhp,
@@ -2942,15 +3019,18 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
 
 	ep = get_ep_from_tid(dev, tid);
 
-	if (ep && ep->com.qp) {
-		pr_warn("TERM received tid %u qpid %u\n",
-			tid, ep->com.qp->wq.sq.qid);
-		attrs.next_state = C4IW_QP_STATE_TERMINATE;
-		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
-			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+	if (ep) {
+		if (ep->com.qp) {
+			pr_warn("TERM received tid %u qpid %u\n", tid,
+				ep->com.qp->wq.sq.qid);
+			attrs.next_state = C4IW_QP_STATE_TERMINATE;
+			c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+		}
+
+		c4iw_put_ep(&ep->com);
 	} else
 		pr_warn("TERM received tid %u no ep/qp\n", tid);
-	c4iw_put_ep(&ep->com);
 
 	return 0;
 }
@@ -3318,7 +3398,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 					   laddr6->sin6_addr.s6_addr,
 					   raddr6->sin6_addr.s6_addr,
 					   laddr6->sin6_port,
-					   raddr6->sin6_port, 0,
+					   raddr6->sin6_port, cm_id->tos,
 					   raddr6->sin6_scope_id);
 	}
 	if (!ep->dst) {
@@ -3606,7 +3686,6 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
 	if (close) {
 		if (abrupt) {
 			set_bit(EP_DISC_ABORT, &ep->com.history);
-			close_complete_upcall(ep, -ECONNRESET);
 			ret = send_abort(ep);
 		} else {
 			set_bit(EP_DISC_CLOSE, &ep->com.history);
@@ -3717,6 +3796,80 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
 	return;
 }
 
+static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word)
+{
+	u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]);
+	u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]);
+	u64 t;
+	u32 shift = 32;
+
+	t = (thi << shift) | (tlo >> shift);
+
+	return t;
+}
+
+static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift)
+{
+	u32 v;
+	u64 t = be64_to_cpu(tcb[(31 - word) / 2]);
+
+	if (word & 0x1)
+		shift += 32;
+	v = (t >> shift) & mask;
+	return v;
+}
+
+static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+	struct cpl_get_tcb_rpl *rpl = cplhdr(skb);
+	__be64 *tcb = (__be64 *)(rpl + 1);
+	unsigned int tid = GET_TID(rpl);
+	struct c4iw_ep *ep;
+	u64 t_flags_64;
+	u32 rx_pdu_out;
+
+	ep = get_ep_from_tid(dev, tid);
+	if (!ep)
+		return 0;
+	/* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to
+	 * determine if there's a rx PDU feedback event pending.
+	 *
+	 * If that bit is set, it means we'll need to re-read the TCB's
+	 * rq_start value. The final value is the one present in a TCB
+	 * with the TF_RX_PDU_OUT bit cleared.
+	 */
+
+	t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W);
+	rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S;
+
+	c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */
+	c4iw_put_ep(&ep->com); /* from read_tcb() */
+
+	/* If TF_RX_PDU_OUT bit is set, re-read the TCB */
+	if (rx_pdu_out) {
+		if (++ep->rx_pdu_out_cnt >= 2) {
+			WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n");
+			goto cleanup;
+		}
+		read_tcb(ep);
+		return 0;
+	}
+
+	ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_W,
+					  TCB_RQ_START_S);
+cleanup:
+	pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
+
+	if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags))
+		finish_peer_abort(dev, ep);
+	else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags))
+		send_abort_req(ep);
+	else
+		WARN_ONCE(1, "unexpected state!");
+
+	return 0;
+}
+
 static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct cpl_fw6_msg *rpl = cplhdr(skb);
@@ -4037,6 +4190,7 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
 	[CPL_CLOSE_CON_RPL] = close_con_rpl,
 	[CPL_RDMA_TERMINATE] = terminate,
 	[CPL_FW4_ACK] = fw4_ack,
+	[CPL_GET_TCB_RPL] = read_tcb_rpl,
 	[CPL_FW6_MSG] = deferred_fw6_msg,
 	[CPL_RX_PKT] = rx_pkt,
 	[FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
@@ -4268,6 +4422,7 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
 	[CPL_RDMA_TERMINATE] = sched,
 	[CPL_FW4_ACK] = sched,
 	[CPL_SET_TCB_RPL] = set_tcb_rpl,
+	[CPL_GET_TCB_RPL] = sched,
 	[CPL_FW6_MSG] = fw6_msg,
 	[CPL_RX_PKT] = sched
 };
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index d499cd61c0e8..c79cf63fb0bb 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -720,11 +720,8 @@ static const struct file_operations ep_debugfs_fops = {
 	.read    = debugfs_read,
 };
 
-static int setup_debugfs(struct c4iw_dev *devp)
+static void setup_debugfs(struct c4iw_dev *devp)
 {
-	if (!devp->debugfs_root)
-		return -1;
-
 	debugfs_create_file_size("qps", S_IWUSR, devp->debugfs_root,
 				 (void *)devp, &qp_debugfs_fops, 4096);
 
@@ -740,7 +737,6 @@ static int setup_debugfs(struct c4iw_dev *devp)
 	if (c4iw_wr_log)
 		debugfs_create_file_size("wr_log", S_IWUSR, devp->debugfs_root,
 					 (void *)devp, &wr_log_debugfs_fops, 4096);
-	return 0;
 }
 
 void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
@@ -981,7 +977,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 		pr_info("%s: On-Chip Queues not supported on this device\n",
 			pci_name(infop->pdev));
 
-	devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
+	devp = ib_alloc_device(c4iw_dev, ibdev);
 	if (!devp) {
 		pr_err("Cannot allocate ib device\n");
 		return ERR_PTR(-ENOMEM);
@@ -1564,8 +1560,6 @@ static int __init c4iw_init_module(void)
 		return err;
 
 	c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
-	if (!c4iw_debugfs_root)
-		pr_warn("could not create debugfs entry, continuing\n");
 
 	reg_workq = create_singlethread_workqueue("Register_iWARP_device");
 	if (!reg_workq) {
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index f0fceadd0d12..5a5da41faef6 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -589,7 +589,6 @@ struct c4iw_ucontext {
 	u32 key;
 	spinlock_t mmap_lock;
 	struct list_head mmaps;
-	struct kref kref;
 	bool is_32b_cqe;
 };
 
@@ -598,18 +597,6 @@ static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
 	return container_of(c, struct c4iw_ucontext, ibucontext);
 }
 
-void _c4iw_free_ucontext(struct kref *kref);
-
-static inline void c4iw_put_ucontext(struct c4iw_ucontext *ucontext)
-{
-	kref_put(&ucontext->kref, _c4iw_free_ucontext);
-}
-
-static inline void c4iw_get_ucontext(struct c4iw_ucontext *ucontext)
-{
-	kref_get(&ucontext->kref);
-}
-
 struct c4iw_mm_entry {
 	struct list_head entry;
 	u64 addr;
@@ -982,6 +969,9 @@ struct c4iw_ep {
 	int rcv_win;
 	u32 snd_wscale;
 	struct c4iw_ep_stats stats;
+	u32 srqe_idx;
+	u32 rx_pdu_out_cnt;
+	struct sk_buff *peer_abort_skb;
 };
 
 static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 7b76e6f81aeb..5baa31ab6366 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -502,10 +502,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 			       u64 virt, int acc, struct ib_udata *udata)
 {
 	__be64 *pages;
-	int shift, n, len;
-	int i, k, entry;
+	int shift, n, i;
 	int err = -ENOMEM;
-	struct scatterlist *sg;
+	struct sg_dma_page_iter sg_iter;
 	struct c4iw_dev *rhp;
 	struct c4iw_pd *php;
 	struct c4iw_mr *mhp;
@@ -537,11 +536,11 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	mhp->rhp = rhp;
 
-	mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+	mhp->umem = ib_umem_get(udata, start, length, acc, 0);
 	if (IS_ERR(mhp->umem))
 		goto err_free_skb;
 
-	shift = mhp->umem->page_shift;
+	shift = PAGE_SHIFT;
 
 	n = mhp->umem->nmap;
 
 	err = alloc_pbl(mhp, n);
@@ -556,21 +555,16 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	i = n = 0;
 
-	for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
-		len = sg_dma_len(sg) >> shift;
-		for (k = 0; k < len; ++k) {
-			pages[i++] = cpu_to_be64(sg_dma_address(sg) +
-						 (k << shift));
-			if (i == PAGE_SIZE / sizeof *pages) {
-				err = write_pbl(&mhp->rhp->rdev,
-						pages,
-						mhp->attr.pbl_addr + (n << 3), i,
-						mhp->wr_waitp);
-				if (err)
-					goto pbl_done;
-				n += i;
-				i = 0;
-			}
+	for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
+		pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
+		if (i == PAGE_SIZE / sizeof(*pages)) {
+			err = write_pbl(&mhp->rhp->rdev, pages,
+					mhp->attr.pbl_addr + (n << 3), i,
+					mhp->wr_waitp);
+			if (err)
+				goto pbl_done;
+			n += i;
+			i = 0;
 		}
 	}
 
@@ -684,8 +678,8 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
 			  mhp->wr_waitp);
 	kfree_skb(mhp->dereg_skb);
 	c4iw_put_wr_wait(mhp->wr_waitp);
-	kfree(mhp);
 	pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp);
+	kfree(mhp);
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 586b0c37481f..507c54572cc9 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -58,51 +58,34 @@ static int fastreg_support = 1;
 module_param(fastreg_support, int, 0644);
 MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
 
-void _c4iw_free_ucontext(struct kref *kref)
+static void c4iw_dealloc_ucontext(struct ib_ucontext *context)
 {
-	struct c4iw_ucontext *ucontext;
+	struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
 	struct c4iw_dev *rhp;
 	struct c4iw_mm_entry *mm, *tmp;
 
-	ucontext = container_of(kref, struct c4iw_ucontext, kref);
+	pr_debug("context %p\n", context);
 	rhp = to_c4iw_dev(ucontext->ibucontext.device);
 
-	pr_debug("ucontext %p\n", ucontext);
 	list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
 		kfree(mm);
 	c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
-	kfree(ucontext);
 }
 
-static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
+static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext,
+			       struct ib_udata *udata)
 {
-	struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
-
-	pr_debug("context %p\n", context);
-	c4iw_put_ucontext(ucontext);
-	return 0;
-}
-
-static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
-					       struct ib_udata *udata)
-{
-	struct c4iw_ucontext *context;
+	struct ib_device *ibdev = ucontext->device;
+	struct c4iw_ucontext *context = to_c4iw_ucontext(ucontext);
 	struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
 	struct c4iw_alloc_ucontext_resp uresp;
 	int ret = 0;
 	struct c4iw_mm_entry *mm = NULL;
 
 	pr_debug("ibdev %p\n", ibdev);
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
 	c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
 	INIT_LIST_HEAD(&context->mmaps);
 	spin_lock_init(&context->mmap_lock);
-	kref_init(&context->kref);
 
 	if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
 		pr_err_once("Warning - downlevel libcxgb4 (non-fatal), device status page disabled\n");
@@ -111,7 +94,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
 		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
 		if (!mm) {
 			ret = -ENOMEM;
-			goto err_free;
+			goto err;
 		}
 
 		uresp.status_page_size = PAGE_SIZE;
@@ -131,13 +114,11 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
 		mm->len = PAGE_SIZE;
 		insert_mmap(context, mm);
 	}
-	return &context->ibucontext;
+	return 0;
 err_mm:
 	kfree(mm);
-err_free:
-	kfree(context);
 err:
-	return ERR_PTR(ret);
+	return ret;
 }
 
 static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
@@ -209,7 +190,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	return ret;
 }
 
-static int c4iw_deallocate_pd(struct ib_pd *pd)
+static void c4iw_deallocate_pd(struct ib_pd *pd)
 {
 	struct c4iw_dev *rhp;
 	struct c4iw_pd *php;
@@ -221,15 +202,13 @@ static int c4iw_deallocate_pd(struct ib_pd *pd)
 	mutex_lock(&rhp->rdev.stats.lock);
 	rhp->rdev.stats.pd.cur--;
 	mutex_unlock(&rhp->rdev.stats.lock);
-	kfree(php);
-	return 0;
 }
 
-static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
-				      struct ib_ucontext *context,
-				      struct ib_udata *udata)
+static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context,
+			    struct ib_udata *udata)
 {
-	struct c4iw_pd *php;
+	struct c4iw_pd *php = to_c4iw_pd(pd);
+	struct ib_device *ibdev = pd->device;
 	u32 pdid;
 	struct c4iw_dev *rhp;
 
@@ -237,12 +216,8 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
 	rhp = (struct c4iw_dev *) ibdev;
 	pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table);
 	if (!pdid)
-		return ERR_PTR(-EINVAL);
-	php = kzalloc(sizeof(*php), GFP_KERNEL);
-	if (!php) {
-		c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid);
-		return ERR_PTR(-ENOMEM);
-	}
+		return -EINVAL;
+
 	php->pdid = pdid;
 	php->rhp = rhp;
 	if (context) {
@@ -250,7 +225,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
 
 		if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
 			c4iw_deallocate_pd(&php->ibpd);
-			return ERR_PTR(-EFAULT);
+			return -EFAULT;
 		}
 	}
 	mutex_lock(&rhp->rdev.stats.lock);
@@ -259,7 +234,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
 		rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
 	mutex_unlock(&rhp->rdev.stats.lock);
 	pr_debug("pdid 0x%0x ptr 0x%p\n", pdid, php);
-	return &php->ibpd;
+	return 0;
 }
 
 static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
@@ -376,8 +351,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
 static ssize_t hw_rev_show(struct device *dev,
 			   struct device_attribute *attr, char *buf)
 {
-	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
-						 ibdev.dev);
+	struct c4iw_dev *c4iw_dev =
+			rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
+
 	pr_debug("dev 0x%p\n", dev);
 	return sprintf(buf, "%d\n",
 		       CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
@@ -387,8 +363,8 @@ static DEVICE_ATTR_RO(hw_rev);
 static ssize_t hca_type_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
-	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
-						 ibdev.dev);
+	struct c4iw_dev *c4iw_dev =
+			rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
 	struct ethtool_drvinfo info;
 	struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0];
 
@@ -401,8 +377,9 @@ static DEVICE_ATTR_RO(hca_type);
 static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
-	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
-						 ibdev.dev);
+	struct c4iw_dev *c4iw_dev =
+			rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
+
 	pr_debug("dev 0x%p\n", dev);
 	return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
 		       c4iw_dev->rdev.lldi.pdev->device);
@@ -547,6 +524,7 @@ static const struct ib_device_ops c4iw_dev_ops = {
 	.destroy_cq = c4iw_destroy_cq,
 	.destroy_qp = c4iw_destroy_qp,
 	.destroy_srq = c4iw_destroy_srq,
+	.fill_res_entry = fill_res_entry,
 	.get_dev_fw_str = get_dev_fw_str,
 	.get_dma_mr = c4iw_get_dma_mr,
 	.get_hw_stats = c4iw_get_mib,
@@ -567,6 +545,8 @@ static const struct ib_device_ops c4iw_dev_ops = {
 	.query_qp = c4iw_ib_query_qp,
 	.reg_user_mr = c4iw_reg_user_mr,
 	.req_notify_cq = c4iw_arm_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext),
 };
 
 void c4iw_register_device(struct work_struct *work)
@@ -613,7 +593,7 @@ void c4iw_register_device(struct work_struct *work)
 	dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
 	dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
 
-	dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
+	dev->ibdev.iwcm = kzalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
 	if (!dev->ibdev.iwcm) {
 		ret = -ENOMEM;
 		goto err_dealloc_ctx;
@@ -627,14 +607,13 @@ void c4iw_register_device(struct work_struct *work)
 	dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref;
 	dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref;
 	dev->ibdev.iwcm->get_qp = c4iw_get_qp;
-	dev->ibdev.res.fill_res_entry = fill_res_entry;
 	memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
 	       sizeof(dev->ibdev.iwcm->ifname));
 
 	rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
 	dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
 	ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops);
-	ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL);
+	ret = ib_register_device(&dev->ibdev, "cxgb4_%d");
 	if (ret)
 		goto err_kfree_iwcm;
 	return;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 504cf525508f..d3a82839f5ea 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -31,6 +31,7 @@
  */
 
 #include <linux/module.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include "iw_cxgb4.h"
 
@@ -632,7 +633,10 @@ static void build_rdma_write_cmpl(struct t4_sq *sq,
 
 	wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
 	wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
-	wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
+	if (wr->next->opcode == IB_WR_SEND)
+		wcwr->stag_inv = 0;
+	else
+		wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
 	wcwr->r2 = 0;
 	wcwr->r3 = 0;
 
@@ -726,7 +730,10 @@ static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr)
 
 	/* SEND_WITH_INV swsqe */
 	swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
-	swsqe->opcode = FW_RI_SEND_WITH_INV;
+	if (wr->next->opcode == IB_WR_SEND)
+		swsqe->opcode = FW_RI_SEND;
+	else
+		swsqe->opcode = FW_RI_SEND_WITH_INV;
 	swsqe->idx = qhp->wq.sq.pidx;
 	swsqe->complete = 0;
 	swsqe->signaled = send_signaled;
@@ -897,8 +904,6 @@ static void free_qp_work(struct work_struct *work)
 	destroy_qp(&rhp->rdev, &qhp->wq,
 		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
 
-	if (ucontext)
-		c4iw_put_ucontext(ucontext);
 	c4iw_put_wr_wait(qhp->wr_waitp);
 	kfree(qhp);
 }
@@ -1133,9 +1138,9 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	/*
 	 * Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is
	 * the response for small NVMEe-oF READ requests. If the chain is
-	 * exactly a WRITE->SEND_WITH_INV and the sgl depths and lengths
-	 * meet the requirements of the fw_ri_write_cmpl_wr work request,
-	 * then build and post the write_cmpl WR. If any of the tests
+	 * exactly a WRITE->SEND_WITH_INV or a WRITE->SEND and the sgl depths
+	 * and lengths meet the requirements of the fw_ri_write_cmpl_wr work
+	 * request, then build and post the write_cmpl WR. If any of the tests
 	 * below are not true, then we continue on with the tradtional WRITE
 	 * and SEND WRs.
 	 */
@@ -1145,7 +1150,8 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	    wr && wr->next && !wr->next->next &&
 	    wr->opcode == IB_WR_RDMA_WRITE &&
 	    wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL &&
-	    wr->next->opcode == IB_WR_SEND_WITH_INV &&
+	    (wr->next->opcode == IB_WR_SEND ||
+	     wr->next->opcode == IB_WR_SEND_WITH_INV) &&
 	    wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE &&
 	    wr->next->num_sge == 1 && num_wrs >= 2) {
 		post_write_cmpl(qhp, wr);
@@ -2129,7 +2135,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	struct c4iw_cq *rchp;
 	struct c4iw_create_qp_resp uresp;
 	unsigned int sqsize, rqsize = 0;
-	struct c4iw_ucontext *ucontext;
+	struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct c4iw_ucontext, ibucontext);
 	int ret;
 	struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
 	struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
@@ -2163,8 +2170,6 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	if (sqsize < 8)
 		sqsize = 8;
 
-	ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL;
-
 	qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
 	if (!qhp)
 		return ERR_PTR(-ENOMEM);
@@ -2331,7 +2336,6 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 			insert_mmap(ucontext, ma_sync_key_mm);
 		}
 
-		c4iw_get_ucontext(ucontext);
 		qhp->ucontext = ucontext;
 	}
 	if (!attrs->srq) {
@@ -2589,7 +2593,7 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
 	/* build fw_ri_res_wr */
 	wr_len = sizeof(*res_wr) + sizeof(*res);
 
-	skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
+	skb = alloc_skb(wr_len, GFP_KERNEL);
 	if (!skb)
 		goto err_free_queue;
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
@@ -2711,7 +2715,8 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
 	rqsize = attrs->attr.max_wr + 1;
 	rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
 
-	ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL;
+	ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
					     ibucontext);
 
 	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
 	if (!srq)
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index fff6d48d262f..b170817b2741 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -35,6 +35,7 @@
 #include "t4_regs.h"
 #include "t4_values.h"
 #include "t4_msg.h"
+#include "t4_tcb.h"
 #include "t4fw_ri_api.h"
 
 #define T4_MAX_NUM_PD 65536
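The read_tcb_rpl() handler added in the cm.c hunk pulls fields out of the TCB that the CPL_GET_TCB reply returns as big-endian 64-bit words. A standalone userspace illustration of that indexing math (the values are made up and word 30 is an arbitrary even word; only the arithmetic mirrors the t4_tcb_get_field32() helper above):

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of the driver helper: 32-bit TCB word N lives in 64-bit chunk
 * (31 - N) / 2 of the reply; odd-numbered words sit in the upper half,
 * even-numbered words in the lower half. */
static uint32_t tcb_get_field32(const uint64_t *tcb_be, uint16_t word,
				uint32_t mask, uint32_t shift)
{
	uint64_t t = be64toh(tcb_be[(31 - word) / 2]);

	if (word & 0x1)
		shift += 32;
	return (t >> shift) & mask;
}

int main(void)
{
	uint64_t tcb[16] = { 0 };

	/* Even word 30 occupies the low 32 bits of chunk (31 - 30) / 2 = 0. */
	tcb[0] = htobe64(0x1234);

	printf("word 30 = 0x%x\n", tcb_get_field32(tcb, 30, 0xffffffff, 0));
	return 0;
}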