author      Dennis Dalessandro <dennis.dalessandro@intel.com>   2016-01-22 22:00:22 +0100
committer   Doug Ledford <dledford@redhat.com>                  2016-03-11 02:37:24 +0100
commit      bfbac097b6e8023e10fdadab2527d0a1a3160d7e (patch)
tree        15dda8d2f8d2a5806ce577cfd97207a776082977 /drivers
parent      IB/rdmavt: Add completion queue functions (diff)
download    linux-bfbac097b6e8023e10fdadab2527d0a1a3160d7e.tar.xz
            linux-bfbac097b6e8023e10fdadab2527d0a1a3160d7e.zip
IB/rdmavt: Add post send to rdmavt
Add post_send and post_one_send to rdmavt. The ULP provides a WQE list to
rdmavt, which walks the list and queues each element. rdmavt then either
queues the work to be done in the driver or kicks the driver's progress
routine.
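For orientation, a ULP reaches rvt_post_send() by building an ib_send_wr chain and posting it through the standard kernel verbs entry point. The following is a minimal, hypothetical caller-side sketch, not part of this patch: the qp, mr, and buf_dma values are assumed to exist already and the sizes are illustrative.

	struct ib_sge sge = {
		.addr   = buf_dma,	/* DMA address of an already-mapped buffer (assumed) */
		.length = 4096,
		.lkey   = mr->lkey,	/* lkey from a previously registered MR (assumed) */
	};
	struct ib_send_wr wr = {
		.wr_id      = 1,	/* opaque cookie returned in the completion */
		.opcode     = IB_WR_SEND,
		.sg_list    = &sge,
		.num_sge    = 1,
		.send_flags = IB_SEND_SIGNALED,
	};
	struct ib_send_wr *bad_wr;
	int ret;

	/* Dispatches to rvt_post_send() when the device is backed by rdmavt. */
	ret = ib_post_send(qp, &wr, &bad_wr);
	if (ret)
		pr_err("post_send failed at wr_id %llu: %d\n", bad_wr->wr_id, ret);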
A follow-on patch is needed to add another lock for the head of the queue
so that it can be added to and read from in parallel. That change will
touch protocol handlers and require other changes in the drivers, so it
will be done separately.
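To make the locking concern concrete: the send queue is a ring indexed by s_head (where posts are added) and s_last (where completions drain), and in this patch both ends are manipulated under the s_lock that rvt_post_send() takes below; the follow-on lock would let the head be updated while the tail is read. The queue-full test used in rvt_post_one_wr() reduces to the following stand-alone sketch (sq_full is a hypothetical helper, not part of the patch):

	/* Hypothetical helper mirroring the queue-full test in rvt_post_one_wr(). */
	static bool sq_full(u32 s_head, u32 s_last, u32 s_size)
	{
		u32 next = s_head + 1;

		if (next >= s_size)	/* wrap the producer index */
			next = 0;
		return next == s_last;	/* one slot stays empty so full != empty */
	}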
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers')
-rw-r--r--   drivers/infiniband/sw/rdmavt/qp.c | 183
1 file changed, 171 insertions, 12 deletions
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 43346a773ff3..bd2d91a5b19a 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -53,6 +53,27 @@
 #include "qp.h"
 #include "vt.h"
 
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; rvt_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
+const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
+	[IB_QPS_RESET] = 0,
+	[IB_QPS_INIT] = RVT_POST_RECV_OK,
+	[IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK,
+	[IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+	    RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK |
+	    RVT_PROCESS_NEXT_SEND_OK,
+	[IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+	    RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK,
+	[IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+	    RVT_POST_SEND_OK | RVT_FLUSH_SEND,
+	[IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV |
+	    RVT_POST_SEND_OK | RVT_FLUSH_SEND,
+};
+EXPORT_SYMBOL(ib_rvt_state_ops);
+
 static void get_map_page(struct rvt_qpn_table *qpt,
 			 struct rvt_qpn_map *map,
 			 gfp_t gfp)
@@ -586,7 +607,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 
 	/*
 	 * Return the address of the RWQ as the offset to mmap.
-	 * See hfi1_mmap() for details.
+	 * See rvt_mmap() for details.
 	 */
 	if (udata && udata->outlen >= sizeof(__u64)) {
 		if (!qp->r_rq.wq) {
@@ -750,6 +771,118 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 }
 
 /**
+ * rvt_post_one_wr - post one RC, UC, or UD send work request
+ * @qp: the QP to post on
+ * @wr: the work request to send
+ */
+static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr)
+{
+	struct rvt_swqe *wqe;
+	u32 next;
+	int i;
+	int j;
+	int acc;
+	struct rvt_lkey_table *rkt;
+	struct rvt_pd *pd;
+	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+	/* IB spec says that num_sge == 0 is OK. */
+	if (unlikely(wr->num_sge > qp->s_max_sge))
+		return -EINVAL;
+
+	/*
+	 * Don't allow RDMA reads or atomic operations on UC or
+	 * undefined operations.
+	 * Make sure buffer is large enough to hold the result for atomics.
+	 */
+	if (qp->ibqp.qp_type == IB_QPT_UC) {
+		if ((unsigned)wr->opcode >= IB_WR_RDMA_READ)
+			return -EINVAL;
+	} else if (qp->ibqp.qp_type != IB_QPT_RC) {
+		/* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
+		if (wr->opcode != IB_WR_SEND &&
+		    wr->opcode != IB_WR_SEND_WITH_IMM)
+			return -EINVAL;
+		/* Check UD destination address PD */
+		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
+			return -EINVAL;
+	} else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
+		return -EINVAL;
+	} else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
+		   (wr->num_sge == 0 ||
+		    wr->sg_list[0].length < sizeof(u64) ||
+		    wr->sg_list[0].addr & (sizeof(u64) - 1))) {
+		return -EINVAL;
+	} else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
+		return -EINVAL;
+	}
+
+	next = qp->s_head + 1;
+	if (next >= qp->s_size)
+		next = 0;
+	if (next == qp->s_last)
+		return -ENOMEM;
+
+	rkt = &rdi->lkey_table;
+	pd = ibpd_to_rvtpd(qp->ibqp.pd);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_head);
+
+	if (qp->ibqp.qp_type != IB_QPT_UC &&
+	    qp->ibqp.qp_type != IB_QPT_RC)
+		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+		 wr->opcode == IB_WR_RDMA_WRITE ||
+		 wr->opcode == IB_WR_RDMA_READ)
+		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+	else
+		memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
+	wqe->length = 0;
+	j = 0;
+	if (wr->num_sge) {
+		acc = wr->opcode >= IB_WR_RDMA_READ ?
+			IB_ACCESS_LOCAL_WRITE : 0;
+		for (i = 0; i < wr->num_sge; i++) {
+			u32 length = wr->sg_list[i].length;
+			int ok;
+
+			if (length == 0)
+				continue;
+			ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
+					 &wr->sg_list[i], acc);
+			if (!ok)
+				goto bail_inval_free;
+			wqe->length += length;
+			j++;
+		}
+		wqe->wr.num_sge = j;
+	}
+	if (qp->ibqp.qp_type == IB_QPT_UC ||
+	    qp->ibqp.qp_type == IB_QPT_RC) {
+		if (wqe->length > 0x80000000U)
+			goto bail_inval_free;
+	} else {
+		atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
+	}
+	wqe->ssn = qp->s_ssn++;
+	qp->s_head = next;
+
+	return 0;
+
+bail_inval_free:
+	/* release mr holds */
+	while (j) {
+		struct rvt_sge *sge = &wqe->sg_list[--j];
+
+		rvt_put_mr(sge->mr);
+	}
+	return -EINVAL;
+}
+
+/**
  * rvt_post_send - post a send on a QP
  * @ibqp: the QP to post the send on
  * @wr: the list of work requests to post
@@ -760,20 +893,46 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		  struct ib_send_wr **bad_wr)
 {
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+	unsigned long flags = 0;
+	int call_send;
+	unsigned nreq = 0;
+	int err = 0;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+
 	/*
-	 * VT-DRIVER-API: do_send()
-	 * Driver needs to have a do_send() call which is a single entry point
-	 * to take an already formed packet and throw it out on the wire. Once
-	 * the packet is sent the driver needs to make an upcall to rvt so the
-	 * completion queue can be notified and/or any other outstanding
-	 * work/book keeping can be finished.
-	 *
-	 * Note that there should also be a way for rvt to protect itself
-	 * against hangs in the driver layer. If a send doesn't actually
-	 * complete in a timely manor rvt needs to return an error event.
+	 * Ensure QP state is such that we can send. If not bail out early,
+	 * there is no need to do this every time we post a send.
 	 */
+	if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+		return -EINVAL;
+	}
 
-	return -EOPNOTSUPP;
+	/*
+	 * If the send queue is empty, and we only have a single WR then just go
+	 * ahead and kick the send engine into gear. Otherwise we will always
+	 * just schedule the send to happen later.
+	 */
+	call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next;
+
+	for (; wr; wr = wr->next) {
+		err = rvt_post_one_wr(qp, wr);
+		if (unlikely(err)) {
+			*bad_wr = wr;
+			goto bail;
+		}
+		nreq++;
+	}
+bail:
+	if (nreq && !call_send)
+		rdi->driver_f.schedule_send(qp);
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	if (nreq && call_send)
+		rdi->driver_f.do_send(qp);
+	return err;
 }
 
 /**
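A note on the call_send/schedule_send split at the bottom of rvt_post_send(): when the send queue was empty and only a single WR was posted, the work is handed directly to the driver's do_send hook; otherwise it is deferred through schedule_send. Both are callbacks the driver supplies in rdi->driver_f. A rough sketch of how a driver might wire them up follows; the mydrv_* names are placeholders, not from any real driver, and the hooks are assumed to return void, as the calls above suggest.

	/* Sketch only: hypothetical driver-side hook registration for rdmavt. */
	static void mydrv_do_send(struct rvt_qp *qp)
	{
		/* Synchronous path: build and put the packet(s) for this QP on the wire now. */
	}

	static void mydrv_schedule_send(struct rvt_qp *qp)
	{
		/* Deferred path: queue the QP on the driver's send engine (e.g. a workqueue). */
	}

	static void mydrv_init_driver_funcs(struct rvt_dev_info *rdi)
	{
		rdi->driver_f.do_send = mydrv_do_send;
		rdi->driver_f.schedule_send = mydrv_schedule_send;
	}

Note also the lock context in the patch: schedule_send() is invoked while s_lock is still held, whereas do_send() runs only after spin_unlock_irqrestore(), so the synchronous path executes outside the post-send lock.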