summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c183
-rw-r--r--include/rdma/rdma_vt.h7
-rw-r--r--include/rdma/rdmavt_qp.h26
3 files changed, 204 insertions, 12 deletions
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 43346a773ff3..bd2d91a5b19a 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -53,6 +53,27 @@
#include "qp.h"
#include "vt.h"
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; rvt_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
+const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = 0,
+ [IB_QPS_INIT] = RVT_POST_RECV_OK,
+ [IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK,
+ [IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+ RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK |
+ RVT_PROCESS_NEXT_SEND_OK,
+ [IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+ RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK,
+ [IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+ RVT_POST_SEND_OK | RVT_FLUSH_SEND,
+ [IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV |
+ RVT_POST_SEND_OK | RVT_FLUSH_SEND,
+};
+EXPORT_SYMBOL(ib_rvt_state_ops);
+
static void get_map_page(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map,
gfp_t gfp)
@@ -586,7 +607,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
/*
* Return the address of the RWQ as the offset to mmap.
- * See hfi1_mmap() for details.
+ * See rvt_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
if (!qp->r_rq.wq) {
@@ -750,6 +771,118 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
}
/**
+ * rvt_post_one_wr - post one RC, UC, or UD send work request
+ * @qp: the QP to post on
+ * @wr: the work request to send
+ */
+static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr)
+{
+ struct rvt_swqe *wqe;
+ u32 next;
+ int i;
+ int j;
+ int acc;
+ struct rvt_lkey_table *rkt;
+ struct rvt_pd *pd;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+ /* IB spec says that num_sge == 0 is OK. */
+ if (unlikely(wr->num_sge > qp->s_max_sge))
+ return -EINVAL;
+
+ /*
+ * Don't allow RDMA reads or atomic operations on UC or
+ * undefined operations.
+ * Make sure buffer is large enough to hold the result for atomics.
+ */
+ if (qp->ibqp.qp_type == IB_QPT_UC) {
+ if ((unsigned)wr->opcode >= IB_WR_RDMA_READ)
+ return -EINVAL;
+ } else if (qp->ibqp.qp_type != IB_QPT_RC) {
+ /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
+ if (wr->opcode != IB_WR_SEND &&
+ wr->opcode != IB_WR_SEND_WITH_IMM)
+ return -EINVAL;
+ /* Check UD destination address PD */
+ if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
+ return -EINVAL;
+ } else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
+ return -EINVAL;
+ } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
+ (wr->num_sge == 0 ||
+ wr->sg_list[0].length < sizeof(u64) ||
+ wr->sg_list[0].addr & (sizeof(u64) - 1))) {
+ return -EINVAL;
+ } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
+ return -EINVAL;
+ }
+
+ next = qp->s_head + 1;
+ if (next >= qp->s_size)
+ next = 0;
+ if (next == qp->s_last)
+ return -ENOMEM;
+
+ rkt = &rdi->lkey_table;
+ pd = ibpd_to_rvtpd(qp->ibqp.pd);
+ wqe = rvt_get_swqe_ptr(qp, qp->s_head);
+
+ if (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC)
+ memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+ else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE ||
+ wr->opcode == IB_WR_RDMA_READ)
+ memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+ else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+ else
+ memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
+ wqe->length = 0;
+ j = 0;
+ if (wr->num_sge) {
+ acc = wr->opcode >= IB_WR_RDMA_READ ?
+ IB_ACCESS_LOCAL_WRITE : 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ u32 length = wr->sg_list[i].length;
+ int ok;
+
+ if (length == 0)
+ continue;
+ ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
+ &wr->sg_list[i], acc);
+ if (!ok)
+ goto bail_inval_free;
+ wqe->length += length;
+ j++;
+ }
+ wqe->wr.num_sge = j;
+ }
+ if (qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_RC) {
+ if (wqe->length > 0x80000000U)
+ goto bail_inval_free;
+ } else {
+ atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
+ }
+ wqe->ssn = qp->s_ssn++;
+ qp->s_head = next;
+
+ return 0;
+
+bail_inval_free:
+ /* release mr holds */
+ while (j) {
+ struct rvt_sge *sge = &wqe->sg_list[--j];
+
+ rvt_put_mr(sge->mr);
+ }
+ return -EINVAL;
+}
+
+/**
* rvt_post_send - post a send on a QP
* @ibqp: the QP to post the send on
* @wr: the list of work requests to post
@@ -760,20 +893,46 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
+ struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+ unsigned long flags = 0;
+ int call_send;
+ unsigned nreq = 0;
+ int err = 0;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
/*
- * VT-DRIVER-API: do_send()
- * Driver needs to have a do_send() call which is a single entry point
- * to take an already formed packet and throw it out on the wire. Once
- * the packet is sent the driver needs to make an upcall to rvt so the
- * completion queue can be notified and/or any other outstanding
- * work/book keeping can be finished.
- *
- * Note that there should also be a way for rvt to protect itself
- * against hangs in the driver layer. If a send doesn't actually
- * complete in a timely manor rvt needs to return an error event.
+ * Ensure QP state is such that we can send. If not bail out early,
+ * there is no need to do this every time we post a send.
*/
+ if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return -EINVAL;
+ }
- return -EOPNOTSUPP;
+ /*
+ * If the send queue is empty, and we only have a single WR then just go
+ * ahead and kick the send engine into gear. Otherwise we will always
+ * just schedule the send to happen later.
+ */
+ call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next;
+
+ for (; wr; wr = wr->next) {
+ err = rvt_post_one_wr(qp, wr);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ goto bail;
+ }
+ nreq++;
+ }
+bail:
+ if (nreq && !call_send)
+ rdi->driver_f.schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ if (nreq && call_send)
+ rdi->driver_f.do_send(qp);
+ return err;
}
/**
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 79080e3b09f8..36e4fb4c0df3 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -231,6 +231,8 @@ struct rvt_driver_provided {
gfp_t gfp);
void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void (*notify_qp_reset)(struct rvt_qp *qp);
+ void (*schedule_send)(struct rvt_qp *qp);
+ void (*do_send)(struct rvt_qp *qp);
/*--------------------*/
/* Optional functions */
@@ -312,6 +314,11 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq)
return container_of(ibsrq, struct rvt_srq, ibsrq);
}
+static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct rvt_qp, ibqp);
+}
+
static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
{
/*
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index bce0a03a7c07..3189f195538c 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -129,6 +129,17 @@
/* Number of bits to pay attention to in the opcode for checking qp type */
#define RVT_OPCODE_QP_MASK 0xE0
+/* Flags for checking QP state (see ib_rvt_state_ops[]) */
+#define RVT_POST_SEND_OK 0x01
+#define RVT_POST_RECV_OK 0x02
+#define RVT_PROCESS_RECV_OK 0x04
+#define RVT_PROCESS_SEND_OK 0x08
+#define RVT_PROCESS_NEXT_SEND_OK 0x10
+#define RVT_FLUSH_SEND 0x20
+#define RVT_FLUSH_RECV 0x40
+#define RVT_PROCESS_OR_FLUSH_SEND \
+ (RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND)
+
/*
* Send work request queue entry.
* The size of the sg_list is determined when the QP is created and stored
@@ -373,4 +384,19 @@ struct rvt_qp_ibdev {
struct rvt_qpn_table qpn_table;
};
+/*
+ * Since struct rvt_swqe is not a fixed size, we can't simply index into
+ * struct hfi1_qp.s_wq. This function does the array index computation.
+ */
+static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp,
+ unsigned n)
+{
+ return (struct rvt_swqe *)((char *)qp->s_wq +
+ (sizeof(struct rvt_swqe) +
+ qp->s_max_sge *
+ sizeof(struct rvt_sge)) * n);
+}
+
+extern const int ib_rvt_state_ops[];
+
#endif /* DEF_RDMAVT_INCQP_H */