summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/Makefile4
-rw-r--r--drivers/infiniband/core/cma.c4
-rw-r--r--drivers/infiniband/core/iwcm.c4
-rw-r--r--drivers/infiniband/core/iwpm_util.c1
-rw-r--r--drivers/infiniband/core/mr_pool.c86
-rw-r--r--drivers/infiniband/core/netlink.c5
-rw-r--r--drivers/infiniband/core/rw.c727
-rw-r--r--drivers/infiniband/core/sa_query.c4
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c11
-rw-r--r--drivers/infiniband/core/verbs.c172
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c7
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c611
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h14
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c12
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw.h7
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c148
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.h10
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_ctrl.c185
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_d.h4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c14
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c56
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_osdep.h1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_pble.c9
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.c2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_status.h1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_type.h14
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_uk.c106
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_user.h36
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_utils.c47
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c294
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.h3
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_vf.c2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_vf.h2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_virtchnl.c102
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c9
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h5
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c7
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c5
-rw-r--r--drivers/infiniband/hw/mlx5/main.c102
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h8
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c25
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c20
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c60
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c43
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.h2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c7
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h5
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c10
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h6
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c6
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c13
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c67
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c2
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c4
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c841
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h69
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c229
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h2
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c729
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h31
64 files changed, 2900 insertions, 2124 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index f818538a7f4e..26987d9d7e1c 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -8,9 +8,9 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)
-ib_core-y := packer.o ud_header.o verbs.o cq.o sysfs.o \
+ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
- roce_gid_mgmt.o
+ roce_gid_mgmt.o mr_pool.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 93ab0ae97208..f0c91ba3178a 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -800,6 +800,7 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
if (id->device != pd->device)
return -EINVAL;
+ qp_init_attr->port_num = id->port_num;
qp = ib_create_qp(pd, qp_init_attr);
if (IS_ERR(qp))
return PTR_ERR(qp);
@@ -4294,7 +4295,8 @@ static int __init cma_init(void)
if (ret)
goto err;
- if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
+ if (ibnl_add_client(RDMA_NL_RDMA_CM, ARRAY_SIZE(cma_cb_table),
+ cma_cb_table))
pr_warn("RDMA CMA: failed to add netlink callback\n");
cma_configfs_init();
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index e28a160cdab0..f0572049d291 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -459,7 +459,7 @@ static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr,
if (pm_addr->ss_family == AF_INET) {
struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr;
- if (pm4_addr->sin_addr.s_addr == INADDR_ANY) {
+ if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) {
struct sockaddr_in *cm4_addr =
(struct sockaddr_in *)cm_addr;
struct sockaddr_in *cm4_outaddr =
@@ -1175,7 +1175,7 @@ static int __init iw_cm_init(void)
if (ret)
pr_err("iw_cm: couldn't init iwpm\n");
- ret = ibnl_add_client(RDMA_NL_IWCM, RDMA_NL_IWPM_NUM_OPS,
+ ret = ibnl_add_client(RDMA_NL_IWCM, ARRAY_SIZE(iwcm_nl_cb_table),
iwcm_nl_cb_table);
if (ret)
pr_err("iw_cm: couldn't register netlink callbacks\n");
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 9b2bf2fb2b00..b65e06c560d7 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -634,6 +634,7 @@ static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
pr_warn("%s Unable to put NLMSG_DONE\n", __func__);
+ dev_kfree_skb(skb);
return -ENOMEM;
}
nlh->nlmsg_type = NLMSG_DONE;
diff --git a/drivers/infiniband/core/mr_pool.c b/drivers/infiniband/core/mr_pool.c
new file mode 100644
index 000000000000..49d478b2ea94
--- /dev/null
+++ b/drivers/infiniband/core/mr_pool.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2016 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <rdma/ib_verbs.h>
+#include <rdma/mr_pool.h>
+
+struct ib_mr *ib_mr_pool_get(struct ib_qp *qp, struct list_head *list)
+{
+ struct ib_mr *mr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->mr_lock, flags);
+ mr = list_first_entry_or_null(list, struct ib_mr, qp_entry);
+ if (mr) {
+ list_del(&mr->qp_entry);
+ qp->mrs_used++;
+ }
+ spin_unlock_irqrestore(&qp->mr_lock, flags);
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_mr_pool_get);
+
+void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->mr_lock, flags);
+ list_add(&mr->qp_entry, list);
+ qp->mrs_used--;
+ spin_unlock_irqrestore(&qp->mr_lock, flags);
+}
+EXPORT_SYMBOL(ib_mr_pool_put);
+
+int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr,
+ enum ib_mr_type type, u32 max_num_sg)
+{
+ struct ib_mr *mr;
+ unsigned long flags;
+ int ret, i;
+
+ for (i = 0; i < nr; i++) {
+ mr = ib_alloc_mr(qp->pd, type, max_num_sg);
+ if (IS_ERR(mr)) {
+ ret = PTR_ERR(mr);
+ goto out;
+ }
+
+ spin_lock_irqsave(&qp->mr_lock, flags);
+ list_add_tail(&mr->qp_entry, list);
+ spin_unlock_irqrestore(&qp->mr_lock, flags);
+ }
+
+ return 0;
+out:
+ ib_mr_pool_destroy(qp, list);
+ return ret;
+}
+EXPORT_SYMBOL(ib_mr_pool_init);
+
+void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list)
+{
+ struct ib_mr *mr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->mr_lock, flags);
+ while (!list_empty(list)) {
+ mr = list_first_entry(list, struct ib_mr, qp_entry);
+ list_del(&mr->qp_entry);
+
+ spin_unlock_irqrestore(&qp->mr_lock, flags);
+ ib_dereg_mr(mr);
+ spin_lock_irqsave(&qp->mr_lock, flags);
+ }
+ spin_unlock_irqrestore(&qp->mr_lock, flags);
+}
+EXPORT_SYMBOL(ib_mr_pool_destroy);
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index d47df9356779..9b8c20c8209b 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -151,12 +151,11 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
struct ibnl_client *client;
int type = nlh->nlmsg_type;
int index = RDMA_NL_GET_CLIENT(type);
- int op = RDMA_NL_GET_OP(type);
+ unsigned int op = RDMA_NL_GET_OP(type);
list_for_each_entry(client, &client_list, list) {
if (client->index == index) {
- if (op < 0 || op >= client->nops ||
- !client->cb_table[op].dump)
+ if (op >= client->nops || !client->cb_table[op].dump)
return -EINVAL;
/*
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
new file mode 100644
index 000000000000..1eb9b1294a63
--- /dev/null
+++ b/drivers/infiniband/core/rw.c
@@ -0,0 +1,727 @@
+/*
+ * Copyright (c) 2016 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <rdma/mr_pool.h>
+#include <rdma/rw.h>
+
+enum {
+ RDMA_RW_SINGLE_WR,
+ RDMA_RW_MULTI_WR,
+ RDMA_RW_MR,
+ RDMA_RW_SIG_MR,
+};
+
+static bool rdma_rw_force_mr;
+module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
+MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");
+
+/*
+ * Check if the device might use memory registration. This is currently only
+ * true for iWarp devices. In the future we can hopefully fine tune this based
+ * on HCA driver input.
+ */
+static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
+{
+ if (rdma_protocol_iwarp(dev, port_num))
+ return true;
+ if (unlikely(rdma_rw_force_mr))
+ return true;
+ return false;
+}
+
+/*
+ * Check if the device will use memory registration for this RW operation.
+ * We currently always use memory registrations for iWarp RDMA READs, and
+ * have a debug option to force usage of MRs.
+ *
+ * XXX: In the future we can hopefully fine tune this based on HCA driver
+ * input.
+ */
+static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
+ enum dma_data_direction dir, int dma_nents)
+{
+ if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE)
+ return true;
+ if (unlikely(rdma_rw_force_mr))
+ return true;
+ return false;
+}
+
+static inline u32 rdma_rw_max_sge(struct ib_device *dev,
+ enum dma_data_direction dir)
+{
+ return dir == DMA_TO_DEVICE ?
+ dev->attrs.max_sge : dev->attrs.max_sge_rd;
+}
+
+static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev)
+{
+ /* arbitrary limit to avoid allocating gigantic resources */
+ return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256);
+}
+
+static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
+ struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
+ u32 sg_cnt, u32 offset)
+{
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ u32 nents = min(sg_cnt, pages_per_mr);
+ int count = 0, ret;
+
+ reg->mr = ib_mr_pool_get(qp, &qp->rdma_mrs);
+ if (!reg->mr)
+ return -EAGAIN;
+
+ if (reg->mr->need_inval) {
+ reg->inv_wr.opcode = IB_WR_LOCAL_INV;
+ reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
+ reg->inv_wr.next = &reg->reg_wr.wr;
+ count++;
+ } else {
+ reg->inv_wr.next = NULL;
+ }
+
+ ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
+ if (ret < nents) {
+ ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
+ return -EINVAL;
+ }
+
+ reg->reg_wr.wr.opcode = IB_WR_REG_MR;
+ reg->reg_wr.mr = reg->mr;
+ reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
+ if (rdma_protocol_iwarp(qp->device, port_num))
+ reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
+ count++;
+
+ reg->sge.addr = reg->mr->iova;
+ reg->sge.length = reg->mr->length;
+ return count;
+}
+
+static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
+ u64 remote_addr, u32 rkey, enum dma_data_direction dir)
+{
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ int i, j, ret = 0, count = 0;
+
+ ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr;
+ ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
+ if (!ctx->reg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < ctx->nr_ops; i++) {
+ struct rdma_rw_reg_ctx *prev = i ? &ctx->reg[i - 1] : NULL;
+ struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
+ u32 nents = min(sg_cnt, pages_per_mr);
+
+ ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sg_cnt,
+ offset);
+ if (ret < 0)
+ goto out_free;
+ count += ret;
+
+ if (prev) {
+ if (reg->mr->need_inval)
+ prev->wr.wr.next = &reg->inv_wr;
+ else
+ prev->wr.wr.next = &reg->reg_wr.wr;
+ }
+
+ reg->reg_wr.wr.next = &reg->wr.wr;
+
+ reg->wr.wr.sg_list = &reg->sge;
+ reg->wr.wr.num_sge = 1;
+ reg->wr.remote_addr = remote_addr;
+ reg->wr.rkey = rkey;
+ if (dir == DMA_TO_DEVICE) {
+ reg->wr.wr.opcode = IB_WR_RDMA_WRITE;
+ } else if (!rdma_cap_read_inv(qp->device, port_num)) {
+ reg->wr.wr.opcode = IB_WR_RDMA_READ;
+ } else {
+ reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+ reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey;
+ }
+ count++;
+
+ remote_addr += reg->sge.length;
+ sg_cnt -= nents;
+ for (j = 0; j < nents; j++)
+ sg = sg_next(sg);
+ offset = 0;
+ }
+
+ ctx->type = RDMA_RW_MR;
+ return count;
+
+out_free:
+ while (--i >= 0)
+ ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
+ kfree(ctx->reg);
+out:
+ return ret;
+}
+
+static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ struct scatterlist *sg, u32 sg_cnt, u32 offset,
+ u64 remote_addr, u32 rkey, enum dma_data_direction dir)
+{
+ struct ib_device *dev = qp->pd->device;
+ u32 max_sge = rdma_rw_max_sge(dev, dir);
+ struct ib_sge *sge;
+ u32 total_len = 0, i, j;
+
+ ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge);
+
+ ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL);
+ if (!ctx->map.sges)
+ goto out;
+
+ ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL);
+ if (!ctx->map.wrs)
+ goto out_free_sges;
+
+ for (i = 0; i < ctx->nr_ops; i++) {
+ struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
+ u32 nr_sge = min(sg_cnt, max_sge);
+
+ if (dir == DMA_TO_DEVICE)
+ rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
+ else
+ rdma_wr->wr.opcode = IB_WR_RDMA_READ;
+ rdma_wr->remote_addr = remote_addr + total_len;
+ rdma_wr->rkey = rkey;
+ rdma_wr->wr.sg_list = sge;
+
+ for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
+ rdma_wr->wr.num_sge++;
+
+ sge->addr = ib_sg_dma_address(dev, sg) + offset;
+ sge->length = ib_sg_dma_len(dev, sg) - offset;
+ sge->lkey = qp->pd->local_dma_lkey;
+
+ total_len += sge->length;
+ sge++;
+ sg_cnt--;
+ offset = 0;
+ }
+
+ if (i + 1 < ctx->nr_ops)
+ rdma_wr->wr.next = &ctx->map.wrs[i + 1].wr;
+ }
+
+ ctx->type = RDMA_RW_MULTI_WR;
+ return ctx->nr_ops;
+
+out_free_sges:
+ kfree(ctx->map.sges);
+out:
+ return -ENOMEM;
+}
+
+static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
+ enum dma_data_direction dir)
+{
+ struct ib_device *dev = qp->pd->device;
+ struct ib_rdma_wr *rdma_wr = &ctx->single.wr;
+
+ ctx->nr_ops = 1;
+
+ ctx->single.sge.lkey = qp->pd->local_dma_lkey;
+ ctx->single.sge.addr = ib_sg_dma_address(dev, sg) + offset;
+ ctx->single.sge.length = ib_sg_dma_len(dev, sg) - offset;
+
+ memset(rdma_wr, 0, sizeof(*rdma_wr));
+ if (dir == DMA_TO_DEVICE)
+ rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
+ else
+ rdma_wr->wr.opcode = IB_WR_RDMA_READ;
+ rdma_wr->wr.sg_list = &ctx->single.sge;
+ rdma_wr->wr.num_sge = 1;
+ rdma_wr->remote_addr = remote_addr;
+ rdma_wr->rkey = rkey;
+
+ ctx->type = RDMA_RW_SINGLE_WR;
+ return 1;
+}
+
+/**
+ * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
+ * @ctx: context to initialize
+ * @qp: queue pair to operate on
+ * @port_num: port num to which the connection is bound
+ * @sg: scatterlist to READ/WRITE from/to
+ * @sg_cnt: number of entries in @sg
+ * @sg_offset: current byte offset into @sg
+ * @remote_addr:remote address to read/write (relative to @rkey)
+ * @rkey: remote key to operate on
+ * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
+ *
+ * Returns the number of WQEs that will be needed on the workqueue if
+ * successful, or a negative error code.
+ */
+int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+ struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
+ u64 remote_addr, u32 rkey, enum dma_data_direction dir)
+{
+ struct ib_device *dev = qp->pd->device;
+ int ret;
+
+ ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
+ if (!ret)
+ return -ENOMEM;
+ sg_cnt = ret;
+
+ /*
+ * Skip to the S/G entry that sg_offset falls into:
+ */
+ for (;;) {
+ u32 len = ib_sg_dma_len(dev, sg);
+
+ if (sg_offset < len)
+ break;
+
+ sg = sg_next(sg);
+ sg_offset -= len;
+ sg_cnt--;
+ }
+
+ ret = -EIO;
+ if (WARN_ON_ONCE(sg_cnt == 0))
+ goto out_unmap_sg;
+
+ if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) {
+ ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt,
+ sg_offset, remote_addr, rkey, dir);
+ } else if (sg_cnt > 1) {
+ ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset,
+ remote_addr, rkey, dir);
+ } else {
+ ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset,
+ remote_addr, rkey, dir);
+ }
+
+ if (ret < 0)
+ goto out_unmap_sg;
+ return ret;
+
+out_unmap_sg:
+ ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_rw_ctx_init);
+
+/**
+ * rdma_rw_ctx_signature init - initialize a RW context with signature offload
+ * @ctx: context to initialize
+ * @qp: queue pair to operate on
+ * @port_num: port num to which the connection is bound
+ * @sg: scatterlist to READ/WRITE from/to
+ * @sg_cnt: number of entries in @sg
+ * @prot_sg: scatterlist to READ/WRITE protection information from/to
+ * @prot_sg_cnt: number of entries in @prot_sg
+ * @sig_attrs: signature offloading algorithms
+ * @remote_addr:remote address to read/write (relative to @rkey)
+ * @rkey: remote key to operate on
+ * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
+ *
+ * Returns the number of WQEs that will be needed on the workqueue if
+ * successful, or a negative error code.
+ */
+int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ struct scatterlist *prot_sg, u32 prot_sg_cnt,
+ struct ib_sig_attrs *sig_attrs,
+ u64 remote_addr, u32 rkey, enum dma_data_direction dir)
+{
+ struct ib_device *dev = qp->pd->device;
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ struct ib_rdma_wr *rdma_wr;
+ struct ib_send_wr *prev_wr = NULL;
+ int count = 0, ret;
+
+ if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
+ pr_err("SG count too large\n");
+ return -EINVAL;
+ }
+
+ ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
+ if (!ret)
+ return -ENOMEM;
+ sg_cnt = ret;
+
+ ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir);
+ if (!ret) {
+ ret = -ENOMEM;
+ goto out_unmap_sg;
+ }
+ prot_sg_cnt = ret;
+
+ ctx->type = RDMA_RW_SIG_MR;
+ ctx->nr_ops = 1;
+ ctx->sig = kcalloc(1, sizeof(*ctx->sig), GFP_KERNEL);
+ if (!ctx->sig) {
+ ret = -ENOMEM;
+ goto out_unmap_prot_sg;
+ }
+
+ ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->data, sg, sg_cnt, 0);
+ if (ret < 0)
+ goto out_free_ctx;
+ count += ret;
+ prev_wr = &ctx->sig->data.reg_wr.wr;
+
+ if (prot_sg_cnt) {
+ ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot,
+ prot_sg, prot_sg_cnt, 0);
+ if (ret < 0)
+ goto out_destroy_data_mr;
+ count += ret;
+
+ if (ctx->sig->prot.inv_wr.next)
+ prev_wr->next = &ctx->sig->prot.inv_wr;
+ else
+ prev_wr->next = &ctx->sig->prot.reg_wr.wr;
+ prev_wr = &ctx->sig->prot.reg_wr.wr;
+ } else {
+ ctx->sig->prot.mr = NULL;
+ }
+
+ ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs);
+ if (!ctx->sig->sig_mr) {
+ ret = -EAGAIN;
+ goto out_destroy_prot_mr;
+ }
+
+ if (ctx->sig->sig_mr->need_inval) {
+ memset(&ctx->sig->sig_inv_wr, 0, sizeof(ctx->sig->sig_inv_wr));
+
+ ctx->sig->sig_inv_wr.opcode = IB_WR_LOCAL_INV;
+ ctx->sig->sig_inv_wr.ex.invalidate_rkey = ctx->sig->sig_mr->rkey;
+
+ prev_wr->next = &ctx->sig->sig_inv_wr;
+ prev_wr = &ctx->sig->sig_inv_wr;
+ }
+
+ ctx->sig->sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
+ ctx->sig->sig_wr.wr.wr_cqe = NULL;
+ ctx->sig->sig_wr.wr.sg_list = &ctx->sig->data.sge;
+ ctx->sig->sig_wr.wr.num_sge = 1;
+ ctx->sig->sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
+ ctx->sig->sig_wr.sig_attrs = sig_attrs;
+ ctx->sig->sig_wr.sig_mr = ctx->sig->sig_mr;
+ if (prot_sg_cnt)
+ ctx->sig->sig_wr.prot = &ctx->sig->prot.sge;
+ prev_wr->next = &ctx->sig->sig_wr.wr;
+ prev_wr = &ctx->sig->sig_wr.wr;
+ count++;
+
+ ctx->sig->sig_sge.addr = 0;
+ ctx->sig->sig_sge.length = ctx->sig->data.sge.length;
+ if (sig_attrs->wire.sig_type != IB_SIG_TYPE_NONE)
+ ctx->sig->sig_sge.length += ctx->sig->prot.sge.length;
+
+ rdma_wr = &ctx->sig->data.wr;
+ rdma_wr->wr.sg_list = &ctx->sig->sig_sge;
+ rdma_wr->wr.num_sge = 1;
+ rdma_wr->remote_addr = remote_addr;
+ rdma_wr->rkey = rkey;
+ if (dir == DMA_TO_DEVICE)
+ rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
+ else
+ rdma_wr->wr.opcode = IB_WR_RDMA_READ;
+ prev_wr->next = &rdma_wr->wr;
+ prev_wr = &rdma_wr->wr;
+ count++;
+
+ return count;
+
+out_destroy_prot_mr:
+ if (prot_sg_cnt)
+ ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
+out_destroy_data_mr:
+ ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
+out_free_ctx:
+ kfree(ctx->sig);
+out_unmap_prot_sg:
+ ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
+out_unmap_sg:
+ ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
+
+/*
+ * Now that we are going to post the WRs we can update the lkey and need_inval
+ * state on the MRs. If we were doing this at init time, we would get double
+ * or missing invalidations if a context was initialized but not actually
+ * posted.
+ */
+static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
+{
+ reg->mr->need_inval = need_inval;
+ ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey));
+ reg->reg_wr.key = reg->mr->lkey;
+ reg->sge.lkey = reg->mr->lkey;
+}
+
+/**
+ * rdma_rw_ctx_wrs - return chain of WRs for a RDMA READ or WRITE operation
+ * @ctx: context to operate on
+ * @qp: queue pair to operate on
+ * @port_num: port num to which the connection is bound
+ * @cqe: completion queue entry for the last WR
+ * @chain_wr: WR to append to the posted chain
+ *
+ * Return the WR chain for the set of RDMA READ/WRITE operations described by
+ * @ctx, as well as any memory registration operations needed. If @chain_wr
+ * is non-NULL the WR it points to will be appended to the chain of WRs posted.
+ * If @chain_wr is not set @cqe must be set so that the caller gets a
+ * completion notification.
+ */
+struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
+{
+ struct ib_send_wr *first_wr, *last_wr;
+ int i;
+
+ switch (ctx->type) {
+ case RDMA_RW_SIG_MR:
+ rdma_rw_update_lkey(&ctx->sig->data, true);
+ if (ctx->sig->prot.mr)
+ rdma_rw_update_lkey(&ctx->sig->prot, true);
+
+ ctx->sig->sig_mr->need_inval = true;
+ ib_update_fast_reg_key(ctx->sig->sig_mr,
+ ib_inc_rkey(ctx->sig->sig_mr->lkey));
+ ctx->sig->sig_sge.lkey = ctx->sig->sig_mr->lkey;
+
+ if (ctx->sig->data.inv_wr.next)
+ first_wr = &ctx->sig->data.inv_wr;
+ else
+ first_wr = &ctx->sig->data.reg_wr.wr;
+ last_wr = &ctx->sig->data.wr.wr;
+ break;
+ case RDMA_RW_MR:
+ for (i = 0; i < ctx->nr_ops; i++) {
+ rdma_rw_update_lkey(&ctx->reg[i],
+ ctx->reg[i].wr.wr.opcode !=
+ IB_WR_RDMA_READ_WITH_INV);
+ }
+
+ if (ctx->reg[0].inv_wr.next)
+ first_wr = &ctx->reg[0].inv_wr;
+ else
+ first_wr = &ctx->reg[0].reg_wr.wr;
+ last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
+ break;
+ case RDMA_RW_MULTI_WR:
+ first_wr = &ctx->map.wrs[0].wr;
+ last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
+ break;
+ case RDMA_RW_SINGLE_WR:
+ first_wr = &ctx->single.wr.wr;
+ last_wr = &ctx->single.wr.wr;
+ break;
+ default:
+ BUG();
+ }
+
+ if (chain_wr) {
+ last_wr->next = chain_wr;
+ } else {
+ last_wr->wr_cqe = cqe;
+ last_wr->send_flags |= IB_SEND_SIGNALED;
+ }
+
+ return first_wr;
+}
+EXPORT_SYMBOL(rdma_rw_ctx_wrs);
+
+/**
+ * rdma_rw_ctx_post - post a RDMA READ or RDMA WRITE operation
+ * @ctx: context to operate on
+ * @qp: queue pair to operate on
+ * @port_num: port num to which the connection is bound
+ * @cqe: completion queue entry for the last WR
+ * @chain_wr: WR to append to the posted chain
+ *
+ * Post the set of RDMA READ/WRITE operations described by @ctx, as well as
+ * any memory registration operations needed. If @chain_wr is non-NULL the
+ * WR it points to will be appended to the chain of WRs posted. If @chain_wr
+ * is not set @cqe must be set so that the caller gets a completion
+ * notification.
+ */
+int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+ struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
+{
+ struct ib_send_wr *first_wr, *bad_wr;
+
+ first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
+ return ib_post_send(qp, first_wr, &bad_wr);
+}
+EXPORT_SYMBOL(rdma_rw_ctx_post);
+
+/**
+ * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init
+ * @ctx: context to release
+ * @qp: queue pair to operate on
+ * @port_num: port num to which the connection is bound
+ * @sg: scatterlist that was used for the READ/WRITE
+ * @sg_cnt: number of entries in @sg
+ * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
+ */
+void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+ struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir)
+{
+ int i;
+
+ switch (ctx->type) {
+ case RDMA_RW_MR:
+ for (i = 0; i < ctx->nr_ops; i++)
+ ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
+ kfree(ctx->reg);
+ break;
+ case RDMA_RW_MULTI_WR:
+ kfree(ctx->map.wrs);
+ kfree(ctx->map.sges);
+ break;
+ case RDMA_RW_SINGLE_WR:
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+}
+EXPORT_SYMBOL(rdma_rw_ctx_destroy);
+
+/**
+ * rdma_rw_ctx_destroy_signature - release all resources allocated by
+ * rdma_rw_ctx_init_signature
+ * @ctx: context to release
+ * @qp: queue pair to operate on
+ * @port_num: port num to which the connection is bound
+ * @sg: scatterlist that was used for the READ/WRITE
+ * @sg_cnt: number of entries in @sg
+ * @prot_sg: scatterlist that was used for the READ/WRITE of the PI
+ * @prot_sg_cnt: number of entries in @prot_sg
+ * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
+ */
+void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ struct scatterlist *prot_sg, u32 prot_sg_cnt,
+ enum dma_data_direction dir)
+{
+ if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
+ return;
+
+ ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
+ ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+
+ if (ctx->sig->prot.mr) {
+ ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
+ ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
+ }
+
+ ib_mr_pool_put(qp, &qp->sig_mrs, ctx->sig->sig_mr);
+ kfree(ctx->sig);
+}
+EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
+
+void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
+{
+ u32 factor;
+
+ WARN_ON_ONCE(attr->port_num == 0);
+
+ /*
+ * Each context needs at least one RDMA READ or WRITE WR.
+ *
+ * For some hardware we might need more, eventually we should ask the
+ * HCA driver for a multiplier here.
+ */
+ factor = 1;
+
+ /*
+ * If the devices needs MRs to perform RDMA READ or WRITE operations,
+ * we'll need two additional MRs for the registrations and the
+ * invalidation.
+ */
+ if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
+ factor += 6; /* (inv + reg) * (data + prot + sig) */
+ else if (rdma_rw_can_use_mr(dev, attr->port_num))
+ factor += 2; /* inv + reg */
+
+ attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;
+
+ /*
+ * But maybe we were just too high in the sky and the device doesn't
+ * even support all we need, and we'll have to live with what we get..
+ */
+ attr->cap.max_send_wr =
+ min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);
+}
+
+int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
+{
+ struct ib_device *dev = qp->pd->device;
+ u32 nr_mrs = 0, nr_sig_mrs = 0;
+ int ret = 0;
+
+ if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) {
+ nr_sig_mrs = attr->cap.max_rdma_ctxs;
+ nr_mrs = attr->cap.max_rdma_ctxs * 2;
+ } else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
+ nr_mrs = attr->cap.max_rdma_ctxs;
+ }
+
+ if (nr_mrs) {
+ ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
+ IB_MR_TYPE_MEM_REG,
+ rdma_rw_fr_page_list_len(dev));
+ if (ret) {
+ pr_err("%s: failed to allocated %d MRs\n",
+ __func__, nr_mrs);
+ return ret;
+ }
+ }
+
+ if (nr_sig_mrs) {
+ ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
+ IB_MR_TYPE_SIGNATURE, 2);
+ if (ret) {
+ pr_err("%s: failed to allocated %d SIG MRs\n",
+ __func__, nr_mrs);
+ goto out_free_rdma_mrs;
+ }
+ }
+
+ return 0;
+
+out_free_rdma_mrs:
+ ib_mr_pool_destroy(qp, &qp->rdma_mrs);
+ return ret;
+}
+
+void rdma_rw_cleanup_mrs(struct ib_qp *qp)
+{
+ ib_mr_pool_destroy(qp, &qp->sig_mrs);
+ ib_mr_pool_destroy(qp, &qp->rdma_mrs);
+}
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8a09c0fb268d..3ebd108bcc5f 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -536,7 +536,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
if (!data) {
- kfree_skb(skb);
+ nlmsg_free(skb);
return -EMSGSIZE;
}
@@ -1820,7 +1820,7 @@ static int __init ib_sa_init(void)
goto err3;
}
- if (ibnl_add_client(RDMA_NL_LS, RDMA_NL_LS_NUM_OPS,
+ if (ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ib_sa_cb_table),
ib_sa_cb_table)) {
pr_err("Failed to add netlink callback\n");
ret = -EINVAL;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 6fdc7ecdaca0..1a8babb8ee3c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1833,7 +1833,8 @@ static int create_qp(struct ib_uverbs_file *file,
if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
- IB_QP_CREATE_MANAGED_RECV)) {
+ IB_QP_CREATE_MANAGED_RECV |
+ IB_QP_CREATE_SCATTER_FCS)) {
ret = -EINVAL;
goto err_put;
}
@@ -3088,8 +3089,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EINVAL;
- if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER &&
- !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
+ if (!capable(CAP_NET_RAW))
return -EPERM;
if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED)
@@ -3655,6 +3655,11 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.hca_core_clock = attr.hca_core_clock;
resp.response_length += sizeof(resp.hca_core_clock);
+ if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex))
+ goto end;
+
+ resp.device_cap_flags_ex = attr.device_cap_flags;
+ resp.response_length += sizeof(resp.device_cap_flags_ex);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index b65b3541e732..1d7d4cf442e3 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -48,6 +48,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
+#include <rdma/rw.h>
#include "core_priv.h"
@@ -723,59 +724,89 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
}
EXPORT_SYMBOL(ib_open_qp);
+static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct ib_qp *real_qp = qp;
+
+ qp->event_handler = __ib_shared_qp_event_handler;
+ qp->qp_context = qp;
+ qp->pd = NULL;
+ qp->send_cq = qp->recv_cq = NULL;
+ qp->srq = NULL;
+ qp->xrcd = qp_init_attr->xrcd;
+ atomic_inc(&qp_init_attr->xrcd->usecnt);
+ INIT_LIST_HEAD(&qp->open_list);
+
+ qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
+ qp_init_attr->qp_context);
+ if (!IS_ERR(qp))
+ __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
+ else
+ real_qp->device->destroy_qp(real_qp);
+ return qp;
+}
+
struct ib_qp *ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr)
{
- struct ib_qp *qp, *real_qp;
- struct ib_device *device;
+ struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device;
+ struct ib_qp *qp;
+ int ret;
+
+ /*
+ * If the callers is using the RDMA API calculate the resources
+ * needed for the RDMA READ/WRITE operations.
+ *
+ * Note that these callers need to pass in a port number.
+ */
+ if (qp_init_attr->cap.max_rdma_ctxs)
+ rdma_rw_init_qp(device, qp_init_attr);
- device = pd ? pd->device : qp_init_attr->xrcd->device;
qp = device->create_qp(pd, qp_init_attr, NULL);
+ if (IS_ERR(qp))
+ return qp;
+
+ qp->device = device;
+ qp->real_qp = qp;
+ qp->uobject = NULL;
+ qp->qp_type = qp_init_attr->qp_type;
+
+ atomic_set(&qp->usecnt, 0);
+ qp->mrs_used = 0;
+ spin_lock_init(&qp->mr_lock);
+ INIT_LIST_HEAD(&qp->rdma_mrs);
+ INIT_LIST_HEAD(&qp->sig_mrs);
+
+ if (qp_init_attr->qp_type == IB_QPT_XRC_TGT)
+ return ib_create_xrc_qp(qp, qp_init_attr);
+
+ qp->event_handler = qp_init_attr->event_handler;
+ qp->qp_context = qp_init_attr->qp_context;
+ if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
+ qp->recv_cq = NULL;
+ qp->srq = NULL;
+ } else {
+ qp->recv_cq = qp_init_attr->recv_cq;
+ atomic_inc(&qp_init_attr->recv_cq->usecnt);
+ qp->srq = qp_init_attr->srq;
+ if (qp->srq)
+ atomic_inc(&qp_init_attr->srq->usecnt);
+ }
- if (!IS_ERR(qp)) {
- qp->device = device;
- qp->real_qp = qp;
- qp->uobject = NULL;
- qp->qp_type = qp_init_attr->qp_type;
-
- atomic_set(&qp->usecnt, 0);
- if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
- qp->event_handler = __ib_shared_qp_event_handler;
- qp->qp_context = qp;
- qp->pd = NULL;
- qp->send_cq = qp->recv_cq = NULL;
- qp->srq = NULL;
- qp->xrcd = qp_init_attr->xrcd;
- atomic_inc(&qp_init_attr->xrcd->usecnt);
- INIT_LIST_HEAD(&qp->open_list);
-
- real_qp = qp;
- qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
- qp_init_attr->qp_context);
- if (!IS_ERR(qp))
- __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
- else
- real_qp->device->destroy_qp(real_qp);
- } else {
- qp->event_handler = qp_init_attr->event_handler;
- qp->qp_context = qp_init_attr->qp_context;
- if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
- qp->recv_cq = NULL;
- qp->srq = NULL;
- } else {
- qp->recv_cq = qp_init_attr->recv_cq;
- atomic_inc(&qp_init_attr->recv_cq->usecnt);
- qp->srq = qp_init_attr->srq;
- if (qp->srq)
- atomic_inc(&qp_init_attr->srq->usecnt);
- }
+ qp->pd = pd;
+ qp->send_cq = qp_init_attr->send_cq;
+ qp->xrcd = NULL;
- qp->pd = pd;
- qp->send_cq = qp_init_attr->send_cq;
- qp->xrcd = NULL;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&qp_init_attr->send_cq->usecnt);
- atomic_inc(&pd->usecnt);
- atomic_inc(&qp_init_attr->send_cq->usecnt);
+ if (qp_init_attr->cap.max_rdma_ctxs) {
+ ret = rdma_rw_init_mrs(qp, qp_init_attr);
+ if (ret) {
+ pr_err("failed to init MR pool ret= %d\n", ret);
+ ib_destroy_qp(qp);
+ qp = ERR_PTR(ret);
}
}
@@ -1250,6 +1281,8 @@ int ib_destroy_qp(struct ib_qp *qp)
struct ib_srq *srq;
int ret;
+ WARN_ON_ONCE(qp->mrs_used > 0);
+
if (atomic_read(&qp->usecnt))
return -EBUSY;
@@ -1261,6 +1294,9 @@ int ib_destroy_qp(struct ib_qp *qp)
rcq = qp->recv_cq;
srq = qp->srq;
+ if (!qp->uobject)
+ rdma_rw_cleanup_mrs(qp);
+
ret = qp->device->destroy_qp(qp);
if (!ret) {
if (pd)
@@ -1343,6 +1379,7 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
+ mr->need_inval = false;
}
return mr;
@@ -1389,6 +1426,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
+ mr->need_inval = false;
}
return mr;
@@ -1597,6 +1635,7 @@ EXPORT_SYMBOL(ib_set_vf_guid);
* @mr: memory region
* @sg: dma mapped scatterlist
* @sg_nents: number of entries in sg
+ * @sg_offset: offset in bytes into sg
* @page_size: page vector desired page size
*
* Constraints:
@@ -1615,17 +1654,15 @@ EXPORT_SYMBOL(ib_set_vf_guid);
* After this completes successfully, the memory region
* is ready for registration.
*/
-int ib_map_mr_sg(struct ib_mr *mr,
- struct scatterlist *sg,
- int sg_nents,
- unsigned int page_size)
+int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset, unsigned int page_size)
{
if (unlikely(!mr->device->map_mr_sg))
return -ENOSYS;
mr->page_size = page_size;
- return mr->device->map_mr_sg(mr, sg, sg_nents);
+ return mr->device->map_mr_sg(mr, sg, sg_nents, sg_offset);
}
EXPORT_SYMBOL(ib_map_mr_sg);
@@ -1635,6 +1672,10 @@ EXPORT_SYMBOL(ib_map_mr_sg);
* @mr: memory region
* @sgl: dma mapped scatterlist
* @sg_nents: number of entries in sg
+ * @sg_offset_p: IN: start offset in bytes into sg
+ * OUT: offset in bytes for element n of the sg of the first
+ * byte that has not been processed where n is the return
+ * value of this function.
* @set_page: driver page assignment function pointer
*
* Core service helper for drivers to convert the largest
@@ -1645,23 +1686,26 @@ EXPORT_SYMBOL(ib_map_mr_sg);
* Returns the number of sg elements that were assigned to
* a page vector.
*/
-int ib_sg_to_pages(struct ib_mr *mr,
- struct scatterlist *sgl,
- int sg_nents,
- int (*set_page)(struct ib_mr *, u64))
+int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
+ unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64))
{
struct scatterlist *sg;
u64 last_end_dma_addr = 0;
+ unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
unsigned int last_page_off = 0;
u64 page_mask = ~((u64)mr->page_size - 1);
int i, ret;
- mr->iova = sg_dma_address(&sgl[0]);
+ if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0])))
+ return -EINVAL;
+
+ mr->iova = sg_dma_address(&sgl[0]) + sg_offset;
mr->length = 0;
for_each_sg(sgl, sg, sg_nents, i) {
- u64 dma_addr = sg_dma_address(sg);
- unsigned int dma_len = sg_dma_len(sg);
+ u64 dma_addr = sg_dma_address(sg) + sg_offset;
+ u64 prev_addr = dma_addr;
+ unsigned int dma_len = sg_dma_len(sg) - sg_offset;
u64 end_dma_addr = dma_addr + dma_len;
u64 page_addr = dma_addr & page_mask;
@@ -1685,8 +1729,14 @@ int ib_sg_to_pages(struct ib_mr *mr,
do {
ret = set_page(mr, page_addr);
- if (unlikely(ret < 0))
- return i ? : ret;
+ if (unlikely(ret < 0)) {
+ sg_offset = prev_addr - sg_dma_address(sg);
+ mr->length += prev_addr - dma_addr;
+ if (sg_offset_p)
+ *sg_offset_p = sg_offset;
+ return i || sg_offset ? i : ret;
+ }
+ prev_addr = page_addr;
next_page:
page_addr += mr->page_size;
} while (page_addr < end_dma_addr);
@@ -1694,8 +1744,12 @@ next_page:
mr->length += dma_len;
last_end_dma_addr = end_dma_addr;
last_page_off = end_dma_addr & ~page_mask;
+
+ sg_offset = 0;
}
+ if (sg_offset_p)
+ *sg_offset_p = 0;
return i;
}
EXPORT_SYMBOL(ib_sg_to_pages);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 3234a8be16f6..47cb927a0dd6 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -783,15 +783,14 @@ static int iwch_set_page(struct ib_mr *ibmr, u64 addr)
return 0;
}
-static int iwch_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents)
+static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
{
struct iwch_mr *mhp = to_iwch_mr(ibmr);
mhp->npages = 0;
- return ib_sg_to_pages(ibmr, sg, sg_nents, iwch_set_page);
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page);
}
static int iwch_destroy_qp(struct ib_qp *ib_qp)
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 651711370d55..a3a67216bce6 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -119,7 +119,7 @@ MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
static int mpa_rev = 2;
module_param(mpa_rev, int, 0644);
MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
- "1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft"
+ "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft"
" compliant (default=2)");
static int markers_enabled;
@@ -145,19 +145,35 @@ static struct sk_buff_head rxq;
static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
static void ep_timeout(unsigned long arg);
static void connect_reply_upcall(struct c4iw_ep *ep, int status);
+static int sched(struct c4iw_dev *dev, struct sk_buff *skb);
static LIST_HEAD(timeout_list);
static spinlock_t timeout_lock;
+static void deref_cm_id(struct c4iw_ep_common *epc)
+{
+ epc->cm_id->rem_ref(epc->cm_id);
+ epc->cm_id = NULL;
+ set_bit(CM_ID_DEREFED, &epc->history);
+}
+
+static void ref_cm_id(struct c4iw_ep_common *epc)
+{
+ set_bit(CM_ID_REFED, &epc->history);
+ epc->cm_id->add_ref(epc->cm_id);
+}
+
static void deref_qp(struct c4iw_ep *ep)
{
c4iw_qp_rem_ref(&ep->com.qp->ibqp);
clear_bit(QP_REFERENCED, &ep->com.flags);
+ set_bit(QP_DEREFED, &ep->com.history);
}
static void ref_qp(struct c4iw_ep *ep)
{
set_bit(QP_REFERENCED, &ep->com.flags);
+ set_bit(QP_REFED, &ep->com.history);
c4iw_qp_add_ref(&ep->com.qp->ibqp);
}
@@ -201,6 +217,8 @@ static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
if (error < 0)
kfree_skb(skb);
+ else if (error == NET_XMIT_DROP)
+ return -ENOMEM;
return error < 0 ? error : 0;
}
@@ -290,12 +308,63 @@ static void *alloc_ep(int size, gfp_t gfp)
return epc;
}
+static void remove_ep_tid(struct c4iw_ep *ep)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ep->com.dev->lock, flags);
+ _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
+ spin_unlock_irqrestore(&ep->com.dev->lock, flags);
+}
+
+static void insert_ep_tid(struct c4iw_ep *ep)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ep->com.dev->lock, flags);
+ _insert_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep, ep->hwtid, 0);
+ spin_unlock_irqrestore(&ep->com.dev->lock, flags);
+}
+
+/*
+ * Atomically lookup the ep ptr given the tid and grab a reference on the ep.
+ */
+static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid)
+{
+ struct c4iw_ep *ep;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ ep = idr_find(&dev->hwtid_idr, tid);
+ if (ep)
+ c4iw_get_ep(&ep->com);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ return ep;
+}
+
+/*
+ * Atomically lookup the ep ptr given the stid and grab a reference on the ep.
+ */
+static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev,
+ unsigned int stid)
+{
+ struct c4iw_listen_ep *ep;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ ep = idr_find(&dev->stid_idr, stid);
+ if (ep)
+ c4iw_get_ep(&ep->com);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ return ep;
+}
+
void _c4iw_free_ep(struct kref *kref)
{
struct c4iw_ep *ep;
ep = container_of(kref, struct c4iw_ep, com.kref);
- PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
+ PDBG("%s ep %p state %s\n", __func__, ep, states[ep->com.state]);
if (test_bit(QP_REFERENCED, &ep->com.flags))
deref_qp(ep);
if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
@@ -309,10 +378,11 @@ void _c4iw_free_ep(struct kref *kref)
(const u32 *)&sin6->sin6_addr.s6_addr,
1);
}
- remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
+ if (ep->mpa_skb)
+ kfree_skb(ep->mpa_skb);
}
kfree(ep);
}
@@ -320,6 +390,15 @@ void _c4iw_free_ep(struct kref *kref)
static void release_ep_resources(struct c4iw_ep *ep)
{
set_bit(RELEASE_RESOURCES, &ep->com.flags);
+
+ /*
+ * If we have a hwtid, then remove it from the idr table
+ * so lookups will no longer find this endpoint. Otherwise
+ * we have a race where one thread finds the ep ptr just
+ * before the other thread is freeing the ep memory.
+ */
+ if (ep->hwtid != -1)
+ remove_ep_tid(ep);
c4iw_put_ep(&ep->com);
}
@@ -432,10 +511,74 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
- PDBG("%s c4iw_dev %p\n", __func__, handle);
+ pr_err(MOD "ARP failure\n");
kfree_skb(skb);
}
+static void mpa_start_arp_failure(void *handle, struct sk_buff *skb)
+{
+ pr_err("ARP failure during MPA Negotiation - Closing Connection\n");
+}
+
+enum {
+ NUM_FAKE_CPLS = 2,
+ FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0,
+ FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1,
+};
+
+static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep;
+
+ ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
+ release_ep_resources(ep);
+ return 0;
+}
+
+static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep;
+
+ ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
+ c4iw_put_ep(&ep->parent_ep->com);
+ release_ep_resources(ep);
+ return 0;
+}
+
+/*
+ * Fake up a special CPL opcode and call sched() so process_work() will call
+ * _put_ep_safe() in a safe context to free the ep resources. This is needed
+ * because ARP error handlers are called in an ATOMIC context, and
+ * _c4iw_free_ep() needs to block.
+ */
+static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb,
+ int cpl)
+{
+ struct cpl_act_establish *rpl = cplhdr(skb);
+
+ /* Set our special ARP_FAILURE opcode */
+ rpl->ot.opcode = cpl;
+
+ /*
+ * Save ep in the skb->cb area, after where sched() will save the dev
+ * ptr.
+ */
+ *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep;
+ sched(ep->com.dev, skb);
+}
+
+/* Handle an ARP failure for an accept */
+static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep = handle;
+
+ pr_err(MOD "ARP failure during accept - tid %u -dropping connection\n",
+ ep->hwtid);
+
+ __state_set(&ep->com, DEAD);
+ queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE);
+}
+
/*
* Handle an ARP failure for an active open.
*/
@@ -444,9 +587,8 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
struct c4iw_ep *ep = handle;
printk(KERN_ERR MOD "ARP failure during connect\n");
- kfree_skb(skb);
connect_reply_upcall(ep, -EHOSTUNREACH);
- state_set(&ep->com, DEAD);
+ __state_set(&ep->com, DEAD);
if (ep->com.remote_addr.ss_family == AF_INET6) {
struct sockaddr_in6 *sin6 =
(struct sockaddr_in6 *)&ep->com.local_addr;
@@ -455,9 +597,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
}
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
- dst_release(ep->dst);
- cxgb4_l2t_release(ep->l2t);
- c4iw_put_ep(&ep->com);
+ queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
}
/*
@@ -466,15 +606,21 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
*/
static void abort_arp_failure(void *handle, struct sk_buff *skb)
{
- struct c4iw_rdev *rdev = handle;
+ int ret;
+ struct c4iw_ep *ep = handle;
+ struct c4iw_rdev *rdev = &ep->com.dev->rdev;
struct cpl_abort_req *req = cplhdr(skb);
PDBG("%s rdev %p\n", __func__, rdev);
req->cmd = CPL_ABORT_NO_RST;
- c4iw_ofld_send(rdev, skb);
+ ret = c4iw_ofld_send(rdev, skb);
+ if (ret) {
+ __state_set(&ep->com, DEAD);
+ queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
+ }
}
-static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
+static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
{
unsigned int flowclen = 80;
struct fw_flowc_wr *flowc;
@@ -530,7 +676,7 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- c4iw_ofld_send(&ep->com.dev->rdev, skb);
+ return c4iw_ofld_send(&ep->com.dev->rdev, skb);
}
static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
@@ -568,7 +714,7 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- t4_set_arp_err_handler(skb, &ep->com.dev->rdev, abort_arp_failure);
+ t4_set_arp_err_handler(skb, ep, abort_arp_failure);
req = (struct cpl_abort_req *) skb_put(skb, wrlen);
memset(req, 0, wrlen);
INIT_TP_WR(req, ep->hwtid);
@@ -807,10 +953,10 @@ clip_release:
return ret;
}
-static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
- u8 mpa_rev_to_use)
+static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
+ u8 mpa_rev_to_use)
{
- int mpalen, wrlen;
+ int mpalen, wrlen, ret;
struct fw_ofld_tx_data_wr *req;
struct mpa_message *mpa;
struct mpa_v2_conn_params mpa_v2_params;
@@ -826,7 +972,7 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
skb = get_skb(skb, wrlen, GFP_KERNEL);
if (!skb) {
connect_reply_upcall(ep, -ENOMEM);
- return;
+ return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
@@ -894,12 +1040,14 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
BUG_ON(ep->mpa_skb);
ep->mpa_skb = skb;
- c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+ ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+ if (ret)
+ return ret;
start_ep_timer(ep);
__state_set(&ep->com, MPA_REQ_SENT);
ep->mpa_attr.initiator = 1;
ep->snd_seq += mpalen;
- return;
+ return ret;
}
static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
@@ -975,7 +1123,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
*/
skb_get(skb);
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
+ t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
BUG_ON(ep->mpa_skb);
ep->mpa_skb = skb;
ep->snd_seq += mpalen;
@@ -1060,7 +1208,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
* Function fw4_ack() will deref it.
*/
skb_get(skb);
- t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
+ t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
ep->mpa_skb = skb;
__state_set(&ep->com, MPA_REP_SENT);
ep->snd_seq += mpalen;
@@ -1074,6 +1222,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
unsigned int tid = GET_TID(req);
unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
struct tid_info *t = dev->rdev.lldi.tids;
+ int ret;
ep = lookup_atid(t, atid);
@@ -1086,7 +1235,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
/* setup the hwtid for this connection */
ep->hwtid = tid;
cxgb4_insert_tid(t, ep, tid);
- insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid);
+ insert_ep_tid(ep);
ep->snd_seq = be32_to_cpu(req->snd_isn);
ep->rcv_seq = be32_to_cpu(req->rcv_isn);
@@ -1099,13 +1248,22 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
set_bit(ACT_ESTAB, &ep->com.history);
/* start MPA negotiation */
- send_flowc(ep, NULL);
+ ret = send_flowc(ep, NULL);
+ if (ret)
+ goto err;
if (ep->retry_with_mpa_v1)
- send_mpa_req(ep, skb, 1);
+ ret = send_mpa_req(ep, skb, 1);
else
- send_mpa_req(ep, skb, mpa_rev);
+ ret = send_mpa_req(ep, skb, mpa_rev);
+ if (ret)
+ goto err;
mutex_unlock(&ep->com.mutex);
return 0;
+err:
+ mutex_unlock(&ep->com.mutex);
+ connect_reply_upcall(ep, -ENOMEM);
+ c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+ return 0;
}
static void close_complete_upcall(struct c4iw_ep *ep, int status)
@@ -1120,20 +1278,11 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status)
PDBG("close complete delivered ep %p cm_id %p tid %u\n",
ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
+ deref_cm_id(&ep->com);
set_bit(CLOSE_UPCALL, &ep->com.history);
}
}
-static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
-{
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- __state_set(&ep->com, ABORTING);
- set_bit(ABORT_CONN, &ep->com.history);
- return send_abort(ep, skb, gfp);
-}
-
static void peer_close_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
@@ -1161,8 +1310,7 @@ static void peer_abort_upcall(struct c4iw_ep *ep)
PDBG("abort delivered ep %p cm_id %p tid %u\n", ep,
ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
+ deref_cm_id(&ep->com);
set_bit(ABORT_UPCALL, &ep->com.history);
}
}
@@ -1205,10 +1353,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
set_bit(CONN_RPL_UPCALL, &ep->com.history);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- if (status < 0) {
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
- }
+ if (status < 0)
+ deref_cm_id(&ep->com);
}
static int connect_request_upcall(struct c4iw_ep *ep)
@@ -1301,6 +1447,18 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
#define RELAXED_IRD_NEGOTIATION 1
+/*
+ * process_mpa_reply - process streaming mode MPA reply
+ *
+ * Returns:
+ *
+ * 0 upon success indicating a connect request was delivered to the ULP
+ * or the mpa request is incomplete but valid so far.
+ *
+ * 1 if a failure requires the caller to close the connection.
+ *
+ * 2 if a failure requires the caller to abort the connection.
+ */
static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
{
struct mpa_message *mpa;
@@ -1316,20 +1474,12 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
/*
- * Stop mpa timer. If it expired, then
- * we ignore the MPA reply. process_timeout()
- * will abort the connection.
- */
- if (stop_ep_timer(ep))
- return 0;
-
- /*
* If we get more than the supported amount of private data
* then we must fail this connection.
*/
if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
err = -EINVAL;
- goto err;
+ goto err_stop_timer;
}
/*
@@ -1351,11 +1501,11 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
" Received = %d\n", __func__, mpa_rev, mpa->revision);
err = -EPROTO;
- goto err;
+ goto err_stop_timer;
}
if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
err = -EPROTO;
- goto err;
+ goto err_stop_timer;
}
plen = ntohs(mpa->private_data_size);
@@ -1365,7 +1515,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
*/
if (plen > MPA_MAX_PRIVATE_DATA) {
err = -EPROTO;
- goto err;
+ goto err_stop_timer;
}
/*
@@ -1373,7 +1523,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
*/
if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
err = -EPROTO;
- goto err;
+ goto err_stop_timer;
}
ep->plen = (u8) plen;
@@ -1387,10 +1537,18 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
if (mpa->flags & MPA_REJECT) {
err = -ECONNREFUSED;
- goto err;
+ goto err_stop_timer;
}
/*
+ * Stop mpa timer. If it expired, then
+ * we ignore the MPA reply. process_timeout()
+ * will abort the connection.
+ */
+ if (stop_ep_timer(ep))
+ return 0;
+
+ /*
* If we get here we have accumulated the entire mpa
* start reply message including private data. And
* the MPA header is valid.
@@ -1529,15 +1687,28 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
goto out;
}
goto out;
+err_stop_timer:
+ stop_ep_timer(ep);
err:
- __state_set(&ep->com, ABORTING);
- send_abort(ep, skb, GFP_KERNEL);
+ disconnect = 2;
out:
connect_reply_upcall(ep, err);
return disconnect;
}
-static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
+/*
+ * process_mpa_request - process streaming mode MPA request
+ *
+ * Returns:
+ *
+ * 0 upon success indicating a connect request was delivered to the ULP
+ * or the mpa request is incomplete but valid so far.
+ *
+ * 1 if a failure requires the caller to close the connection.
+ *
+ * 2 if a failure requires the caller to abort the connection.
+ */
+static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
{
struct mpa_message *mpa;
struct mpa_v2_conn_params *mpa_v2_params;
@@ -1549,11 +1720,8 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
* If we get more than the supported amount of private data
* then we must fail this connection.
*/
- if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
- (void)stop_ep_timer(ep);
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
+ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
+ goto err_stop_timer;
PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
@@ -1569,7 +1737,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
* We'll continue process when more data arrives.
*/
if (ep->mpa_pkt_len < sizeof(*mpa))
- return;
+ return 0;
PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
mpa = (struct mpa_message *) ep->mpa_pkt;
@@ -1580,43 +1748,32 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (mpa->revision > mpa_rev) {
printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
" Received = %d\n", __func__, mpa_rev, mpa->revision);
- (void)stop_ep_timer(ep);
- abort_connection(ep, skb, GFP_KERNEL);
- return;
+ goto err_stop_timer;
}
- if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
- (void)stop_ep_timer(ep);
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
+ if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
+ goto err_stop_timer;
plen = ntohs(mpa->private_data_size);
/*
* Fail if there's too much private data.
*/
- if (plen > MPA_MAX_PRIVATE_DATA) {
- (void)stop_ep_timer(ep);
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
+ if (plen > MPA_MAX_PRIVATE_DATA)
+ goto err_stop_timer;
/*
* If plen does not account for pkt size
*/
- if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
- (void)stop_ep_timer(ep);
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
+ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
+ goto err_stop_timer;
ep->plen = (u8) plen;
/*
* If we don't have all the pdata yet, then bail.
*/
if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
- return;
+ return 0;
/*
* If we get here we have accumulated the entire mpa
@@ -1665,26 +1822,26 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
ep->mpa_attr.p2p_type);
- /*
- * If the endpoint timer already expired, then we ignore
- * the start request. process_timeout() will abort
- * the connection.
- */
- if (!stop_ep_timer(ep)) {
- __state_set(&ep->com, MPA_REQ_RCVD);
-
- /* drive upcall */
- mutex_lock_nested(&ep->parent_ep->com.mutex,
- SINGLE_DEPTH_NESTING);
- if (ep->parent_ep->com.state != DEAD) {
- if (connect_request_upcall(ep))
- abort_connection(ep, skb, GFP_KERNEL);
- } else {
- abort_connection(ep, skb, GFP_KERNEL);
- }
- mutex_unlock(&ep->parent_ep->com.mutex);
+ __state_set(&ep->com, MPA_REQ_RCVD);
+
+ /* drive upcall */
+ mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING);
+ if (ep->parent_ep->com.state != DEAD) {
+ if (connect_request_upcall(ep))
+ goto err_unlock_parent;
+ } else {
+ goto err_unlock_parent;
}
- return;
+ mutex_unlock(&ep->parent_ep->com.mutex);
+ return 0;
+
+err_unlock_parent:
+ mutex_unlock(&ep->parent_ep->com.mutex);
+ goto err_out;
+err_stop_timer:
+ (void)stop_ep_timer(ep);
+err_out:
+ return 2;
}
static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
@@ -1693,11 +1850,10 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
struct cpl_rx_data *hdr = cplhdr(skb);
unsigned int dlen = ntohs(hdr->len);
unsigned int tid = GET_TID(hdr);
- struct tid_info *t = dev->rdev.lldi.tids;
__u8 status = hdr->status;
int disconnect = 0;
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
@@ -1715,7 +1871,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
break;
case MPA_REQ_WAIT:
ep->rcv_seq += dlen;
- process_mpa_request(ep, skb);
+ disconnect = process_mpa_request(ep, skb);
break;
case FPDU_MODE: {
struct c4iw_qp_attributes attrs;
@@ -1736,7 +1892,8 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
}
mutex_unlock(&ep->com.mutex);
if (disconnect)
- c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+ c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
+ c4iw_put_ep(&ep->com);
return 0;
}
@@ -1746,9 +1903,8 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
int release = 0;
unsigned int tid = GET_TID(rpl);
- struct tid_info *t = dev->rdev.lldi.tids;
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
if (!ep) {
printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
return 0;
@@ -1770,10 +1926,11 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
if (release)
release_ep_resources(ep);
+ c4iw_put_ep(&ep->com);
return 0;
}
-static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
+static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
{
struct sk_buff *skb;
struct fw_ofld_connection_wr *req;
@@ -1843,7 +2000,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2);
set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
set_bit(ACT_OFLD_CONN, &ep->com.history);
- c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+ return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
/*
@@ -1986,6 +2143,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
init_timer(&ep->timer);
+ c4iw_init_wr_wait(&ep->com.wr_wait);
/*
* Allocate an active TID to initiate a TCP connection.
@@ -2069,6 +2227,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct sockaddr_in *ra;
struct sockaddr_in6 *la6;
struct sockaddr_in6 *ra6;
+ int ret = 0;
ep = lookup_atid(t, atid);
la = (struct sockaddr_in *)&ep->com.local_addr;
@@ -2104,9 +2263,10 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
mutex_unlock(&dev->rdev.stats.lock);
if (ep->com.local_addr.ss_family == AF_INET &&
dev->rdev.lldi.enable_fw_ofld_conn) {
- send_fw_act_open_req(ep,
- TID_TID_G(AOPEN_ATID_G(
- ntohl(rpl->atid_status))));
+ ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G(
+ ntohl(rpl->atid_status))));
+ if (ret)
+ goto fail;
return 0;
}
break;
@@ -2146,6 +2306,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
break;
}
+fail:
connect_reply_upcall(ep, status2errno(status));
state_set(&ep->com, DEAD);
@@ -2170,9 +2331,8 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_pass_open_rpl *rpl = cplhdr(skb);
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int stid = GET_TID(rpl);
- struct c4iw_listen_ep *ep = lookup_stid(t, stid);
+ struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
if (!ep) {
PDBG("%s stid %d lookup failure!\n", __func__, stid);
@@ -2181,7 +2341,7 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
PDBG("%s ep %p status %d error %d\n", __func__, ep,
rpl->status, status2errno(rpl->status));
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
-
+ c4iw_put_ep(&ep->com);
out:
return 0;
}
@@ -2189,17 +2349,17 @@ out:
static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int stid = GET_TID(rpl);
- struct c4iw_listen_ep *ep = lookup_stid(t, stid);
+ struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
PDBG("%s ep %p\n", __func__, ep);
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
+ c4iw_put_ep(&ep->com);
return 0;
}
-static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
- struct cpl_pass_accept_req *req)
+static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
+ struct cpl_pass_accept_req *req)
{
struct cpl_pass_accept_rpl *rpl;
unsigned int mtu_idx;
@@ -2287,10 +2447,9 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
rpl->opt0 = cpu_to_be64(opt0);
rpl->opt2 = cpu_to_be32(opt2);
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
- t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
- c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+ t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure);
- return;
+ return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
@@ -2355,7 +2514,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
unsigned short hdrs;
u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
- parent_ep = lookup_stid(t, stid);
+ parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!parent_ep) {
PDBG("%s connect request on invalid stid %d\n", __func__, stid);
goto reject;
@@ -2468,9 +2627,13 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
init_timer(&child_ep->timer);
cxgb4_insert_tid(t, child_ep, hwtid);
- insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid);
- accept_cr(child_ep, skb, req);
- set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
+ insert_ep_tid(child_ep);
+ if (accept_cr(child_ep, skb, req)) {
+ c4iw_put_ep(&parent_ep->com);
+ release_ep_resources(child_ep);
+ } else {
+ set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
+ }
if (iptype == 6) {
sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0],
@@ -2479,6 +2642,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
goto out;
reject:
reject_cr(dev, hwtid, skb);
+ if (parent_ep)
+ c4iw_put_ep(&parent_ep->com);
out:
return 0;
}
@@ -2487,10 +2652,10 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct c4iw_ep *ep;
struct cpl_pass_establish *req = cplhdr(skb);
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int tid = GET_TID(req);
+ int ret;
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
ep->snd_seq = be32_to_cpu(req->snd_isn);
ep->rcv_seq = be32_to_cpu(req->rcv_isn);
@@ -2501,10 +2666,15 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
set_emss(ep, ntohs(req->tcp_opt));
dst_confirm(ep->dst);
- state_set(&ep->com, MPA_REQ_WAIT);
+ mutex_lock(&ep->com.mutex);
+ ep->com.state = MPA_REQ_WAIT;
start_ep_timer(ep);
- send_flowc(ep, skb);
set_bit(PASS_ESTAB, &ep->com.history);
+ ret = send_flowc(ep, skb);
+ mutex_unlock(&ep->com.mutex);
+ if (ret)
+ c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
+ c4iw_put_ep(&ep->com);
return 0;
}
@@ -2516,11 +2686,13 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
struct c4iw_qp_attributes attrs;
int disconnect = 1;
int release = 0;
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int tid = GET_TID(hdr);
int ret;
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
+ if (!ep)
+ return 0;
+
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
dst_confirm(ep->dst);
@@ -2592,6 +2764,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
if (release)
release_ep_resources(ep);
+ c4iw_put_ep(&ep->com);
return 0;
}
@@ -2604,10 +2777,12 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
struct c4iw_qp_attributes attrs;
int ret;
int release = 0;
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int tid = GET_TID(req);
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
+ if (!ep)
+ return 0;
+
if (is_neg_adv(req->status)) {
PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
__func__, ep->hwtid, req->status,
@@ -2616,7 +2791,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.neg_adv++;
mutex_unlock(&dev->rdev.stats.lock);
- return 0;
+ goto deref_ep;
}
PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
ep->com.state);
@@ -2633,6 +2808,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
mutex_lock(&ep->com.mutex);
switch (ep->com.state) {
case CONNECTING:
+ c4iw_put_ep(&ep->parent_ep->com);
break;
case MPA_REQ_WAIT:
(void)stop_ep_timer(ep);
@@ -2681,7 +2857,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
case DEAD:
PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
mutex_unlock(&ep->com.mutex);
- return 0;
+ goto deref_ep;
default:
BUG_ON(1);
break;
@@ -2728,6 +2904,10 @@ out:
c4iw_reconnect(ep);
}
+deref_ep:
+ c4iw_put_ep(&ep->com);
+ /* Dereferencing ep, referenced in peer_abort_intr() */
+ c4iw_put_ep(&ep->com);
return 0;
}
@@ -2737,16 +2917,18 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct c4iw_qp_attributes attrs;
struct cpl_close_con_rpl *rpl = cplhdr(skb);
int release = 0;
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int tid = GET_TID(rpl);
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
+ if (!ep)
+ return 0;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
BUG_ON(!ep);
/* The cm_id may be null if we failed to connect */
mutex_lock(&ep->com.mutex);
+ set_bit(CLOSE_CON_RPL, &ep->com.history);
switch (ep->com.state) {
case CLOSING:
__state_set(&ep->com, MORIBUND);
@@ -2774,18 +2956,18 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
mutex_unlock(&ep->com.mutex);
if (release)
release_ep_resources(ep);
+ c4iw_put_ep(&ep->com);
return 0;
}
static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_rdma_terminate *rpl = cplhdr(skb);
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int tid = GET_TID(rpl);
struct c4iw_ep *ep;
struct c4iw_qp_attributes attrs;
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
BUG_ON(!ep);
if (ep && ep->com.qp) {
@@ -2796,6 +2978,7 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
} else
printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
+ c4iw_put_ep(&ep->com);
return 0;
}
@@ -2811,15 +2994,16 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
struct cpl_fw4_ack *hdr = cplhdr(skb);
u8 credits = hdr->credits;
unsigned int tid = GET_TID(hdr);
- struct tid_info *t = dev->rdev.lldi.tids;
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
+ if (!ep)
+ return 0;
PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
if (credits == 0) {
PDBG("%s 0 credit ack ep %p tid %u state %u\n",
__func__, ep, ep->hwtid, state_read(&ep->com));
- return 0;
+ goto out;
}
dst_confirm(ep->dst);
@@ -2829,7 +3013,13 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
kfree_skb(ep->mpa_skb);
ep->mpa_skb = NULL;
+ mutex_lock(&ep->com.mutex);
+ if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
+ stop_ep_timer(ep);
+ mutex_unlock(&ep->com.mutex);
}
+out:
+ c4iw_put_ep(&ep->com);
return 0;
}
@@ -2841,22 +3031,23 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
- if (ep->com.state == DEAD) {
+ if (ep->com.state != MPA_REQ_RCVD) {
mutex_unlock(&ep->com.mutex);
c4iw_put_ep(&ep->com);
return -ECONNRESET;
}
set_bit(ULP_REJECT, &ep->com.history);
- BUG_ON(ep->com.state != MPA_REQ_RCVD);
if (mpa_rev == 0)
- abort_connection(ep, NULL, GFP_KERNEL);
+ disconnect = 2;
else {
err = send_mpa_reject(ep, pdata, pdata_len);
disconnect = 1;
}
mutex_unlock(&ep->com.mutex);
- if (disconnect)
- err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+ if (disconnect) {
+ stop_ep_timer(ep);
+ err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
+ }
c4iw_put_ep(&ep->com);
return 0;
}
@@ -2869,24 +3060,23 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct c4iw_ep *ep = to_ep(cm_id);
struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
+ int abort = 0;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
- if (ep->com.state == DEAD) {
+ if (ep->com.state != MPA_REQ_RCVD) {
err = -ECONNRESET;
- goto err;
+ goto err_out;
}
- BUG_ON(ep->com.state != MPA_REQ_RCVD);
BUG_ON(!qp);
set_bit(ULP_ACCEPT, &ep->com.history);
if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) ||
(conn_param->ird > cur_max_read_depth(ep->com.dev))) {
- abort_connection(ep, NULL, GFP_KERNEL);
err = -EINVAL;
- goto err;
+ goto err_abort;
}
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
@@ -2898,9 +3088,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->ord = conn_param->ord;
send_mpa_reject(ep, conn_param->private_data,
conn_param->private_data_len);
- abort_connection(ep, NULL, GFP_KERNEL);
err = -ENOMEM;
- goto err;
+ goto err_abort;
}
}
if (conn_param->ird < ep->ord) {
@@ -2908,9 +3097,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->ord <= h->rdev.lldi.max_ordird_qp) {
conn_param->ird = ep->ord;
} else {
- abort_connection(ep, NULL, GFP_KERNEL);
err = -ENOMEM;
- goto err;
+ goto err_abort;
}
}
}
@@ -2929,8 +3117,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
- cm_id->add_ref(cm_id);
ep->com.cm_id = cm_id;
+ ref_cm_id(&ep->com);
ep->com.qp = qp;
ref_qp(ep);
@@ -2951,23 +3139,27 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
err = c4iw_modify_qp(ep->com.qp->rhp,
ep->com.qp, mask, &attrs, 1);
if (err)
- goto err1;
+ goto err_deref_cm_id;
+
+ set_bit(STOP_MPA_TIMER, &ep->com.flags);
err = send_mpa_reply(ep, conn_param->private_data,
conn_param->private_data_len);
if (err)
- goto err1;
+ goto err_deref_cm_id;
__state_set(&ep->com, FPDU_MODE);
established_upcall(ep);
mutex_unlock(&ep->com.mutex);
c4iw_put_ep(&ep->com);
return 0;
-err1:
- ep->com.cm_id = NULL;
- abort_connection(ep, NULL, GFP_KERNEL);
- cm_id->rem_ref(cm_id);
-err:
+err_deref_cm_id:
+ deref_cm_id(&ep->com);
+err_abort:
+ abort = 1;
+err_out:
mutex_unlock(&ep->com.mutex);
+ if (abort)
+ c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
c4iw_put_ep(&ep->com);
return err;
}
@@ -3067,9 +3259,9 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (peer2peer && ep->ord == 0)
ep->ord = 1;
- cm_id->add_ref(cm_id);
- ep->com.dev = dev;
ep->com.cm_id = cm_id;
+ ref_cm_id(&ep->com);
+ ep->com.dev = dev;
ep->com.qp = get_qhp(dev, conn_param->qpn);
if (!ep->com.qp) {
PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
@@ -3108,7 +3300,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
/*
* Handle loopback requests to INADDR_ANY.
*/
- if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) {
+ if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
err = pick_local_ipaddrs(dev, cm_id);
if (err)
goto fail1;
@@ -3176,7 +3368,7 @@ fail2:
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
fail1:
- cm_id->rem_ref(cm_id);
+ deref_cm_id(&ep->com);
c4iw_put_ep(&ep->com);
out:
return err;
@@ -3270,8 +3462,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
goto fail1;
}
PDBG("%s ep %p\n", __func__, ep);
- cm_id->add_ref(cm_id);
ep->com.cm_id = cm_id;
+ ref_cm_id(&ep->com);
ep->com.dev = dev;
ep->backlog = backlog;
memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
@@ -3311,7 +3503,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
ep->com.local_addr.ss_family);
fail2:
- cm_id->rem_ref(cm_id);
+ deref_cm_id(&ep->com);
c4iw_put_ep(&ep->com);
fail1:
out:
@@ -3350,7 +3542,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
ep->com.local_addr.ss_family);
done:
- cm_id->rem_ref(cm_id);
+ deref_cm_id(&ep->com);
c4iw_put_ep(&ep->com);
return err;
}
@@ -3367,6 +3559,12 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
states[ep->com.state], abrupt);
+ /*
+ * Ref the ep here in case we have fatal errors causing the
+ * ep to be released and freed.
+ */
+ c4iw_get_ep(&ep->com);
+
rdev = &ep->com.dev->rdev;
if (c4iw_fatal_error(rdev)) {
fatal = 1;
@@ -3418,10 +3616,30 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
set_bit(EP_DISC_CLOSE, &ep->com.history);
ret = send_halfclose(ep, gfp);
}
- if (ret)
+ if (ret) {
+ set_bit(EP_DISC_FAIL, &ep->com.history);
+ if (!abrupt) {
+ stop_ep_timer(ep);
+ close_complete_upcall(ep, -EIO);
+ }
+ if (ep->com.qp) {
+ struct c4iw_qp_attributes attrs;
+
+ attrs.next_state = C4IW_QP_STATE_ERROR;
+ ret = c4iw_modify_qp(ep->com.qp->rhp,
+ ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ if (ret)
+ pr_err(MOD
+ "%s - qp <- error failed!\n",
+ __func__);
+ }
fatal = 1;
+ }
}
mutex_unlock(&ep->com.mutex);
+ c4iw_put_ep(&ep->com);
if (fatal)
release_ep_resources(ep);
return ret;
@@ -3676,7 +3894,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
struct cpl_pass_accept_req *req = (void *)(rss + 1);
struct l2t_entry *e;
struct dst_entry *dst;
- struct c4iw_ep *lep;
+ struct c4iw_ep *lep = NULL;
u16 window;
struct port_info *pi;
struct net_device *pdev;
@@ -3701,7 +3919,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
*/
stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
- lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid);
+ lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!lep) {
PDBG("%s connect request on invalid stid %d\n", __func__, stid);
goto reject;
@@ -3802,6 +4020,8 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
free_dst:
dst_release(dst);
reject:
+ if (lep)
+ c4iw_put_ep(&lep->com);
return 0;
}
@@ -3809,7 +4029,7 @@ reject:
* These are the real handlers that are called from a
* work queue.
*/
-static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = {
+static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
[CPL_ACT_ESTABLISH] = act_establish,
[CPL_ACT_OPEN_RPL] = act_open_rpl,
[CPL_RX_DATA] = rx_data,
@@ -3825,7 +4045,9 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = {
[CPL_RDMA_TERMINATE] = terminate,
[CPL_FW4_ACK] = fw4_ack,
[CPL_FW6_MSG] = deferred_fw6_msg,
- [CPL_RX_PKT] = rx_pkt
+ [CPL_RX_PKT] = rx_pkt,
+ [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
+ [FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe
};
static void process_timeout(struct c4iw_ep *ep)
@@ -3839,11 +4061,12 @@ static void process_timeout(struct c4iw_ep *ep)
set_bit(TIMEDOUT, &ep->com.history);
switch (ep->com.state) {
case MPA_REQ_SENT:
- __state_set(&ep->com, ABORTING);
connect_reply_upcall(ep, -ETIMEDOUT);
break;
case MPA_REQ_WAIT:
- __state_set(&ep->com, ABORTING);
+ case MPA_REQ_RCVD:
+ case MPA_REP_SENT:
+ case FPDU_MODE:
break;
case CLOSING:
case MORIBUND:
@@ -3853,7 +4076,6 @@ static void process_timeout(struct c4iw_ep *ep)
ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1);
}
- __state_set(&ep->com, ABORTING);
close_complete_upcall(ep, -ETIMEDOUT);
break;
case ABORTING:
@@ -3871,9 +4093,9 @@ static void process_timeout(struct c4iw_ep *ep)
__func__, ep, ep->hwtid, ep->com.state);
abort = 0;
}
- if (abort)
- abort_connection(ep, NULL, GFP_KERNEL);
mutex_unlock(&ep->com.mutex);
+ if (abort)
+ c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
c4iw_put_ep(&ep->com);
}
@@ -4006,10 +4228,10 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_abort_req_rss *req = cplhdr(skb);
struct c4iw_ep *ep;
- struct tid_info *t = dev->rdev.lldi.tids;
unsigned int tid = GET_TID(req);
- ep = lookup_tid(t, tid);
+ ep = get_ep_from_tid(dev, tid);
+ /* This EP will be dereferenced in peer_abort() */
if (!ep) {
printk(KERN_WARNING MOD
"Abort on non-existent endpoint, tid %d\n", tid);
@@ -4020,24 +4242,13 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
__func__, ep->hwtid, req->status,
neg_adv_str(req->status));
- ep->stats.abort_neg_adv++;
- dev->rdev.stats.neg_adv++;
- kfree_skb(skb);
- return 0;
+ goto out;
}
PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
ep->com.state);
- /*
- * Wake up any threads in rdma_init() or rdma_fini().
- * However, if we are on MPAv2 and want to retry with MPAv1
- * then, don't wake up yet.
- */
- if (mpa_rev == 2 && !ep->tried_with_mpa_v1) {
- if (ep->com.state != MPA_REQ_SENT)
- c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
- } else
- c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+ c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+out:
sched(dev, skb);
return 0;
}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index df43f871ab61..f6f34a75af27 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -755,6 +755,7 @@ enum c4iw_ep_flags {
CLOSE_SENT = 3,
TIMEOUT = 4,
QP_REFERENCED = 5,
+ STOP_MPA_TIMER = 7,
};
enum c4iw_ep_history {
@@ -779,7 +780,13 @@ enum c4iw_ep_history {
EP_DISC_ABORT = 18,
CONN_RPL_UPCALL = 19,
ACT_RETRY_NOMEM = 20,
- ACT_RETRY_INUSE = 21
+ ACT_RETRY_INUSE = 21,
+ CLOSE_CON_RPL = 22,
+ EP_DISC_FAIL = 24,
+ QP_REFED = 25,
+ QP_DEREFED = 26,
+ CM_ID_REFED = 27,
+ CM_ID_DEREFED = 28,
};
struct c4iw_ep_common {
@@ -917,9 +924,8 @@ void c4iw_qp_rem_ref(struct ib_qp *qp);
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-int c4iw_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents);
+int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
int c4iw_dealloc_mw(struct ib_mw *mw);
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 008be07d5604..55d0651ee4de 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -86,8 +86,9 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
(wait ? FW_WR_COMPL_F : 0));
req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L;
req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16)));
- req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE));
- req->cmd |= cpu_to_be32(T5_ULP_MEMIO_ORDER_V(1));
+ req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE) |
+ T5_ULP_MEMIO_ORDER_V(1) |
+ T5_ULP_MEMIO_FID_V(rdev->lldi.rxq_ids[0]));
req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V(len>>5));
req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16));
req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(addr));
@@ -690,15 +691,14 @@ static int c4iw_set_page(struct ib_mr *ibmr, u64 addr)
return 0;
}
-int c4iw_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents)
+int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
{
struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
mhp->mpl_len = 0;
- return ib_sg_to_pages(ibmr, sg, sg_nents, c4iw_set_page);
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, c4iw_set_page);
}
int c4iw_dereg_mr(struct ib_mr *ib_mr)
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 819767681445..8b9532034558 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -50,8 +50,6 @@
#include <rdma/ib_pack.h>
#include <rdma/rdma_cm.h>
#include <rdma/iw_cm.h>
-#include <rdma/iw_portmap.h>
-#include <rdma/rdma_netlink.h>
#include <crypto/hash.h>
#include "i40iw_status.h"
@@ -254,6 +252,7 @@ struct i40iw_device {
u32 arp_table_size;
u32 next_arp_index;
spinlock_t resource_lock; /* hw resource access */
+ spinlock_t qptable_lock;
u32 vendor_id;
u32 vendor_part_id;
u32 of_device_registered;
@@ -392,7 +391,7 @@ void i40iw_flush_wqes(struct i40iw_device *iwdev,
void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
unsigned char *mac_addr,
- __be32 *ip_addr,
+ u32 *ip_addr,
bool ipv4,
u32 action);
@@ -550,7 +549,7 @@ enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
struct i40iw_qp_flush_info *info,
bool wait);
-void i40iw_copy_ip_ntohl(u32 *dst, u32 *src);
+void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src);
struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *ib_pd,
u64 addr,
u64 size,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 38f917a6c778..d2fa72516960 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -771,6 +771,7 @@ static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node,
{
struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
+ u16 ctrl_ird, ctrl_ord;
/* initialize the upper 5 bytes of the frame */
i40iw_build_mpa_v1(cm_node, start_addr, mpa_key);
@@ -779,38 +780,38 @@ static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node,
/* initialize RTR msg */
if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
- rtr_msg->ctrl_ird = IETF_NO_IRD_ORD;
- rtr_msg->ctrl_ord = IETF_NO_IRD_ORD;
+ ctrl_ird = IETF_NO_IRD_ORD;
+ ctrl_ord = IETF_NO_IRD_ORD;
} else {
- rtr_msg->ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
+ ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
IETF_NO_IRD_ORD : cm_node->ird_size;
- rtr_msg->ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
+ ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
IETF_NO_IRD_ORD : cm_node->ord_size;
}
- rtr_msg->ctrl_ird |= IETF_PEER_TO_PEER;
- rtr_msg->ctrl_ird |= IETF_FLPDU_ZERO_LEN;
+ ctrl_ird |= IETF_PEER_TO_PEER;
+ ctrl_ird |= IETF_FLPDU_ZERO_LEN;
switch (mpa_key) {
case MPA_KEY_REQUEST:
- rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
- rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+ ctrl_ord |= IETF_RDMA0_WRITE;
+ ctrl_ord |= IETF_RDMA0_READ;
break;
case MPA_KEY_REPLY:
switch (cm_node->send_rdma0_op) {
case SEND_RDMA_WRITE_ZERO:
- rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
+ ctrl_ord |= IETF_RDMA0_WRITE;
break;
case SEND_RDMA_READ_ZERO:
- rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+ ctrl_ord |= IETF_RDMA0_READ;
break;
}
break;
default:
break;
}
- rtr_msg->ctrl_ird = htons(rtr_msg->ctrl_ird);
- rtr_msg->ctrl_ord = htons(rtr_msg->ctrl_ord);
+ rtr_msg->ctrl_ird = htons(ctrl_ird);
+ rtr_msg->ctrl_ord = htons(ctrl_ord);
}
/**
@@ -2107,7 +2108,7 @@ static bool i40iw_ipv6_is_loopback(u32 *loc_addr, u32 *rem_addr)
struct in6_addr raddr6;
i40iw_copy_ip_htonl(raddr6.in6_u.u6_addr32, rem_addr);
- return (!memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6));
+ return !memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6);
}
/**
@@ -2160,7 +2161,7 @@ static struct i40iw_cm_node *i40iw_make_cm_node(
cm_node->tcp_cntxt.rcv_wnd =
I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
ts = current_kernel_time();
- cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
+ cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec;
cm_node->tcp_cntxt.mss = iwdev->mss;
cm_node->iwdev = iwdev;
@@ -2234,7 +2235,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
if (cm_node->listener) {
i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
} else {
- if (!i40iw_listen_port_in_use(cm_core, htons(cm_node->loc_port)) &&
+ if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) &&
cm_node->apbvt_set && cm_node->iwdev) {
i40iw_manage_apbvt(cm_node->iwdev,
cm_node->loc_port,
@@ -2852,7 +2853,6 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
void *private_data,
struct i40iw_cm_info *cm_info)
{
- int ret;
struct i40iw_cm_node *cm_node;
struct i40iw_cm_listener *loopback_remotelistener;
struct i40iw_cm_node *loopback_remotenode;
@@ -2922,30 +2922,6 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
memcpy(cm_node->pdata_buf, private_data, private_data_len);
cm_node->state = I40IW_CM_STATE_SYN_SENT;
- ret = i40iw_send_syn(cm_node, 0);
-
- if (ret) {
- if (cm_node->ipv4)
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "Api - connect() FAILED: dest addr=%pI4",
- cm_node->rem_addr);
- else
- i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
- "Api - connect() FAILED: dest addr=%pI6",
- cm_node->rem_addr);
- i40iw_rem_ref_cm_node(cm_node);
- cm_node = NULL;
- }
-
- if (cm_node)
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n",
- cm_node->rem_port,
- cm_node,
- cm_node->cm_id);
-
return cm_node;
}
@@ -3266,11 +3242,13 @@ static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node,
tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[0]);
tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[0]);
- tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table(iwqp->iwdev,
- &tcp_info->dest_ip_addr3,
- true,
- NULL,
- I40IW_ARP_RESOLVE));
+ tcp_info->arp_idx =
+ cpu_to_le16((u16)i40iw_arp_table(
+ iwqp->iwdev,
+ &tcp_info->dest_ip_addr3,
+ true,
+ NULL,
+ I40IW_ARP_RESOLVE));
} else {
tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
tcp_info->dst_port = cpu_to_le16(cm_node->rem_port);
@@ -3282,12 +3260,13 @@ static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node,
tcp_info->local_ipaddr1 = cpu_to_le32(cm_node->loc_addr[1]);
tcp_info->local_ipaddr2 = cpu_to_le32(cm_node->loc_addr[2]);
tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[3]);
- tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table(
- iwqp->iwdev,
- &tcp_info->dest_ip_addr0,
- false,
- NULL,
- I40IW_ARP_RESOLVE));
+ tcp_info->arp_idx =
+ cpu_to_le16((u16)i40iw_arp_table(
+ iwqp->iwdev,
+ &tcp_info->dest_ip_addr0,
+ false,
+ NULL,
+ I40IW_ARP_RESOLVE));
}
}
@@ -3564,7 +3543,6 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct i40iw_cm_node *cm_node;
struct ib_qp_attr attr;
int passive_state;
- struct i40iw_ib_device *iwibdev;
struct ib_mr *ibmr;
struct i40iw_pd *iwpd;
u16 buf_len = 0;
@@ -3627,7 +3605,6 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
!i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
(!cm_node->ipv4 &&
!i40iw_ipv6_is_loopback(cm_node->loc_addr, cm_node->rem_addr))) {
- iwibdev = iwdev->iwibdev;
iwpd = iwqp->iwpd;
tagged_offset = (uintptr_t)iwqp->ietf_mem.va;
ibmr = i40iw_reg_phys_mr(&iwpd->ibpd,
@@ -3752,6 +3729,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct sockaddr_in *raddr;
struct sockaddr_in6 *laddr6;
struct sockaddr_in6 *raddr6;
+ bool qhash_set = false;
int apbvt_set = 0;
enum i40iw_status_code status;
@@ -3810,6 +3788,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
true);
if (status)
return -EINVAL;
+ qhash_set = true;
}
status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD);
if (status) {
@@ -3828,23 +3807,8 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
conn_param->private_data_len,
(void *)conn_param->private_data,
&cm_info);
- if (!cm_node) {
- i40iw_manage_qhash(iwdev,
- &cm_info,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
-
- if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
- cm_info.loc_port))
- i40iw_manage_apbvt(iwdev,
- cm_info.loc_port,
- I40IW_MANAGE_APBVT_DEL);
- cm_id->rem_ref(cm_id);
- iwdev->cm_core.stats_connect_errs++;
- return -ENOMEM;
- }
+ if (!cm_node)
+ goto err;
i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
@@ -3852,12 +3816,54 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
cm_node->ord_size = 1;
cm_node->apbvt_set = apbvt_set;
- cm_node->qhash_set = true;
+ cm_node->qhash_set = qhash_set;
iwqp->cm_node = cm_node;
cm_node->iwqp = iwqp;
iwqp->cm_id = cm_id;
i40iw_add_ref(&iwqp->ibqp);
+
+ if (cm_node->state == I40IW_CM_STATE_SYN_SENT) {
+ if (i40iw_send_syn(cm_node, 0)) {
+ i40iw_rem_ref_cm_node(cm_node);
+ goto err;
+ }
+ }
+
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n",
+ cm_node->rem_port,
+ cm_node,
+ cm_node->cm_id);
return 0;
+
+err:
+ if (cm_node) {
+ if (cm_node->ipv4)
+ i40iw_debug(cm_node->dev,
+ I40IW_DEBUG_CM,
+ "Api - connect() FAILED: dest addr=%pI4",
+ cm_node->rem_addr);
+ else
+ i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
+ "Api - connect() FAILED: dest addr=%pI6",
+ cm_node->rem_addr);
+ }
+ i40iw_manage_qhash(iwdev,
+ &cm_info,
+ I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+ I40IW_QHASH_MANAGE_TYPE_DELETE,
+ NULL,
+ false);
+
+ if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
+ cm_info.loc_port))
+ i40iw_manage_apbvt(iwdev,
+ cm_info.loc_port,
+ I40IW_MANAGE_APBVT_DEL);
+ cm_id->rem_ref(cm_id);
+ iwdev->cm_core.stats_connect_errs++;
+ return -ENOMEM;
}
/**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index 5f8ceb4a8e84..e9046d9f9645 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -1,6 +1,6 @@
/*******************************************************************************
*
-* Copyright (c) 2015 Intel Corporation. All rights reserved.
+* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -291,8 +291,6 @@ struct i40iw_cm_listener {
u8 loc_mac[ETH_ALEN];
u32 loc_addr[4];
u16 loc_port;
- u32 map_loc_addr[4];
- u16 map_loc_port;
struct iw_cm_id *cm_id;
atomic_t ref_count;
struct i40iw_device *iwdev;
@@ -317,8 +315,6 @@ struct i40iw_kmem_info {
struct i40iw_cm_node {
u32 loc_addr[4], rem_addr[4];
u16 loc_port, rem_port;
- u32 map_loc_addr[4], map_rem_addr[4];
- u16 map_loc_port, map_rem_port;
u16 vlan_id;
enum i40iw_cm_node_state state;
u8 loc_mac[ETH_ALEN];
@@ -370,10 +366,6 @@ struct i40iw_cm_info {
u16 rem_port;
u32 loc_addr[4];
u32 rem_addr[4];
- u16 map_loc_port;
- u16 map_rem_port;
- u32 map_loc_addr[4];
- u32 map_rem_addr[4];
u16 vlan_id;
int backlog;
u16 user_pri;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index f05802bf6ca0..2c4b4d072d6a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -114,16 +114,21 @@ static enum i40iw_status_code i40iw_cqp_poll_registers(
* i40iw_sc_parse_fpm_commit_buf - parse fpm commit buffer
* @buf: ptr to fpm commit buffer
* @info: ptr to i40iw_hmc_obj_info struct
+ * @sd: number of SDs for HMC objects
*
* parses fpm commit info and copy base value
* of hmc objects in hmc_info
*/
static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf(
u64 *buf,
- struct i40iw_hmc_obj_info *info)
+ struct i40iw_hmc_obj_info *info,
+ u32 *sd)
{
u64 temp;
+ u64 size;
+ u64 base = 0;
u32 i, j;
+ u32 k = 0;
u32 low;
/* copy base values in obj_info */
@@ -131,10 +136,20 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf(
i <= I40IW_HMC_IW_PBLE; i++, j += 8) {
get_64bit_val(buf, j, &temp);
info[i].base = RS_64_1(temp, 32) * 512;
+ if (info[i].base > base) {
+ base = info[i].base;
+ k = i;
+ }
low = (u32)(temp);
if (low)
info[i].cnt = low;
}
+ size = info[k].cnt * info[k].size + info[k].base;
+ if (size & 0x1FFFFF)
+ *sd = (u32)((size >> 21) + 1); /* add 1 for remainder */
+ else
+ *sd = (u32)(size >> 21);
+
return 0;
}
@@ -2909,6 +2924,65 @@ static enum i40iw_status_code i40iw_sc_mw_alloc(
}
/**
+ * i40iw_sc_mr_fast_register - Posts RDMA fast register mr WR to iwarp qp
+ * @qp: sc qp struct
+ * @info: fast mr info
+ * @post_sq: flag for cqp db to ring
+ */
+enum i40iw_status_code i40iw_sc_mr_fast_register(
+ struct i40iw_sc_qp *qp,
+ struct i40iw_fast_reg_stag_info *info,
+ bool post_sq)
+{
+ u64 temp, header;
+ u64 *wqe;
+ u32 wqe_idx;
+
+ wqe = i40iw_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, I40IW_QP_WQE_MIN_SIZE,
+ 0, info->wr_id);
+ if (!wqe)
+ return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+ i40iw_debug(qp->dev, I40IW_DEBUG_MR, "%s: wr_id[%llxh] wqe_idx[%04d] location[%p]\n",
+ __func__, info->wr_id, wqe_idx,
+ &qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid);
+ temp = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? (uintptr_t)info->va : info->fbo;
+ set_64bit_val(wqe, 0, temp);
+
+ temp = RS_64(info->first_pm_pbl_index >> 16, I40IWQPSQ_FIRSTPMPBLIDXHI);
+ set_64bit_val(wqe,
+ 8,
+ LS_64(temp, I40IWQPSQ_FIRSTPMPBLIDXHI) |
+ LS_64(info->reg_addr_pa >> I40IWQPSQ_PBLADDR_SHIFT, I40IWQPSQ_PBLADDR));
+
+ set_64bit_val(wqe,
+ 16,
+ info->total_len |
+ LS_64(info->first_pm_pbl_index, I40IWQPSQ_FIRSTPMPBLIDXLO));
+
+ header = LS_64(info->stag_key, I40IWQPSQ_STAGKEY) |
+ LS_64(info->stag_idx, I40IWQPSQ_STAGINDEX) |
+ LS_64(I40IWQP_OP_FAST_REGISTER, I40IWQPSQ_OPCODE) |
+ LS_64(info->chunk_size, I40IWQPSQ_LPBLSIZE) |
+ LS_64(info->page_size, I40IWQPSQ_HPAGESIZE) |
+ LS_64(info->access_rights, I40IWQPSQ_STAGRIGHTS) |
+ LS_64(info->addr_type, I40IWQPSQ_VABASEDTO) |
+ LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
+ LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+ LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "FAST_REG WQE",
+ wqe, I40IW_QP_WQE_MIN_SIZE);
+
+ if (post_sq)
+ i40iw_qp_post_wr(&qp->qp_uk);
+ return 0;
+}
+
+/**
* i40iw_sc_send_lsmm - send last streaming mode message
* @qp: sc qp struct
* @lsmm_buf: buffer with lsmm message
@@ -3147,7 +3221,7 @@ enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, u8 hmc_fn_
i40iw_cqp_commit_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id);
/* parse the fpm_commit_buf and fill hmc obj info */
- i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj);
+ i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj, &hmc_info->sd_table.sd_cnt);
mem_size = sizeof(struct i40iw_hmc_sd_entry) *
(hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index);
ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
@@ -3221,7 +3295,9 @@ static enum i40iw_status_code i40iw_sc_configure_iw_fpm(struct i40iw_sc_dev *dev
/* parse the fpm_commit_buf and fill hmc obj info */
if (!ret_code)
- ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf, hmc_info->hmc_obj);
+ ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf,
+ hmc_info->hmc_obj,
+ &hmc_info->sd_table.sd_cnt);
i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "COMMIT FPM BUFFER",
commit_fpm_mem.va, I40IW_COMMIT_FPM_BUF_SIZE);
@@ -3469,6 +3545,40 @@ static bool i40iw_ring_full(struct i40iw_sc_cqp *cqp)
}
/**
+ * i40iw_est_sd - returns approximate number of SDs for HMC
+ * @dev: sc device struct
+ * @hmc_info: hmc structure, size and count for HMC objects
+ */
+static u64 i40iw_est_sd(struct i40iw_sc_dev *dev, struct i40iw_hmc_info *hmc_info)
+{
+ int i;
+ u64 size = 0;
+ u64 sd;
+
+ for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_PBLE; i++)
+ size += hmc_info->hmc_obj[i].cnt * hmc_info->hmc_obj[i].size;
+
+ if (dev->is_pf)
+ size += hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size;
+
+ if (size & 0x1FFFFF)
+ sd = (size >> 21) + 1; /* add 1 for remainder */
+ else
+ sd = size >> 21;
+
+ if (!dev->is_pf) {
+ /* 2MB alignment for VF PBLE HMC */
+ size = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size;
+ if (size & 0x1FFFFF)
+ sd += (size >> 21) + 1; /* add 1 for remainder */
+ else
+ sd += size >> 21;
+ }
+
+ return sd;
+}
+
+/**
* i40iw_config_fpm_values - configure HMC objects
* @dev: sc device struct
* @qp_count: desired qp count
@@ -3479,7 +3589,7 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
u32 i, mem_size;
u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted;
u32 powerof2;
- u64 sd_needed, bytes_needed;
+ u64 sd_needed;
u32 loop_count = 0;
struct i40iw_hmc_info *hmc_info;
@@ -3497,23 +3607,15 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
return ret_code;
}
- bytes_needed = 0;
- for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) {
+ for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
- bytes_needed +=
- (hmc_info->hmc_obj[i].max_cnt) * (hmc_info->hmc_obj[i].size);
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s i[%04d] max_cnt[0x%04X] size[0x%04llx]\n",
- __func__, i, hmc_info->hmc_obj[i].max_cnt,
- hmc_info->hmc_obj[i].size);
- }
- sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up */
+ sd_needed = i40iw_est_sd(dev, hmc_info);
i40iw_debug(dev, I40IW_DEBUG_HMC,
"%s: FW initial max sd_count[%08lld] first_sd_index[%04d]\n",
__func__, sd_needed, hmc_info->first_sd_index);
i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: bytes_needed=0x%llx sd count %d where max sd is %d\n",
- __func__, bytes_needed, hmc_info->sd_table.sd_cnt,
+ "%s: sd count %d where max sd is %d\n",
+ __func__, hmc_info->sd_table.sd_cnt,
hmc_fpm_misc->max_sds);
qpwanted = min(qp_count, hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt);
@@ -3555,11 +3657,7 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt = pblewanted;
/* How much memory is needed for all the objects. */
- bytes_needed = 0;
- for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
- bytes_needed +=
- (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size);
- sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1;
+ sd_needed = i40iw_est_sd(dev, hmc_info);
if ((loop_count > 1000) ||
((!(loop_count % 10)) &&
(qpwanted > qpwantedoriginal * 2 / 3))) {
@@ -3580,15 +3678,7 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
pblewanted -= FPM_MULTIPLIER * 1000;
} while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000);
- bytes_needed = 0;
- for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) {
- bytes_needed += (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size);
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s i[%04d] cnt[0x%04x] size[0x%04llx]\n",
- __func__, i, hmc_info->hmc_obj[i].cnt,
- hmc_info->hmc_obj[i].size);
- }
- sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up not truncate. */
+ sd_needed = i40iw_est_sd(dev, hmc_info);
i40iw_debug(dev, I40IW_DEBUG_HMC,
"loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n",
@@ -3606,8 +3696,6 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
return ret_code;
}
- hmc_info->sd_table.sd_cnt = (u32)sd_needed;
-
mem_size = sizeof(struct i40iw_hmc_sd_entry) *
(hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1);
ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
@@ -3911,11 +3999,11 @@ enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev)
*/
static u32 i40iw_iwarp_opcode(struct i40iw_aeqe_info *info, u8 *pkt)
{
- u16 *mpa;
+ __be16 *mpa;
u32 opcode = 0xffffffff;
if (info->q2_data_written) {
- mpa = (u16 *)pkt;
+ mpa = (__be16 *)pkt;
opcode = ntohs(mpa[1]) & 0xf;
}
return opcode;
@@ -3977,7 +4065,7 @@ static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp,
if (info->q2_data_written) {
/* Use data from offending packet to fill in ddp & rdma hdrs */
pkt = i40iw_locate_mpa(pkt);
- ddp_seg_len = ntohs(*(u16 *)pkt);
+ ddp_seg_len = ntohs(*(__be16 *)pkt);
if (ddp_seg_len) {
copy_len = 2;
termhdr->hdrct = DDP_LEN_FLAG;
@@ -4188,13 +4276,13 @@ void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *
void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info)
{
u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
- u32 *mpa;
+ __be32 *mpa;
u8 ddp_ctl;
u8 rdma_ctl;
u16 aeq_id = 0;
struct i40iw_terminate_hdr *termhdr;
- mpa = (u32 *)i40iw_locate_mpa(pkt);
+ mpa = (__be32 *)i40iw_locate_mpa(pkt);
if (info->q2_data_written) {
/* did not validate the frame - do it now */
ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff;
@@ -4559,17 +4647,18 @@ static struct i40iw_pd_ops iw_pd_ops = {
};
static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
- i40iw_sc_qp_init,
- i40iw_sc_qp_create,
- i40iw_sc_qp_modify,
- i40iw_sc_qp_destroy,
- i40iw_sc_qp_flush_wqes,
- i40iw_sc_qp_upload_context,
- i40iw_sc_qp_setctx,
- i40iw_sc_send_lsmm,
- i40iw_sc_send_lsmm_nostag,
- i40iw_sc_send_rtt,
- i40iw_sc_post_wqe0,
+ .qp_init = i40iw_sc_qp_init,
+ .qp_create = i40iw_sc_qp_create,
+ .qp_modify = i40iw_sc_qp_modify,
+ .qp_destroy = i40iw_sc_qp_destroy,
+ .qp_flush_wqes = i40iw_sc_qp_flush_wqes,
+ .qp_upload_context = i40iw_sc_qp_upload_context,
+ .qp_setctx = i40iw_sc_qp_setctx,
+ .qp_send_lsmm = i40iw_sc_send_lsmm,
+ .qp_send_lsmm_nostag = i40iw_sc_send_lsmm_nostag,
+ .qp_send_rtt = i40iw_sc_send_rtt,
+ .qp_post_wqe0 = i40iw_sc_post_wqe0,
+ .iw_mr_fast_register = i40iw_sc_mr_fast_register
};
static struct i40iw_priv_cq_ops iw_priv_cq_ops = {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index aab88d65f805..bd942da91a27 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -1290,7 +1290,7 @@
/* wqe size considering 32 bytes per wqe*/
#define I40IWQP_SW_MIN_WQSIZE 4 /* 128 bytes */
-#define I40IWQP_SW_MAX_WQSIZE 16384 /* 524288 bytes */
+#define I40IWQP_SW_MAX_WQSIZE 2048 /* 2048 bytes */
#define I40IWQP_OP_RDMA_WRITE 0
#define I40IWQP_OP_RDMA_READ 1
@@ -1512,6 +1512,8 @@ enum i40iw_alignment {
I40IW_SD_BUF_ALIGNMENT = 0x100
};
+#define I40IW_WQE_SIZE_64 64
+
#define I40IW_QP_WQE_MIN_SIZE 32
#define I40IW_QP_WQE_MAX_SIZE 128
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 9fd302425563..3ee0cad96bc6 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -106,7 +106,9 @@ u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev)
set_bit(2, iwdev->allocated_pds);
spin_lock_init(&iwdev->resource_lock);
- mrdrvbits = 24 - get_count_order(iwdev->max_mr);
+ spin_lock_init(&iwdev->qptable_lock);
+ /* stag index mask has a minimum of 14 bits */
+ mrdrvbits = 24 - max(get_count_order(iwdev->max_mr), 14);
iwdev->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits));
return 0;
}
@@ -301,11 +303,15 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
"%s ae_id = 0x%x bool qp=%d qp_id = %d\n",
__func__, info->ae_id, info->qp, info->qp_cq_id);
if (info->qp) {
+ spin_lock_irqsave(&iwdev->qptable_lock, flags);
iwqp = iwdev->qp_table[info->qp_cq_id];
if (!iwqp) {
+ spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id);
continue;
}
+ i40iw_add_ref(&iwqp->ibqp);
+ spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
qp = &iwqp->sc_qp;
spin_lock_irqsave(&iwqp->lock, flags);
iwqp->hw_tcp_state = info->tcp_state;
@@ -411,6 +417,8 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
i40iw_terminate_connection(qp, info);
break;
}
+ if (info->qp)
+ i40iw_rem_ref(&iwqp->ibqp);
} while (1);
if (aeqcnt)
@@ -460,7 +468,7 @@ int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool ad
*/
void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
unsigned char *mac_addr,
- __be32 *ip_addr,
+ u32 *ip_addr,
bool ipv4,
u32 action)
{
@@ -481,7 +489,7 @@ void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
cqp_info->cqp_cmd = OP_ADD_ARP_CACHE_ENTRY;
info = &cqp_info->in.u.add_arp_cache_entry.info;
memset(info, 0, sizeof(*info));
- info->arp_index = cpu_to_le32(arp_index);
+ info->arp_index = cpu_to_le16((u16)arp_index);
info->permanent = true;
ether_addr_copy(info->mac_addr, mac_addr);
cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index e41fae2422ab..c963cad92f5a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -270,7 +270,6 @@ static void i40iw_disable_irq(struct i40iw_sc_dev *dev,
i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0);
else
i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0);
- synchronize_irq(msix_vec->irq);
free_irq(msix_vec->irq, dev_id);
}
@@ -1147,10 +1146,7 @@ static enum i40iw_status_code i40iw_alloc_set_mac_ipaddr(struct i40iw_device *iw
if (!status) {
status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
(u8)iwdev->mac_ip_table_idx);
- if (!status)
- status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
- (u8)iwdev->mac_ip_table_idx);
- else
+ if (status)
i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
}
return status;
@@ -1165,7 +1161,7 @@ static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev)
struct net_device *ip_dev;
struct inet6_dev *idev;
struct inet6_ifaddr *ifp;
- __be32 local_ipaddr6[4];
+ u32 local_ipaddr6[4];
rcu_read_lock();
for_each_netdev_rcu(&init_net, ip_dev) {
@@ -1512,6 +1508,7 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
I40IW_HMC_PROFILE_DEFAULT;
iwdev->max_rdma_vfs =
(iwdev->resource_profile != I40IW_HMC_PROFILE_DEFAULT) ? max_rdma_vfs : 0;
+ iwdev->max_enabled_vfs = iwdev->max_rdma_vfs;
iwdev->netdev = ldev->netdev;
hdl->client = client;
iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS;
@@ -1531,7 +1528,10 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
goto exit;
iwdev->obj_next = iwdev->obj_mem;
iwdev->push_mode = push_mode;
+
init_waitqueue_head(&iwdev->vchnl_waitq);
+ init_waitqueue_head(&dev->vf_reqs);
+
status = i40iw_initialize_dev(iwdev, ldev);
exit:
if (status) {
@@ -1710,7 +1710,6 @@ static void i40iw_vf_reset(struct i40e_info *ldev, struct i40e_client *client, u
for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) {
if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id))
continue;
-
/* free all resources allocated on behalf of vf */
tmp_vfdev = dev->vf_dev[i];
spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
@@ -1819,8 +1818,6 @@ static int i40iw_virtchnl_receive(struct i40e_info *ldev,
dev = &hdl->device.sc_dev;
iwdev = dev->back_dev;
- i40iw_debug(dev, I40IW_DEBUG_VIRT, "msg %p, message length %u\n", msg, len);
-
if (dev->vchnl_if.vchnl_recv) {
ret_code = dev->vchnl_if.vchnl_recv(dev, vf_id, msg, len);
if (!dev->is_pf) {
@@ -1832,6 +1829,39 @@ static int i40iw_virtchnl_receive(struct i40e_info *ldev,
}
/**
+ * i40iw_vf_clear_to_send - wait to send virtual channel message
+ * @dev: iwarp device *
+ * Wait for until virtual channel is clear
+ * before sending the next message
+ *
+ * Returns false if error
+ * Returns true if clear to send
+ */
+bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev)
+{
+ struct i40iw_device *iwdev;
+ wait_queue_t wait;
+
+ iwdev = dev->back_dev;
+
+ if (!wq_has_sleeper(&dev->vf_reqs) &&
+ (atomic_read(&iwdev->vchnl_msgs) == 0))
+ return true; /* virtual channel is clear */
+
+ init_wait(&wait);
+ add_wait_queue_exclusive(&dev->vf_reqs, &wait);
+
+ if (!wait_event_timeout(dev->vf_reqs,
+ (atomic_read(&iwdev->vchnl_msgs) == 0),
+ I40IW_VCHNL_EVENT_TIMEOUT))
+ dev->vchnl_up = false;
+
+ remove_wait_queue(&dev->vf_reqs, &wait);
+
+ return dev->vchnl_up;
+}
+
+/**
* i40iw_virtchnl_send - send a message through the virtual channel
* @dev: iwarp device
* @vf_id: virtual function id associated with the message
@@ -1848,18 +1878,16 @@ static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev,
{
struct i40iw_device *iwdev;
struct i40e_info *ldev;
- enum i40iw_status_code ret_code = I40IW_ERR_BAD_PTR;
if (!dev || !dev->back_dev)
- return ret_code;
+ return I40IW_ERR_BAD_PTR;
iwdev = dev->back_dev;
ldev = iwdev->ldev;
if (ldev && ldev->ops && ldev->ops->virtchnl_send)
- ret_code = ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len);
-
- return ret_code;
+ return ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len);
+ return I40IW_ERR_BAD_PTR;
}
/* client interface functions */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
index 7e20493510e8..80f422bf3967 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
@@ -172,6 +172,7 @@ struct i40iw_hw;
u8 __iomem *i40iw_get_hw_addr(void *dev);
void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev);
+bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev);
enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc, void *addr,
u32 length, u32 value);
struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *buf);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c
index ded853d2fad8..85993dc44f6e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_pble.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c
@@ -404,13 +404,14 @@ static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev,
sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa;
if (sd_entry->valid)
return 0;
- if (dev->is_pf)
+ if (dev->is_pf) {
ret_code = i40iw_hmc_sd_one(dev, hmc_info->hmc_fn_id,
sd_reg_val, idx->sd_idx,
sd_entry->entry_type, true);
- if (ret_code) {
- i40iw_pr_err("cqp cmd failed for sd (pbles)\n");
- goto error;
+ if (ret_code) {
+ i40iw_pr_err("cqp cmd failed for sd (pbles)\n");
+ goto error;
+ }
}
sd_entry->valid = true;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index 8eb400d8a7a0..e9c6e82af9c7 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -1194,7 +1194,7 @@ static enum i40iw_status_code i40iw_ieq_process_buf(struct i40iw_puda_rsrc *ieq,
ioffset = (u16)(buf->data - (u8 *)buf->mem.va);
while (datalen) {
- fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(u16 *)datap));
+ fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(__be16 *)datap));
if (fpdu_len > pfpdu->max_fpdu_data) {
i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
"%s: error bad fpdu_len\n", __func__);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h
index b0110c15e044..91c421762f06 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_status.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_status.h
@@ -95,6 +95,7 @@ enum i40iw_status_code {
I40IW_ERR_INVALID_MAC_ADDR = -65,
I40IW_ERR_BAD_STAG = -66,
I40IW_ERR_CQ_COMPL_ERROR = -67,
+ I40IW_ERR_QUEUE_DESTROYED = -68
};
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index edb3a8c8267a..16cc61720b53 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -479,16 +479,17 @@ struct i40iw_sc_dev {
struct i40iw_virt_mem ieq_mem;
struct i40iw_puda_rsrc *ieq;
- struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
+ const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
struct i40iw_hmc_fpm_misc hmc_fpm_misc;
u16 qs_handle;
- u32 debug_mask;
+ u32 debug_mask;
u16 exception_lan_queue;
u8 hmc_fn_id;
bool is_pf;
bool vchnl_up;
u8 vf_id;
+ wait_queue_head_t vf_reqs;
u64 cqp_cmd_stats[OP_SIZE_CQP_STAT_ARRAY];
struct i40iw_vchnl_vf_msg_buffer vchnl_vf_msg_buf;
u8 hw_rev;
@@ -889,8 +890,8 @@ struct i40iw_qhash_table_info {
u32 qp_num;
u32 dest_ip[4];
u32 src_ip[4];
- u32 dest_port;
- u32 src_port;
+ u16 dest_port;
+ u16 src_port;
};
struct i40iw_local_mac_ipaddr_entry_info {
@@ -1040,6 +1041,9 @@ struct i40iw_priv_qp_ops {
void (*qp_send_lsmm_nostag)(struct i40iw_sc_qp *, void *, u32);
void (*qp_send_rtt)(struct i40iw_sc_qp *, bool);
enum i40iw_status_code (*qp_post_wqe0)(struct i40iw_sc_qp *, u8);
+ enum i40iw_status_code (*iw_mr_fast_register)(struct i40iw_sc_qp *,
+ struct i40iw_fast_reg_stag_info *,
+ bool);
};
struct i40iw_priv_cq_ops {
@@ -1108,7 +1112,7 @@ struct i40iw_hmc_ops {
enum i40iw_status_code (*parse_fpm_query_buf)(u64 *, struct i40iw_hmc_info *,
struct i40iw_hmc_fpm_misc *);
enum i40iw_status_code (*configure_iw_fpm)(struct i40iw_sc_dev *, u8);
- enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *);
+ enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *, u32 *sd);
enum i40iw_status_code (*create_hmc_object)(struct i40iw_sc_dev *dev,
struct i40iw_hmc_create_obj_info *);
enum i40iw_status_code (*del_hmc_object)(struct i40iw_sc_dev *dev,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index f78c3dc8bdb2..e35faea88c13 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -56,6 +56,9 @@ static enum i40iw_status_code i40iw_nop_1(struct i40iw_qp_uk *qp)
wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
wqe = qp->sq_base[wqe_idx].elem;
+
+ qp->sq_wrtrk_array[wqe_idx].wqe_size = I40IW_QP_WQE_MIN_SIZE;
+
peek_head = (qp->sq_ring.head + 1) % qp->sq_ring.size;
wqe_0 = qp->sq_base[peek_head].elem;
if (peek_head)
@@ -130,7 +133,10 @@ static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx)
*/
u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
u32 *wqe_idx,
- u8 wqe_size)
+ u8 wqe_size,
+ u32 total_size,
+ u64 wr_id
+ )
{
u64 *wqe = NULL;
u64 wqe_ptr;
@@ -159,6 +165,17 @@ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
if (!*wqe_idx)
qp->swqe_polarity = !qp->swqe_polarity;
}
+
+ if (((*wqe_idx & 3) == 1) && (wqe_size == I40IW_WQE_SIZE_64)) {
+ i40iw_nop_1(qp);
+ I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+ if (ret_code)
+ return NULL;
+ *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+ if (!*wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+ }
+
for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) {
I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
if (ret_code)
@@ -169,8 +186,15 @@ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
peek_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
wqe_0 = qp->sq_base[peek_head].elem;
- if (peek_head & 0x3)
- wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
+
+ if (((peek_head & 3) == 1) || ((peek_head & 3) == 3)) {
+ if (RS_64(wqe_0[3], I40IWQPSQ_VALID) != !qp->swqe_polarity)
+ wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
+ }
+
+ qp->sq_wrtrk_array[*wqe_idx].wrid = wr_id;
+ qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size;
+ qp->sq_wrtrk_array[*wqe_idx].wqe_size = wqe_size;
return wqe;
}
@@ -249,12 +273,9 @@ static enum i40iw_status_code i40iw_rdma_write(struct i40iw_qp_uk *qp,
if (ret_code)
return ret_code;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
set_64bit_val(wqe, 16,
LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
if (!op_info->rem_addr.stag)
@@ -309,12 +330,9 @@ static enum i40iw_status_code i40iw_rdma_read(struct i40iw_qp_uk *qp,
ret_code = i40iw_fragcnt_to_wqesize_sq(1, &wqe_size);
if (ret_code)
return ret_code;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->lo_addr.len, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->lo_addr.len;
local_fence |= info->local_fence;
set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
@@ -366,13 +384,11 @@ static enum i40iw_status_code i40iw_send(struct i40iw_qp_uk *qp,
if (ret_code)
return ret_code;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
read_fence |= info->read_fence;
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
set_64bit_val(wqe, 16, 0);
header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
LS_64(info->op_type, I40IWQPSQ_OPCODE) |
@@ -427,13 +443,11 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp,
if (ret_code)
return ret_code;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
read_fence |= info->read_fence;
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
set_64bit_val(wqe, 16,
LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
@@ -507,14 +521,11 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp,
if (ret_code)
return ret_code;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
read_fence |= info->read_fence;
-
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
LS_64(info->op_type, I40IWQPSQ_OPCODE) |
LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
@@ -574,12 +585,9 @@ static enum i40iw_status_code i40iw_stag_local_invalidate(struct i40iw_qp_uk *qp
op_info = &info->op.inv_local_stag;
local_fence = info->local_fence;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
set_64bit_val(wqe, 0, 0);
set_64bit_val(wqe, 8,
LS_64(op_info->target_stag, I40IWQPSQ_LOCSTAG));
@@ -619,12 +627,9 @@ static enum i40iw_status_code i40iw_mw_bind(struct i40iw_qp_uk *qp,
op_info = &info->op.bind_window;
local_fence |= info->local_fence;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
set_64bit_val(wqe, 8,
LS_64(op_info->mr_stag, I40IWQPSQ_PARENTMRSTAG) |
@@ -760,7 +765,7 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
enum i40iw_status_code ret_code2 = 0;
bool move_cq_head = true;
u8 polarity;
- u8 addl_frag_cnt, addl_wqes = 0;
+ u8 addl_wqes = 0;
if (cq->avoid_mem_cflct)
cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(cq);
@@ -797,6 +802,10 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
info->is_srq = (bool)RS_64(qword3, I40IWCQ_SRQ);
qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx;
+ if (!qp) {
+ ret_code = I40IW_ERR_QUEUE_DESTROYED;
+ goto exit;
+ }
wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX);
info->qp_handle = (i40iw_qp_handle)(unsigned long)qp;
@@ -827,11 +836,8 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
info->op_type = (u8)RS_64(qword3, I40IWCQ_OP);
sw_wqe = qp->sq_base[wqe_idx].elem;
get_64bit_val(sw_wqe, 24, &wqe_qword);
- addl_frag_cnt =
- (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT);
- i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes);
- addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE);
+ addl_wqes = qp->sq_wrtrk_array[wqe_idx].wqe_size / I40IW_QP_WQE_MIN_SIZE;
I40IW_RING_SET_TAIL(qp->sq_ring, (wqe_idx + addl_wqes));
} else {
do {
@@ -843,9 +849,7 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
get_64bit_val(sw_wqe, 24, &wqe_qword);
op_type = (u8)RS_64(wqe_qword, I40IWQPSQ_OPCODE);
info->op_type = op_type;
- addl_frag_cnt = (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT);
- i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes);
- addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE);
+ addl_wqes = qp->sq_wrtrk_array[tail].wqe_size / I40IW_QP_WQE_MIN_SIZE;
I40IW_RING_SET_TAIL(qp->sq_ring, (tail + addl_wqes));
if (op_type != I40IWQP_OP_NOP) {
info->wr_id = qp->sq_wrtrk_array[tail].wrid;
@@ -859,6 +863,7 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
ret_code = 0;
+exit:
if (!ret_code &&
(info->comp_status == I40IW_COMPL_STATUS_FLUSHED))
if (pring && (I40IW_RING_MORE_WORK(*pring)))
@@ -893,19 +898,21 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
* i40iw_get_wqe_shift - get shift count for maximum wqe size
* @wqdepth: depth of wq required.
* @sge: Maximum Scatter Gather Elements wqe
+ * @inline_data: Maximum inline data size
* @shift: Returns the shift needed based on sge
*
- * Shift can be used to left shift the wqe size based on sge.
- * If sge, == 1, shift =0 (wqe_size of 32 bytes), for sge=2 and 3, shift =1
- * (64 bytes wqes) and 2 otherwise (128 bytes wqe).
+ * Shift can be used to left shift the wqe size based on number of SGEs and inlind data size.
+ * For 1 SGE or inline data <= 16, shift = 0 (wqe size of 32 bytes).
+ * For 2 or 3 SGEs or inline data <= 48, shift = 1 (wqe size of 64 bytes).
+ * Shift of 2 otherwise (wqe size of 128 bytes).
*/
-enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift)
+enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift)
{
u32 size;
*shift = 0;
- if (sge > 1)
- *shift = (sge < 4) ? 1 : 2;
+ if (sge > 1 || inline_data > 16)
+ *shift = (sge < 4 && inline_data <= 48) ? 1 : 2;
/* check if wqdepth is multiple of 2 or not */
@@ -968,11 +975,11 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
if (info->max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
return I40IW_ERR_INVALID_FRAG_COUNT;
- ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, &sqshift);
+ ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, info->max_inline_data, &sqshift);
if (ret_code)
return ret_code;
- ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, &rqshift);
+ ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, 0, &rqshift);
if (ret_code)
return ret_code;
@@ -1097,12 +1104,9 @@ enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp,
u64 header, *wqe;
u32 wqe_idx;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+ wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, wr_id);
if (!wqe)
return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- qp->sq_wrtrk_array[wqe_idx].wrid = wr_id;
- qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
set_64bit_val(wqe, 0, 0);
set_64bit_val(wqe, 8, 0);
set_64bit_val(wqe, 16, 0);
@@ -1125,7 +1129,7 @@ enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp,
* @frag_cnt: number of fragments
* @wqe_size: size of sq wqe returned
*/
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size)
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size)
{
switch (frag_cnt) {
case 0:
@@ -1156,7 +1160,7 @@ enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size)
* @frag_cnt: number of fragments
* @wqe_size: size of rq wqe returned
*/
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size)
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size)
{
switch (frag_cnt) {
case 0:
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
index 5cd971bb8cc7..4627646fe8cd 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -61,7 +61,7 @@ enum i40iw_device_capabilities_const {
I40IW_MAX_CQ_SIZE = 1048575,
I40IW_MAX_AEQ_ALLOCATE_COUNT = 255,
I40IW_DB_ID_ZERO = 0,
- I40IW_MAX_WQ_FRAGMENT_COUNT = 6,
+ I40IW_MAX_WQ_FRAGMENT_COUNT = 3,
I40IW_MAX_SGE_RD = 1,
I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647,
I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647,
@@ -70,8 +70,8 @@ enum i40iw_device_capabilities_const {
I40IW_MAX_VF_FPM_ID = 47,
I40IW_MAX_VF_PER_PF = 127,
I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496,
- I40IW_MAX_INLINE_DATA_SIZE = 112,
- I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 112,
+ I40IW_MAX_INLINE_DATA_SIZE = 48,
+ I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48,
I40IW_MAX_IRD_SIZE = 32,
I40IW_QPCTX_ENCD_MAXIRD = 3,
I40IW_MAX_WQ_ENTRIES = 2048,
@@ -102,6 +102,8 @@ enum i40iw_device_capabilities_const {
#define I40IW_STAG_INDEX_FROM_STAG(stag) (((stag) && 0xFFFFFF00) >> 8)
+#define I40IW_MAX_MR_SIZE 0x10000000000L
+
struct i40iw_qp_uk;
struct i40iw_cq_uk;
struct i40iw_srq_uk;
@@ -198,7 +200,7 @@ enum i40iw_completion_notify {
struct i40iw_post_send {
i40iw_sgl sg_list;
- u8 num_sges;
+ u32 num_sges;
};
struct i40iw_post_inline_send {
@@ -220,7 +222,7 @@ struct i40iw_post_inline_send_w_inv {
struct i40iw_rdma_write {
i40iw_sgl lo_sg_list;
- u8 num_lo_sges;
+ u32 num_lo_sges;
struct i40iw_sge rem_addr;
};
@@ -345,7 +347,9 @@ struct i40iw_dev_uk {
struct i40iw_sq_uk_wr_trk_info {
u64 wrid;
- u64 wr_len;
+ u32 wr_len;
+ u8 wqe_size;
+ u8 reserved[3];
};
struct i40iw_qp_quanta {
@@ -367,6 +371,8 @@ struct i40iw_qp_uk {
u32 qp_id;
u32 sq_size;
u32 rq_size;
+ u32 max_sq_frag_cnt;
+ u32 max_rq_frag_cnt;
struct i40iw_qp_uk_ops ops;
bool use_srq;
u8 swqe_polarity;
@@ -374,8 +380,6 @@ struct i40iw_qp_uk {
u8 rwqe_polarity;
u8 rq_wqe_size;
u8 rq_wqe_size_multiplier;
- u8 max_sq_frag_cnt;
- u8 max_rq_frag_cnt;
bool deferred_flag;
};
@@ -404,8 +408,9 @@ struct i40iw_qp_uk_init_info {
u32 qp_id;
u32 sq_size;
u32 rq_size;
- u8 max_sq_frag_cnt;
- u8 max_rq_frag_cnt;
+ u32 max_sq_frag_cnt;
+ u32 max_rq_frag_cnt;
+ u32 max_inline_data;
};
@@ -422,7 +427,10 @@ void i40iw_device_init_uk(struct i40iw_dev_uk *dev);
void i40iw_qp_post_wr(struct i40iw_qp_uk *qp);
u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx,
- u8 wqe_size);
+ u8 wqe_size,
+ u32 total_size,
+ u64 wr_id
+ );
u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx);
u64 *i40iw_qp_get_next_srq_wqe(struct i40iw_srq_uk *srq, u32 *wqe_idx);
@@ -434,9 +442,9 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq);
enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp, u64 wr_id,
bool signaled, bool post_sq);
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size);
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size);
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size);
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size);
enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
u8 *wqe_size);
-enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift);
+enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift);
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 1ceec81bd8eb..0e8db0a35141 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -59,7 +59,7 @@
* @action: modify, delete or add
*/
int i40iw_arp_table(struct i40iw_device *iwdev,
- __be32 *ip_addr,
+ u32 *ip_addr,
bool ipv4,
u8 *mac_addr,
u32 action)
@@ -152,7 +152,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
struct net_device *upper_dev;
struct i40iw_device *iwdev;
struct i40iw_handler *hdl;
- __be32 local_ipaddr;
+ u32 local_ipaddr;
hdl = i40iw_find_netdev(event_netdev);
if (!hdl)
@@ -167,11 +167,10 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
switch (event) {
case NETDEV_DOWN:
if (upper_dev)
- local_ipaddr =
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+ local_ipaddr = ntohl(
+ ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
else
- local_ipaddr = ifa->ifa_address;
- local_ipaddr = ntohl(local_ipaddr);
+ local_ipaddr = ntohl(ifa->ifa_address);
i40iw_manage_arp_cache(iwdev,
netdev->dev_addr,
&local_ipaddr,
@@ -180,11 +179,10 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
return NOTIFY_OK;
case NETDEV_UP:
if (upper_dev)
- local_ipaddr =
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+ local_ipaddr = ntohl(
+ ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
else
- local_ipaddr = ifa->ifa_address;
- local_ipaddr = ntohl(local_ipaddr);
+ local_ipaddr = ntohl(ifa->ifa_address);
i40iw_manage_arp_cache(iwdev,
netdev->dev_addr,
&local_ipaddr,
@@ -194,12 +192,11 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
case NETDEV_CHANGEADDR:
/* Add the address to the IP table */
if (upper_dev)
- local_ipaddr =
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+ local_ipaddr = ntohl(
+ ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
else
- local_ipaddr = ifa->ifa_address;
+ local_ipaddr = ntohl(ifa->ifa_address);
- local_ipaddr = ntohl(local_ipaddr);
i40iw_manage_arp_cache(iwdev,
netdev->dev_addr,
&local_ipaddr,
@@ -227,7 +224,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
struct net_device *netdev;
struct i40iw_device *iwdev;
struct i40iw_handler *hdl;
- __be32 local_ipaddr6[4];
+ u32 local_ipaddr6[4];
hdl = i40iw_find_netdev(event_netdev);
if (!hdl)
@@ -506,14 +503,19 @@ void i40iw_rem_ref(struct ib_qp *ibqp)
struct cqp_commands_info *cqp_info;
struct i40iw_device *iwdev;
u32 qp_num;
+ unsigned long flags;
iwqp = to_iwqp(ibqp);
- if (!atomic_dec_and_test(&iwqp->refcount))
+ iwdev = iwqp->iwdev;
+ spin_lock_irqsave(&iwdev->qptable_lock, flags);
+ if (!atomic_dec_and_test(&iwqp->refcount)) {
+ spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
return;
+ }
- iwdev = iwqp->iwdev;
qp_num = iwqp->ibqp.qp_num;
iwdev->qp_table[qp_num] = NULL;
+ spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
if (!cqp_request)
return;
@@ -985,21 +987,24 @@ enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev,
enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev)
{
struct i40iw_device *iwdev = dev->back_dev;
- enum i40iw_status_code err_code = 0;
int timeout_ret;
i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s[%u] dev %p, iwdev %p\n",
__func__, __LINE__, dev, iwdev);
- atomic_add(2, &iwdev->vchnl_msgs);
+
+ atomic_set(&iwdev->vchnl_msgs, 2);
timeout_ret = wait_event_timeout(iwdev->vchnl_waitq,
(atomic_read(&iwdev->vchnl_msgs) == 1),
I40IW_VCHNL_EVENT_TIMEOUT);
atomic_dec(&iwdev->vchnl_msgs);
if (!timeout_ret) {
i40iw_pr_err("virt channel completion timeout = 0x%x\n", timeout_ret);
- err_code = I40IW_ERR_TIMEOUT;
+ atomic_set(&iwdev->vchnl_msgs, 0);
+ dev->vchnl_up = false;
+ return I40IW_ERR_TIMEOUT;
}
- return err_code;
+ wake_up(&dev->vf_reqs);
+ return 0;
}
/**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 1fe3b84a06e4..4a740f7a0519 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -63,8 +63,8 @@ static int i40iw_query_device(struct ib_device *ibdev,
ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr);
props->fw_ver = I40IW_FW_VERSION;
props->device_cap_flags = iwdev->device_cap_flags;
- props->vendor_id = iwdev->vendor_id;
- props->vendor_part_id = iwdev->vendor_part_id;
+ props->vendor_id = iwdev->ldev->pcidev->vendor;
+ props->vendor_part_id = iwdev->ldev->pcidev->device;
props->hw_ver = (u32)iwdev->sc_dev.hw_rev;
props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
props->max_qp = iwdev->max_qp;
@@ -74,7 +74,7 @@ static int i40iw_query_device(struct ib_device *ibdev,
props->max_cqe = iwdev->max_cqe;
props->max_mr = iwdev->max_mr;
props->max_pd = iwdev->max_pd;
- props->max_sge_rd = 1;
+ props->max_sge_rd = I40IW_MAX_SGE_RD;
props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE;
props->max_qp_init_rd_atom = props->max_qp_rd_atom;
props->atomic_cap = IB_ATOMIC_NONE;
@@ -120,7 +120,7 @@ static int i40iw_query_port(struct ib_device *ibdev,
props->pkey_tbl_len = 1;
props->active_width = IB_WIDTH_4X;
props->active_speed = 1;
- props->max_msg_sz = 0x80000000;
+ props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
return 0;
}
@@ -437,7 +437,6 @@ void i40iw_free_qp_resources(struct i40iw_device *iwdev,
kfree(iwqp->kqp.wrid_mem);
iwqp->kqp.wrid_mem = NULL;
kfree(iwqp->allocated_buffer);
- iwqp->allocated_buffer = NULL;
}
/**
@@ -521,14 +520,12 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
enum i40iw_status_code status;
struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
- ukinfo->max_sq_frag_cnt = I40IW_MAX_WQ_FRAGMENT_COUNT;
-
sq_size = i40iw_qp_roundup(ukinfo->sq_size + 1);
rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1);
- status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, &sqshift);
+ status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, &sqshift);
if (!status)
- status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, &rqshift);
+ status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, 0, &rqshift);
if (status)
return -ENOSYS;
@@ -609,6 +606,9 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE)
init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
+ if (init_attr->cap.max_send_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
+ init_attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+
memset(&init_info, 0, sizeof(init_info));
sq_size = init_attr->cap.max_send_wr;
@@ -618,6 +618,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
init_info.qp_uk_init_info.rq_size = rq_size;
init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
+ init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
mem = kzalloc(sizeof(*iwqp), GFP_KERNEL);
if (!mem)
@@ -722,8 +723,10 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
iwarp_info = &iwqp->iwarp_info;
iwarp_info->rd_enable = true;
iwarp_info->wr_rdresp_en = true;
- if (!iwqp->user_mode)
+ if (!iwqp->user_mode) {
+ iwarp_info->fast_reg_en = true;
iwarp_info->priv_mode_en = true;
+ }
iwarp_info->ddp_ver = 1;
iwarp_info->rdmap_ver = 1;
@@ -784,6 +787,8 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
return ERR_PTR(err_code);
}
}
+ init_completion(&iwqp->sq_drained);
+ init_completion(&iwqp->rq_drained);
return &iwqp->ibqp;
error:
@@ -1444,6 +1449,166 @@ static int i40iw_handle_q_mem(struct i40iw_device *iwdev,
}
/**
+ * i40iw_hw_alloc_stag - cqp command to allocate stag
+ * @iwdev: iwarp device
+ * @iwmr: iwarp mr pointer
+ */
+static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr)
+{
+ struct i40iw_allocate_stag_info *info;
+ struct i40iw_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
+ enum i40iw_status_code status;
+ int err = 0;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.alloc_stag.info;
+ memset(info, 0, sizeof(*info));
+ info->page_size = PAGE_SIZE;
+ info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
+ info->pd_id = iwpd->sc_pd.pd_id;
+ info->total_len = iwmr->length;
+ cqp_info->cqp_cmd = OP_ALLOC_STAG;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.alloc_stag.dev = &iwdev->sc_dev;
+ cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request;
+
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status) {
+ err = -ENOMEM;
+ i40iw_pr_err("CQP-OP MR Reg fail");
+ }
+ return err;
+}
+
+/**
+ * i40iw_alloc_mr - register stag for fast memory registration
+ * @pd: ibpd pointer
+ * @mr_type: memory for stag registrion
+ * @max_num_sg: man number of pages
+ */
+static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct i40iw_pd *iwpd = to_iwpd(pd);
+ struct i40iw_device *iwdev = to_iwdev(pd->device);
+ struct i40iw_pble_alloc *palloc;
+ struct i40iw_pbl *iwpbl;
+ struct i40iw_mr *iwmr;
+ enum i40iw_status_code status;
+ u32 stag;
+ int err_code = -ENOMEM;
+
+ iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+ if (!iwmr)
+ return ERR_PTR(-ENOMEM);
+
+ stag = i40iw_create_stag(iwdev);
+ if (!stag) {
+ err_code = -EOVERFLOW;
+ goto err;
+ }
+ iwmr->stag = stag;
+ iwmr->ibmr.rkey = stag;
+ iwmr->ibmr.lkey = stag;
+ iwmr->ibmr.pd = pd;
+ iwmr->ibmr.device = pd->device;
+ iwpbl = &iwmr->iwpbl;
+ iwpbl->iwmr = iwmr;
+ iwmr->type = IW_MEMREG_TYPE_MEM;
+ palloc = &iwpbl->pble_alloc;
+ iwmr->page_cnt = max_num_sg;
+ mutex_lock(&iwdev->pbl_mutex);
+ status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
+ mutex_unlock(&iwdev->pbl_mutex);
+ if (!status)
+ goto err1;
+
+ if (palloc->level != I40IW_LEVEL_1)
+ goto err2;
+ err_code = i40iw_hw_alloc_stag(iwdev, iwmr);
+ if (err_code)
+ goto err2;
+ iwpbl->pbl_allocated = true;
+ i40iw_add_pdusecount(iwpd);
+ return &iwmr->ibmr;
+err2:
+ i40iw_free_pble(iwdev->pble_rsrc, palloc);
+err1:
+ i40iw_free_stag(iwdev, stag);
+err:
+ kfree(iwmr);
+ return ERR_PTR(err_code);
+}
+
+/**
+ * i40iw_set_page - populate pbl list for fmr
+ * @ibmr: ib mem to access iwarp mr pointer
+ * @addr: page dma address fro pbl list
+ */
+static int i40iw_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct i40iw_mr *iwmr = to_iwmr(ibmr);
+ struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+ struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+ u64 *pbl;
+
+ if (unlikely(iwmr->npages == iwmr->page_cnt))
+ return -ENOMEM;
+
+ pbl = (u64 *)palloc->level1.addr;
+ pbl[iwmr->npages++] = cpu_to_le64(addr);
+ return 0;
+}
+
+/**
+ * i40iw_map_mr_sg - map of sg list for fmr
+ * @ibmr: ib mem to access iwarp mr pointer
+ * @sg: scatter gather list for fmr
+ * @sg_nents: number of sg pages
+ */
+static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
+{
+ struct i40iw_mr *iwmr = to_iwmr(ibmr);
+
+ iwmr->npages = 0;
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, i40iw_set_page);
+}
+
+/**
+ * i40iw_drain_sq - drain the send queue
+ * @ibqp: ib qp pointer
+ */
+static void i40iw_drain_sq(struct ib_qp *ibqp)
+{
+ struct i40iw_qp *iwqp = to_iwqp(ibqp);
+ struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+
+ if (I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring))
+ wait_for_completion(&iwqp->sq_drained);
+}
+
+/**
+ * i40iw_drain_rq - drain the receive queue
+ * @ibqp: ib qp pointer
+ */
+static void i40iw_drain_rq(struct ib_qp *ibqp)
+{
+ struct i40iw_qp *iwqp = to_iwqp(ibqp);
+ struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+
+ if (I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring))
+ wait_for_completion(&iwqp->rq_drained);
+}
+
+/**
* i40iw_hwreg_mr - send cqp command for memory registration
* @iwdev: iwarp device
* @iwmr: iwarp mr pointer
@@ -1526,14 +1691,16 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
struct i40iw_mr *iwmr;
struct ib_umem *region;
struct i40iw_mem_reg_req req;
- u32 pbl_depth = 0;
+ u64 pbl_depth = 0;
u32 stag = 0;
u16 access;
- u32 region_length;
+ u64 region_length;
bool use_pbles = false;
unsigned long flags;
int err = -ENOSYS;
+ if (length > I40IW_MAX_MR_SIZE)
+ return ERR_PTR(-EINVAL);
region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(region))
return (struct ib_mr *)region;
@@ -1564,7 +1731,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
palloc = &iwpbl->pble_alloc;
iwmr->type = req.reg_type;
- iwmr->page_cnt = pbl_depth;
+ iwmr->page_cnt = (u32)pbl_depth;
switch (req.reg_type) {
case IW_MEMREG_TYPE_QP:
@@ -1881,12 +2048,14 @@ static int i40iw_post_send(struct ib_qp *ibqp,
enum i40iw_status_code ret;
int err = 0;
unsigned long flags;
+ bool inv_stag;
iwqp = (struct i40iw_qp *)ibqp;
ukqp = &iwqp->sc_qp.qp_uk;
spin_lock_irqsave(&iwqp->lock, flags);
while (ib_wr) {
+ inv_stag = false;
memset(&info, 0, sizeof(info));
info.wr_id = (u64)(ib_wr->wr_id);
if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all)
@@ -1896,19 +2065,28 @@ static int i40iw_post_send(struct ib_qp *ibqp,
switch (ib_wr->opcode) {
case IB_WR_SEND:
- if (ib_wr->send_flags & IB_SEND_SOLICITED)
- info.op_type = I40IW_OP_TYPE_SEND_SOL;
- else
- info.op_type = I40IW_OP_TYPE_SEND;
+ /* fall-through */
+ case IB_WR_SEND_WITH_INV:
+ if (ib_wr->opcode == IB_WR_SEND) {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ info.op_type = I40IW_OP_TYPE_SEND_SOL;
+ else
+ info.op_type = I40IW_OP_TYPE_SEND;
+ } else {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ info.op_type = I40IW_OP_TYPE_SEND_SOL_INV;
+ else
+ info.op_type = I40IW_OP_TYPE_SEND_INV;
+ }
if (ib_wr->send_flags & IB_SEND_INLINE) {
info.op.inline_send.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
info.op.inline_send.len = ib_wr->sg_list[0].length;
- ret = ukqp->ops.iw_inline_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false);
+ ret = ukqp->ops.iw_inline_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
} else {
info.op.send.num_sges = ib_wr->num_sge;
info.op.send.sg_list = (struct i40iw_sge *)ib_wr->sg_list;
- ret = ukqp->ops.iw_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false);
+ ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
}
if (ret)
@@ -1936,7 +2114,14 @@ static int i40iw_post_send(struct ib_qp *ibqp,
if (ret)
err = -EIO;
break;
+ case IB_WR_RDMA_READ_WITH_INV:
+ inv_stag = true;
+ /* fall-through*/
case IB_WR_RDMA_READ:
+ if (ib_wr->num_sge > I40IW_MAX_SGE_RD) {
+ err = -EINVAL;
+ break;
+ }
info.op_type = I40IW_OP_TYPE_RDMA_READ;
info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey;
@@ -1944,10 +2129,47 @@ static int i40iw_post_send(struct ib_qp *ibqp,
info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr;
info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
- ret = ukqp->ops.iw_rdma_read(ukqp, &info, false, false);
+ ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false);
if (ret)
err = -EIO;
break;
+ case IB_WR_LOCAL_INV:
+ info.op_type = I40IW_OP_TYPE_INV_STAG;
+ info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
+ ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true);
+ if (ret)
+ err = -EIO;
+ break;
+ case IB_WR_REG_MR:
+ {
+ struct i40iw_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
+ int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size);
+ int flags = reg_wr(ib_wr)->access;
+ struct i40iw_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc;
+ struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
+ struct i40iw_fast_reg_stag_info info;
+
+ info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD;
+ info.access_rights |= i40iw_get_user_access(flags);
+ info.stag_key = reg_wr(ib_wr)->key & 0xff;
+ info.stag_idx = reg_wr(ib_wr)->key >> 8;
+ info.wr_id = ib_wr->wr_id;
+
+ info.addr_type = I40IW_ADDR_TYPE_VA_BASED;
+ info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
+ info.total_len = iwmr->ibmr.length;
+ info.first_pm_pbl_index = palloc->level1.idx;
+ info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
+ info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED;
+
+ if (page_shift == 21)
+ info.page_size = 1; /* 2M page */
+
+ ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true);
+ if (ret)
+ err = -EIO;
+ break;
+ }
default:
err = -EINVAL;
i40iw_pr_err(" upost_send bad opcode = 0x%x\n",
@@ -2027,6 +2249,7 @@ static int i40iw_poll_cq(struct ib_cq *ibcq,
enum i40iw_status_code ret;
struct i40iw_cq_uk *ukcq;
struct i40iw_sc_qp *qp;
+ struct i40iw_qp *iwqp;
unsigned long flags;
iwcq = (struct i40iw_cq *)ibcq;
@@ -2037,6 +2260,8 @@ static int i40iw_poll_cq(struct ib_cq *ibcq,
ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true);
if (ret == I40IW_ERR_QUEUE_EMPTY) {
break;
+ } else if (ret == I40IW_ERR_QUEUE_DESTROYED) {
+ continue;
} else if (ret) {
if (!cqe_count)
cqe_count = -1;
@@ -2044,10 +2269,12 @@ static int i40iw_poll_cq(struct ib_cq *ibcq,
}
entry->wc_flags = 0;
entry->wr_id = cq_poll_info.wr_id;
- if (!cq_poll_info.error)
- entry->status = IB_WC_SUCCESS;
- else
+ if (cq_poll_info.error) {
entry->status = IB_WC_WR_FLUSH_ERR;
+ entry->vendor_err = cq_poll_info.major_err << 16 | cq_poll_info.minor_err;
+ } else {
+ entry->status = IB_WC_SUCCESS;
+ }
switch (cq_poll_info.op_type) {
case I40IW_OP_TYPE_RDMA_WRITE:
@@ -2071,12 +2298,17 @@ static int i40iw_poll_cq(struct ib_cq *ibcq,
break;
}
- entry->vendor_err =
- cq_poll_info.major_err << 16 | cq_poll_info.minor_err;
entry->ex.imm_data = 0;
qp = (struct i40iw_sc_qp *)cq_poll_info.qp_handle;
entry->qp = (struct ib_qp *)qp->back_qp;
entry->src_qp = cq_poll_info.qp_id;
+ iwqp = (struct i40iw_qp *)qp->back_qp;
+ if (iwqp->iwarp_state > I40IW_QP_STATE_RTS) {
+ if (!I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring))
+ complete(&iwqp->sq_drained);
+ if (!I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring))
+ complete(&iwqp->rq_drained);
+ }
entry->byte_len = cq_poll_info.bytes_xfered;
entry++;
cqe_count++;
@@ -2143,7 +2375,6 @@ static int i40iw_get_protocol_stats(struct ib_device *ibdev,
struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
struct timespec curr_time;
static struct timespec last_rd_time = {0, 0};
- enum i40iw_status_code status = 0;
unsigned long flags;
curr_time = current_kernel_time();
@@ -2156,11 +2387,8 @@ static int i40iw_get_protocol_stats(struct ib_device *ibdev,
spin_unlock_irqrestore(&devstat->stats_lock, flags);
} else {
if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1)
- status = i40iw_vchnl_vf_get_pe_stats(dev,
- &devstat->hw_stats);
-
- if (status)
- return -ENOSYS;
+ if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
+ return -ENOSYS;
}
stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] +
@@ -2327,6 +2555,10 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
iwibdev->ibdev.query_device = i40iw_query_device;
iwibdev->ibdev.create_ah = i40iw_create_ah;
iwibdev->ibdev.destroy_ah = i40iw_destroy_ah;
+ iwibdev->ibdev.drain_sq = i40iw_drain_sq;
+ iwibdev->ibdev.drain_rq = i40iw_drain_rq;
+ iwibdev->ibdev.alloc_mr = i40iw_alloc_mr;
+ iwibdev->ibdev.map_mr_sg = i40iw_map_mr_sg;
iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL);
if (!iwibdev->ibdev.iwcm) {
ib_dealloc_device(&iwibdev->ibdev);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
index 1101f77080e6..0069be8a5a38 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
@@ -92,6 +92,7 @@ struct i40iw_mr {
struct ib_umem *region;
u16 type;
u32 page_cnt;
+ u32 npages;
u32 stag;
u64 length;
u64 pgaddrmem[MAX_SAVE_PAGE_ADDRS];
@@ -169,5 +170,7 @@ struct i40iw_qp {
struct i40iw_pbl *iwpbl;
struct i40iw_dma_mem q2_ctx_mem;
struct i40iw_dma_mem ietf_mem;
+ struct completion sq_drained;
+ struct completion rq_drained;
};
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.c b/drivers/infiniband/hw/i40iw/i40iw_vf.c
index cb0f18340e14..e33d4810965c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_vf.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_vf.c
@@ -80,6 +80,6 @@ enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
return 0;
}
-struct i40iw_vf_cqp_ops iw_vf_cqp_ops = {
+const struct i40iw_vf_cqp_ops iw_vf_cqp_ops = {
i40iw_manage_vf_pble_bp
};
diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.h b/drivers/infiniband/hw/i40iw/i40iw_vf.h
index f649f3a62e13..4359559ece9c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_vf.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_vf.h
@@ -57,6 +57,6 @@ enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
u64 scratch,
bool post_sq);
-extern struct i40iw_vf_cqp_ops iw_vf_cqp_ops;
+extern const struct i40iw_vf_cqp_ops iw_vf_cqp_ops;
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
index 6b68f7890b76..3041003c94d2 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
@@ -254,7 +254,7 @@ static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev,
static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev,
u32 vf_id,
struct i40iw_virtchnl_op_buf *vchnl_msg,
- struct i40iw_dev_hw_stats hw_stats)
+ struct i40iw_dev_hw_stats *hw_stats)
{
enum i40iw_status_code ret_code;
u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(struct i40iw_dev_hw_stats) - 1];
@@ -264,7 +264,7 @@ static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev,
vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
- *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = hw_stats;
+ *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = *hw_stats;
ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
if (ret_code)
i40iw_debug(dev, I40IW_DEBUG_VIRT,
@@ -437,11 +437,9 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
return I40IW_SUCCESS;
}
- for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT;
- iw_vf_idx++) {
+ for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
if (!dev->vf_dev[iw_vf_idx]) {
- if (first_avail_iw_vf ==
- I40IW_MAX_PE_ENABLED_VF_COUNT)
+ if (first_avail_iw_vf == I40IW_MAX_PE_ENABLED_VF_COUNT)
first_avail_iw_vf = iw_vf_idx;
continue;
}
@@ -541,7 +539,7 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats);
spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
vf_dev->msg_count--;
- vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, devstat->hw_stats);
+ vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &devstat->hw_stats);
break;
default:
i40iw_debug(dev, I40IW_DEBUG_VIRT,
@@ -596,23 +594,25 @@ enum i40iw_status_code i40iw_vchnl_vf_get_ver(struct i40iw_sc_dev *dev,
struct i40iw_virtchnl_req vchnl_req;
enum i40iw_status_code ret_code;
+ if (!i40iw_vf_clear_to_send(dev))
+ return I40IW_ERR_TIMEOUT;
memset(&vchnl_req, 0, sizeof(vchnl_req));
vchnl_req.dev = dev;
vchnl_req.parm = vchnl_ver;
vchnl_req.parm_len = sizeof(*vchnl_ver);
vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+
ret_code = vchnl_vf_send_get_ver_req(dev, &vchnl_req);
- if (!ret_code) {
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (!ret_code)
- ret_code = vchnl_req.ret_code;
- else
- dev->vchnl_up = false;
- } else {
+ if (ret_code) {
i40iw_debug(dev, I40IW_DEBUG_VIRT,
"%s Send message failed 0x%0x\n", __func__, ret_code);
+ return ret_code;
}
- return ret_code;
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (ret_code)
+ return ret_code;
+ else
+ return vchnl_req.ret_code;
}
/**
@@ -626,23 +626,25 @@ enum i40iw_status_code i40iw_vchnl_vf_get_hmc_fcn(struct i40iw_sc_dev *dev,
struct i40iw_virtchnl_req vchnl_req;
enum i40iw_status_code ret_code;
+ if (!i40iw_vf_clear_to_send(dev))
+ return I40IW_ERR_TIMEOUT;
memset(&vchnl_req, 0, sizeof(vchnl_req));
vchnl_req.dev = dev;
vchnl_req.parm = hmc_fcn;
vchnl_req.parm_len = sizeof(*hmc_fcn);
vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+
ret_code = vchnl_vf_send_get_hmc_fcn_req(dev, &vchnl_req);
- if (!ret_code) {
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (!ret_code)
- ret_code = vchnl_req.ret_code;
- else
- dev->vchnl_up = false;
- } else {
+ if (ret_code) {
i40iw_debug(dev, I40IW_DEBUG_VIRT,
"%s Send message failed 0x%0x\n", __func__, ret_code);
+ return ret_code;
}
- return ret_code;
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (ret_code)
+ return ret_code;
+ else
+ return vchnl_req.ret_code;
}
/**
@@ -660,25 +662,27 @@ enum i40iw_status_code i40iw_vchnl_vf_add_hmc_objs(struct i40iw_sc_dev *dev,
struct i40iw_virtchnl_req vchnl_req;
enum i40iw_status_code ret_code;
+ if (!i40iw_vf_clear_to_send(dev))
+ return I40IW_ERR_TIMEOUT;
memset(&vchnl_req, 0, sizeof(vchnl_req));
vchnl_req.dev = dev;
vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+
ret_code = vchnl_vf_send_add_hmc_objs_req(dev,
&vchnl_req,
rsrc_type,
start_index,
rsrc_count);
- if (!ret_code) {
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (!ret_code)
- ret_code = vchnl_req.ret_code;
- else
- dev->vchnl_up = false;
- } else {
+ if (ret_code) {
i40iw_debug(dev, I40IW_DEBUG_VIRT,
"%s Send message failed 0x%0x\n", __func__, ret_code);
+ return ret_code;
}
- return ret_code;
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (ret_code)
+ return ret_code;
+ else
+ return vchnl_req.ret_code;
}
/**
@@ -696,25 +700,27 @@ enum i40iw_status_code i40iw_vchnl_vf_del_hmc_obj(struct i40iw_sc_dev *dev,
struct i40iw_virtchnl_req vchnl_req;
enum i40iw_status_code ret_code;
+ if (!i40iw_vf_clear_to_send(dev))
+ return I40IW_ERR_TIMEOUT;
memset(&vchnl_req, 0, sizeof(vchnl_req));
vchnl_req.dev = dev;
vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+
ret_code = vchnl_vf_send_del_hmc_objs_req(dev,
&vchnl_req,
rsrc_type,
start_index,
rsrc_count);
- if (!ret_code) {
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (!ret_code)
- ret_code = vchnl_req.ret_code;
- else
- dev->vchnl_up = false;
- } else {
+ if (ret_code) {
i40iw_debug(dev, I40IW_DEBUG_VIRT,
"%s Send message failed 0x%0x\n", __func__, ret_code);
+ return ret_code;
}
- return ret_code;
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (ret_code)
+ return ret_code;
+ else
+ return vchnl_req.ret_code;
}
/**
@@ -728,21 +734,23 @@ enum i40iw_status_code i40iw_vchnl_vf_get_pe_stats(struct i40iw_sc_dev *dev,
struct i40iw_virtchnl_req vchnl_req;
enum i40iw_status_code ret_code;
+ if (!i40iw_vf_clear_to_send(dev))
+ return I40IW_ERR_TIMEOUT;
memset(&vchnl_req, 0, sizeof(vchnl_req));
vchnl_req.dev = dev;
vchnl_req.parm = hw_stats;
vchnl_req.parm_len = sizeof(*hw_stats);
vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+
ret_code = vchnl_vf_send_get_pe_stats_req(dev, &vchnl_req);
- if (!ret_code) {
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (!ret_code)
- ret_code = vchnl_req.ret_code;
- else
- dev->vchnl_up = false;
- } else {
+ if (ret_code) {
i40iw_debug(dev, I40IW_DEBUG_VIRT,
"%s Send message failed 0x%0x\n", __func__, ret_code);
+ return ret_code;
}
- return ret_code;
+ ret_code = i40iw_vf_wait_vchnl_resp(dev);
+ if (ret_code)
+ return ret_code;
+ else
+ return vchnl_req.ret_code;
}
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 99451d887266..8f7ad07915b0 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -96,7 +96,7 @@ struct ib_sa_mcmember_data {
u8 scope_join_state;
u8 proxy_join;
u8 reserved[2];
-};
+} __packed __aligned(4);
struct mcast_group {
struct ib_sa_mcmember_data rec;
@@ -747,14 +747,11 @@ static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx
__be64 tid,
union ib_gid *new_mgid)
{
- struct mcast_group *group = NULL, *cur_group;
+ struct mcast_group *group = NULL, *cur_group, *n;
struct mcast_req *req;
- struct list_head *pos;
- struct list_head *n;
mutex_lock(&ctx->mcg_table_lock);
- list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) {
- group = list_entry(pos, struct mcast_group, mgid0_list);
+ list_for_each_entry_safe(group, n, &ctx->mcg_mgid0_list, mgid0_list) {
mutex_lock(&group->lock);
if (group->last_req_tid == tid) {
if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 1eca01cebe51..6c5ac5d8f32f 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -717,9 +717,8 @@ int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents);
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index ce0b5aa8eb9b..631272172a0b 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -528,9 +528,8 @@ static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
return 0;
}
-int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents)
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
int rc;
@@ -541,7 +540,7 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
sizeof(u64) * mr->max_pages,
DMA_TO_DEVICE);
- rc = ib_sg_to_pages(ibmr, sg, sg_nents, mlx4_set_page);
+ rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);
ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
sizeof(u64) * mr->max_pages,
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index a00ba4418de9..dabcc65bd65e 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -879,7 +879,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
cq->mcq.irqn = irqn;
- cq->mcq.comp = mlx5_ib_cq_comp;
+ if (context)
+ cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
+ else
+ cq->mcq.comp = mlx5_ib_cq_comp;
cq->mcq.event = mlx5_ib_cq_event;
INIT_LIST_HEAD(&cq->wc_list);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4cb81f68d850..c72797cd9e4f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -38,6 +38,9 @@
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
+#if defined(CONFIG_X86)
+#include <asm/pat.h>
+#endif
#include <linux/sched.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
@@ -517,6 +520,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_UD_TSO;
}
+ if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(dev->mdev, scatter_fcs))
+ props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
+
props->vendor_part_id = mdev->pdev->device;
props->hw_ver = mdev->pdev->revision;
@@ -1068,38 +1075,89 @@ static int get_index(unsigned long offset)
return get_arg(offset);
}
+static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
+{
+ switch (cmd) {
+ case MLX5_IB_MMAP_WC_PAGE:
+ return "WC";
+ case MLX5_IB_MMAP_REGULAR_PAGE:
+ return "best effort WC";
+ case MLX5_IB_MMAP_NC_PAGE:
+ return "NC";
+ default:
+ return NULL;
+ }
+}
+
+static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
+ struct vm_area_struct *vma, struct mlx5_uuar_info *uuari)
+{
+ int err;
+ unsigned long idx;
+ phys_addr_t pfn, pa;
+ pgprot_t prot;
+
+ switch (cmd) {
+ case MLX5_IB_MMAP_WC_PAGE:
+/* Some architectures don't support WC memory */
+#if defined(CONFIG_X86)
+ if (!pat_enabled())
+ return -EPERM;
+#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU)))
+ return -EPERM;
+#endif
+ /* fall through */
+ case MLX5_IB_MMAP_REGULAR_PAGE:
+ /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */
+ prot = pgprot_writecombine(vma->vm_page_prot);
+ break;
+ case MLX5_IB_MMAP_NC_PAGE:
+ prot = pgprot_noncached(vma->vm_page_prot);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ idx = get_index(vma->vm_pgoff);
+ if (idx >= uuari->num_uars)
+ return -EINVAL;
+
+ pfn = uar_index2pfn(dev, uuari->uars[idx].index);
+ mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
+
+ vma->vm_page_prot = prot;
+ err = io_remap_pfn_range(vma, vma->vm_start, pfn,
+ PAGE_SIZE, vma->vm_page_prot);
+ if (err) {
+ mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n",
+ err, vma->vm_start, &pfn, mmap_cmd2str(cmd));
+ return -EAGAIN;
+ }
+
+ pa = pfn << PAGE_SHIFT;
+ mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
+ vma->vm_start, &pa);
+
+ return 0;
+}
+
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_uuar_info *uuari = &context->uuari;
unsigned long command;
- unsigned long idx;
phys_addr_t pfn;
command = get_command(vma->vm_pgoff);
switch (command) {
+ case MLX5_IB_MMAP_WC_PAGE:
+ case MLX5_IB_MMAP_NC_PAGE:
case MLX5_IB_MMAP_REGULAR_PAGE:
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
- return -EINVAL;
-
- idx = get_index(vma->vm_pgoff);
- if (idx >= uuari->num_uars)
- return -EINVAL;
-
- pfn = uar_index2pfn(dev, uuari->uars[idx].index);
- mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
- (unsigned long long)pfn);
-
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start, pfn,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
- vma->vm_start,
- (unsigned long long)pfn << PAGE_SHIFT);
- break;
+ return uar_mmap(dev, command, vma, uuari);
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
return -ENOSYS;
@@ -1108,7 +1166,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
- if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+ if (vma->vm_flags & VM_WRITE)
return -EPERM;
/* Don't expose to user-space information it shouldn't have */
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b46c25542a7c..c4a9825828bc 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -70,6 +70,8 @@ enum {
enum mlx5_ib_mmap_cmd {
MLX5_IB_MMAP_REGULAR_PAGE = 0,
MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
+ MLX5_IB_MMAP_WC_PAGE = 2,
+ MLX5_IB_MMAP_NC_PAGE = 3,
/* 5 is chosen in order to be compatible with old versions of libmlx5 */
MLX5_IB_MMAP_CORE_CLOCK = 5,
};
@@ -356,6 +358,7 @@ enum mlx5_ib_qp_flags {
MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5,
/* QP uses 1 as its source QP number */
MLX5_IB_QP_SQPN_QP1 = 1 << 6,
+ MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
};
struct mlx5_umr_wr {
@@ -712,9 +715,8 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents);
+int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad_hdr *in, size_t in_mad_size,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 4d5bff151cdf..8cf2ce50511f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1751,26 +1751,33 @@ done:
static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
struct scatterlist *sgl,
- unsigned short sg_nents)
+ unsigned short sg_nents,
+ unsigned int *sg_offset_p)
{
struct scatterlist *sg = sgl;
struct mlx5_klm *klms = mr->descs;
+ unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
u32 lkey = mr->ibmr.pd->local_dma_lkey;
int i;
- mr->ibmr.iova = sg_dma_address(sg);
+ mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
mr->ibmr.length = 0;
mr->ndescs = sg_nents;
for_each_sg(sgl, sg, sg_nents, i) {
if (unlikely(i > mr->max_descs))
break;
- klms[i].va = cpu_to_be64(sg_dma_address(sg));
- klms[i].bcount = cpu_to_be32(sg_dma_len(sg));
+ klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
+ klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
klms[i].key = cpu_to_be32(lkey);
mr->ibmr.length += sg_dma_len(sg);
+
+ sg_offset = 0;
}
+ if (sg_offset_p)
+ *sg_offset_p = sg_offset;
+
return i;
}
@@ -1788,9 +1795,8 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
return 0;
}
-int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents)
+int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
{
struct mlx5_ib_mr *mr = to_mmr(ibmr);
int n;
@@ -1802,9 +1808,10 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
DMA_TO_DEVICE);
if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
- n = mlx5_ib_sg_to_klms(mr, sg, sg_nents);
+ n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
else
- n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
+ n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
+ mlx5_set_page);
ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
mr->desc_size * mr->max_descs,
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 8dee8bc1e0fe..504117657d41 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1028,6 +1028,7 @@ static int get_rq_pas_size(void *qpc)
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq, void *qpin)
{
+ struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
__be64 *pas;
__be64 *qp_pas;
void *in;
@@ -1051,6 +1052,9 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
+ if (mqp->flags & MLX5_IB_QP_CAP_SCATTER_FCS)
+ MLX5_SET(rqc, rqc, scatter_fcs, 1);
+
wq = MLX5_ADDR_OF(rqc, rqc, wq);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
MLX5_SET(wq, wq, end_padding_mode,
@@ -1136,11 +1140,12 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
}
if (qp->rq.wqe_cnt) {
+ rq->base.container_mibqp = qp;
+
err = create_raw_packet_qp_rq(dev, rq, in);
if (err)
goto err_destroy_sq;
- rq->base.container_mibqp = qp;
err = create_raw_packet_qp_tir(dev, rq, tdn);
if (err)
@@ -1252,6 +1257,19 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return -EOPNOTSUPP;
}
+ if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) {
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ mlx5_ib_dbg(dev, "Scatter FCS is supported only for Raw Packet QPs");
+ return -EOPNOTSUPP;
+ }
+ if (!MLX5_CAP_GEN(dev->mdev, eth_net_offloads) ||
+ !MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
+ mlx5_ib_dbg(dev, "Scatter FCS isn't supported\n");
+ return -EOPNOTSUPP;
+ }
+ qp->flags |= MLX5_IB_QP_CAP_SCATTER_FCS;
+ }
+
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index 6d3a169c049b..37331e2fdc5f 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -44,6 +44,7 @@
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/init.h>
+#include <linux/kernel.h>
#include <asm/io.h>
#include <asm/irq.h>
@@ -903,70 +904,15 @@ void nes_clc(unsigned long parm)
*/
void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length)
{
- char xlate[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
- 'a', 'b', 'c', 'd', 'e', 'f'};
- char *ptr;
- char hex_buf[80];
- char ascii_buf[20];
- int num_char;
- int num_ascii;
- int num_hex;
-
if (!(nes_debug_level & dump_debug_level)) {
return;
}
- ptr = addr;
if (length > 0x100) {
nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100);
length = 0x100;
}
- nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", ptr, length, length);
-
- memset(ascii_buf, 0, 20);
- memset(hex_buf, 0, 80);
-
- num_ascii = 0;
- num_hex = 0;
- for (num_char = 0; num_char < length; num_char++) {
- if (num_ascii == 8) {
- ascii_buf[num_ascii++] = ' ';
- hex_buf[num_hex++] = '-';
- hex_buf[num_hex++] = ' ';
- }
-
- if (*ptr < 0x20 || *ptr > 0x7e)
- ascii_buf[num_ascii++] = '.';
- else
- ascii_buf[num_ascii++] = *ptr;
- hex_buf[num_hex++] = xlate[((*ptr & 0xf0) >> 4)];
- hex_buf[num_hex++] = xlate[*ptr & 0x0f];
- hex_buf[num_hex++] = ' ';
- ptr++;
-
- if (num_ascii >= 17) {
- /* output line and reset */
- nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf);
- memset(ascii_buf, 0, 20);
- memset(hex_buf, 0, 80);
- num_ascii = 0;
- num_hex = 0;
- }
- }
+ nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", addr, length, length);
- /* output the rest */
- if (num_ascii) {
- while (num_ascii < 17) {
- if (num_ascii == 8) {
- hex_buf[num_hex++] = ' ';
- hex_buf[num_hex++] = ' ';
- }
- hex_buf[num_hex++] = ' ';
- hex_buf[num_hex++] = ' ';
- hex_buf[num_hex++] = ' ';
- num_ascii++;
- }
-
- nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf);
- }
+ print_hex_dump(KERN_ERR, PFX, DUMP_PREFIX_NONE, 16, 1, addr, length, true);
}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index fba69a39a7eb..464d6da5fe91 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -402,15 +402,14 @@ static int nes_set_page(struct ib_mr *ibmr, u64 addr)
return 0;
}
-static int nes_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents)
+static int nes_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
{
struct nes_mr *nesmr = to_nesmr(ibmr);
nesmr->npages = 0;
- return ib_sg_to_pages(ibmr, sg, sg_nents, nes_set_page);
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, nes_set_page);
}
/**
@@ -981,7 +980,7 @@ static int nes_setup_mmap_qp(struct nes_qp *nesqp, struct nes_vnic *nesvnic,
/**
* nes_free_qp_mem() is to free up the qp's pci_alloc_consistent() memory.
*/
-static inline void nes_free_qp_mem(struct nes_device *nesdev,
+static void nes_free_qp_mem(struct nes_device *nesdev,
struct nes_qp *nesqp, int virt_wqs)
{
unsigned long flags;
@@ -1315,6 +1314,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type);
return ERR_PTR(-EINVAL);
}
+ init_completion(&nesqp->sq_drained);
+ init_completion(&nesqp->rq_drained);
nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
init_timer(&nesqp->terminate_timer);
@@ -3452,6 +3453,29 @@ out:
return err;
}
+/**
+ * nes_drain_sq - drain sq
+ * @ibqp: pointer to ibqp
+ */
+static void nes_drain_sq(struct ib_qp *ibqp)
+{
+ struct nes_qp *nesqp = to_nesqp(ibqp);
+
+ if (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)
+ wait_for_completion(&nesqp->sq_drained);
+}
+
+/**
+ * nes_drain_rq - drain rq
+ * @ibqp: pointer to ibqp
+ */
+static void nes_drain_rq(struct ib_qp *ibqp)
+{
+ struct nes_qp *nesqp = to_nesqp(ibqp);
+
+ if (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)
+ wait_for_completion(&nesqp->rq_drained);
+}
/**
* nes_poll_cq
@@ -3582,6 +3606,13 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
}
}
+ if (nesqp->iwarp_state > NES_CQP_QP_IWARP_STATE_RTS) {
+ if (nesqp->hwqp.sq_tail == nesqp->hwqp.sq_head)
+ complete(&nesqp->sq_drained);
+ if (nesqp->hwqp.rq_tail == nesqp->hwqp.rq_head)
+ complete(&nesqp->rq_drained);
+ }
+
entry->wr_id = wrid;
entry++;
cqe_count++;
@@ -3754,6 +3785,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.req_notify_cq = nes_req_notify_cq;
nesibdev->ibdev.post_send = nes_post_send;
nesibdev->ibdev.post_recv = nes_post_recv;
+ nesibdev->ibdev.drain_sq = nes_drain_sq;
+ nesibdev->ibdev.drain_rq = nes_drain_rq;
nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL);
if (nesibdev->ibdev.iwcm == NULL) {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 70290883d067..e02a5662dc20 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -189,6 +189,8 @@ struct nes_qp {
u8 pau_pending;
u8 pau_state;
__u64 nesuqp_addr;
+ struct completion sq_drained;
+ struct completion rq_drained;
};
struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index a8496a18e20d..b1a3d91fe8b9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -3081,13 +3081,12 @@ static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
return 0;
}
-int ocrdma_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents)
+int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
{
struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
mr->npages = 0;
- return ib_sg_to_pages(ibmr, sg, sg_nents, ocrdma_set_page);
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ocrdma_set_page);
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 8b517fd36779..704ef1e9271b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -122,8 +122,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
-int ocrdma_map_mr_sg(struct ib_mr *ibmr,
- struct scatterlist *sg,
- int sg_nents);
+int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
#endif /* __OCRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 3f062f0dd9d8..f253111e682e 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1090,7 +1090,7 @@ void qib_free_devdata(struct qib_devdata *dd)
qib_dbg_ibdev_exit(&dd->verbs_dev);
#endif
free_percpu(dd->int_counter);
- ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
+ rvt_dealloc_device(&dd->verbs_dev.rdi);
}
u64 qib_int_counter(struct qib_devdata *dd)
@@ -1183,7 +1183,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
bail:
if (!list_empty(&dd->list))
list_del_init(&dd->list);
- ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
+ rvt_dealloc_device(&dd->verbs_dev.rdi);
return ERR_PTR(ret);
}
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 9088e26d3ac8..444028a3582a 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -230,7 +230,7 @@ bail:
*
* Return 1 if constructed; otherwise, return 0.
*/
-int qib_make_rc_req(struct rvt_qp *qp)
+int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
struct qib_ibdev *dev = to_idev(qp->ibqp.device);
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index a5f07a64b228..b67779256297 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -739,7 +739,7 @@ void qib_do_send(struct rvt_qp *qp)
struct qib_qp_priv *priv = qp->priv;
struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
- int (*make_req)(struct rvt_qp *qp);
+ int (*make_req)(struct rvt_qp *qp, unsigned long *flags);
unsigned long flags;
if ((qp->ibqp.qp_type == IB_QPT_RC ||
@@ -781,7 +781,7 @@ void qib_do_send(struct rvt_qp *qp)
qp->s_hdrwords = 0;
spin_lock_irqsave(&qp->s_lock, flags);
}
- } while (make_req(qp));
+ } while (make_req(qp, &flags));
spin_unlock_irqrestore(&qp->s_lock, flags);
}
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 7bdbc79ceaa3..1d61bd04f449 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -45,7 +45,7 @@
*
* Return 1 if constructed; otherwise, return 0.
*/
-int qib_make_uc_req(struct rvt_qp *qp)
+int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
struct qib_other_headers *ohdr;
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index d9502137de62..846e6c726df7 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -238,7 +238,7 @@ drop:
*
* Return 1 if constructed; otherwise, return 0.
*/
-int qib_make_ud_req(struct rvt_qp *qp)
+int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
struct qib_other_headers *ohdr;
@@ -294,7 +294,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
this_cpu_inc(ibp->pmastats->n_unicast_xmit);
lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
if (unlikely(lid == ppd->lid)) {
- unsigned long flags;
+ unsigned long tflags = *flags;
/*
* If DMAs are in progress, we can't generate
* a completion for the loopback packet since
@@ -307,10 +307,10 @@ int qib_make_ud_req(struct rvt_qp *qp)
goto bail;
}
qp->s_cur = next_cur;
- local_irq_save(flags);
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock_irqrestore(&qp->s_lock, tflags);
qib_ud_loopback(qp, wqe);
- spin_lock_irqsave(&qp->s_lock, flags);
+ spin_lock_irqsave(&qp->s_lock, tflags);
+ *flags = tflags;
qib_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done;
}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 4b76a8d59337..6888f03c6d61 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -430,11 +430,11 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
void qib_send_rc_ack(struct rvt_qp *qp);
-int qib_make_rc_req(struct rvt_qp *qp);
+int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags);
-int qib_make_uc_req(struct rvt_qp *qp);
+int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags);
-int qib_make_ud_req(struct rvt_qp *qp);
+int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags);
int qib_register_ib_device(struct qib_devdata *);
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index a9e3bcc522c4..0f12c211c385 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -829,13 +829,13 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_UD:
- qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK;
+ qp->allowed_ops = IB_OPCODE_UD;
break;
case IB_QPT_RC:
- qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+ qp->allowed_ops = IB_OPCODE_RC;
break;
case IB_QPT_UC:
- qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+ qp->allowed_ops = IB_OPCODE_UC;
break;
default:
ret = ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 6caf5272ba1f..e1cc2cc42f25 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -106,6 +106,19 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports)
}
EXPORT_SYMBOL(rvt_alloc_device);
+/**
+ * rvt_dealloc_device - deallocate rdi
+ * @rdi: structure to free
+ *
+ * Free a structure allocated with rvt_alloc_device()
+ */
+void rvt_dealloc_device(struct rvt_dev_info *rdi)
+{
+ kfree(rdi->ports);
+ ib_dealloc_device(&rdi->ibdev);
+}
+EXPORT_SYMBOL(rvt_dealloc_device);
+
static int rvt_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index a53fa5fc0dec..1502199c8e56 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -36,6 +36,27 @@
#include "ipoib.h"
+struct ipoib_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ int stat_offset;
+};
+
+#define IPOIB_NETDEV_STAT(m) { \
+ .stat_string = #m, \
+ .stat_offset = offsetof(struct rtnl_link_stats64, m) }
+
+static const struct ipoib_stats ipoib_gstrings_stats[] = {
+ IPOIB_NETDEV_STAT(rx_packets),
+ IPOIB_NETDEV_STAT(tx_packets),
+ IPOIB_NETDEV_STAT(rx_bytes),
+ IPOIB_NETDEV_STAT(tx_bytes),
+ IPOIB_NETDEV_STAT(tx_errors),
+ IPOIB_NETDEV_STAT(rx_dropped),
+ IPOIB_NETDEV_STAT(tx_dropped)
+};
+
+#define IPOIB_GLOBAL_STATS_LEN ARRAY_SIZE(ipoib_gstrings_stats)
+
static void ipoib_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
@@ -92,11 +113,57 @@ static int ipoib_set_coalesce(struct net_device *dev,
return 0;
}
+static void ipoib_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats __always_unused *stats,
+ u64 *data)
+{
+ int i;
+ struct net_device_stats *net_stats = &dev->stats;
+ u8 *p = (u8 *)net_stats;
+
+ for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++)
+ data[i] = *(u64 *)(p + ipoib_gstrings_stats[i].stat_offset);
+
+}
+static void ipoib_get_strings(struct net_device __always_unused *dev,
+ u32 stringset, u8 *data)
+{
+ u8 *p = data;
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < IPOIB_GLOBAL_STATS_LEN; i++) {
+ memcpy(p, ipoib_gstrings_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ break;
+ case ETH_SS_TEST:
+ default:
+ break;
+ }
+}
+static int ipoib_get_sset_count(struct net_device __always_unused *dev,
+ int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return IPOIB_GLOBAL_STATS_LEN;
+ case ETH_SS_TEST:
+ default:
+ break;
+ }
+ return -EOPNOTSUPP;
+}
static const struct ethtool_ops ipoib_ethtool_ops = {
.get_drvinfo = ipoib_get_drvinfo,
.get_coalesce = ipoib_get_coalesce,
.set_coalesce = ipoib_set_coalesce,
+ .get_strings = ipoib_get_strings,
+ .get_ethtool_stats = ipoib_get_ethtool_stats,
+ .get_sset_count = ipoib_get_sset_count,
};
void ipoib_set_ethtool_ops(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 3643d559ba31..418e5a1c8744 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -51,8 +51,6 @@ MODULE_PARM_DESC(data_debug_level,
"Enable data path debug tracing if > 0");
#endif
-static DEFINE_MUTEX(pkey_mutex);
-
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
struct ib_pd *pd, struct ib_ah_attr *attr)
{
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 9a391cc5b9b3..90be56893414 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -236,7 +236,7 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
page_vec->npages = 0;
page_vec->fake_mr.page_size = SIZE_4K;
plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
- mem->size, iser_set_page);
+ mem->size, NULL, iser_set_page);
if (unlikely(plen < mem->size)) {
iser_err("page vec too short to hold this SG\n");
iser_data_buf_dump(mem, device->ib_device);
@@ -446,7 +446,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
- n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
+ n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K);
if (unlikely(n != mem->size)) {
iser_err("failed to map sg (%d/%d)\n",
n, mem->size);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 411e4464ca23..897b5a4993e8 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -33,7 +33,8 @@
#define ISERT_MAX_CONN 8
#define ISER_MAX_RX_CQ_LEN (ISERT_QP_MAX_RECV_DTOS * ISERT_MAX_CONN)
-#define ISER_MAX_TX_CQ_LEN (ISERT_QP_MAX_REQ_DTOS * ISERT_MAX_CONN)
+#define ISER_MAX_TX_CQ_LEN \
+ ((ISERT_QP_MAX_REQ_DTOS + ISCSI_DEF_XMIT_CMDS_MAX) * ISERT_MAX_CONN)
#define ISER_MAX_CQ_LEN (ISER_MAX_RX_CQ_LEN + ISER_MAX_TX_CQ_LEN + \
ISERT_MAX_CONN)
@@ -46,14 +47,6 @@ static LIST_HEAD(device_list);
static struct workqueue_struct *isert_comp_wq;
static struct workqueue_struct *isert_release_wq;
-static void
-isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
-static int
-isert_map_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn);
-static void
-isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
-static int
-isert_reg_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn);
static int
isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd);
static int
@@ -142,6 +135,7 @@ isert_create_qp(struct isert_conn *isert_conn,
attr.recv_cq = comp->cq;
attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1;
attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
+ attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX;
attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
isert_conn->max_sge = min(device->ib_device->attrs.max_sge,
device->ib_device->attrs.max_sge_rd);
@@ -270,9 +264,9 @@ isert_alloc_comps(struct isert_device *device)
device->ib_device->num_comp_vectors));
isert_info("Using %d CQs, %s supports %d vectors support "
- "Fast registration %d pi_capable %d\n",
+ "pi_capable %d\n",
device->comps_used, device->ib_device->name,
- device->ib_device->num_comp_vectors, device->use_fastreg,
+ device->ib_device->num_comp_vectors,
device->pi_capable);
device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp),
@@ -313,18 +307,6 @@ isert_create_device_ib_res(struct isert_device *device)
isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
- /* asign function handlers */
- if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
- ib_dev->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
- device->use_fastreg = 1;
- device->reg_rdma_mem = isert_reg_rdma;
- device->unreg_rdma_mem = isert_unreg_rdma;
- } else {
- device->use_fastreg = 0;
- device->reg_rdma_mem = isert_map_rdma;
- device->unreg_rdma_mem = isert_unmap_cmd;
- }
-
ret = isert_alloc_comps(device);
if (ret)
goto out;
@@ -417,146 +399,6 @@ isert_device_get(struct rdma_cm_id *cma_id)
}
static void
-isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
-{
- struct fast_reg_descriptor *fr_desc, *tmp;
- int i = 0;
-
- if (list_empty(&isert_conn->fr_pool))
- return;
-
- isert_info("Freeing conn %p fastreg pool", isert_conn);
-
- list_for_each_entry_safe(fr_desc, tmp,
- &isert_conn->fr_pool, list) {
- list_del(&fr_desc->list);
- ib_dereg_mr(fr_desc->data_mr);
- if (fr_desc->pi_ctx) {
- ib_dereg_mr(fr_desc->pi_ctx->prot_mr);
- ib_dereg_mr(fr_desc->pi_ctx->sig_mr);
- kfree(fr_desc->pi_ctx);
- }
- kfree(fr_desc);
- ++i;
- }
-
- if (i < isert_conn->fr_pool_size)
- isert_warn("Pool still has %d regions registered\n",
- isert_conn->fr_pool_size - i);
-}
-
-static int
-isert_create_pi_ctx(struct fast_reg_descriptor *desc,
- struct ib_device *device,
- struct ib_pd *pd)
-{
- struct pi_context *pi_ctx;
- int ret;
-
- pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
- if (!pi_ctx) {
- isert_err("Failed to allocate pi context\n");
- return -ENOMEM;
- }
-
- pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
- ISCSI_ISER_SG_TABLESIZE);
- if (IS_ERR(pi_ctx->prot_mr)) {
- isert_err("Failed to allocate prot frmr err=%ld\n",
- PTR_ERR(pi_ctx->prot_mr));
- ret = PTR_ERR(pi_ctx->prot_mr);
- goto err_pi_ctx;
- }
- desc->ind |= ISERT_PROT_KEY_VALID;
-
- pi_ctx->sig_mr = ib_alloc_mr(pd, IB_MR_TYPE_SIGNATURE, 2);
- if (IS_ERR(pi_ctx->sig_mr)) {
- isert_err("Failed to allocate signature enabled mr err=%ld\n",
- PTR_ERR(pi_ctx->sig_mr));
- ret = PTR_ERR(pi_ctx->sig_mr);
- goto err_prot_mr;
- }
-
- desc->pi_ctx = pi_ctx;
- desc->ind |= ISERT_SIG_KEY_VALID;
- desc->ind &= ~ISERT_PROTECTED;
-
- return 0;
-
-err_prot_mr:
- ib_dereg_mr(pi_ctx->prot_mr);
-err_pi_ctx:
- kfree(pi_ctx);
-
- return ret;
-}
-
-static int
-isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd,
- struct fast_reg_descriptor *fr_desc)
-{
- fr_desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
- ISCSI_ISER_SG_TABLESIZE);
- if (IS_ERR(fr_desc->data_mr)) {
- isert_err("Failed to allocate data frmr err=%ld\n",
- PTR_ERR(fr_desc->data_mr));
- return PTR_ERR(fr_desc->data_mr);
- }
- fr_desc->ind |= ISERT_DATA_KEY_VALID;
-
- isert_dbg("Created fr_desc %p\n", fr_desc);
-
- return 0;
-}
-
-static int
-isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
-{
- struct fast_reg_descriptor *fr_desc;
- struct isert_device *device = isert_conn->device;
- struct se_session *se_sess = isert_conn->conn->sess->se_sess;
- struct se_node_acl *se_nacl = se_sess->se_node_acl;
- int i, ret, tag_num;
- /*
- * Setup the number of FRMRs based upon the number of tags
- * available to session in iscsi_target_locate_portal().
- */
- tag_num = max_t(u32, ISCSIT_MIN_TAGS, se_nacl->queue_depth);
- tag_num = (tag_num * 2) + ISCSIT_EXTRA_TAGS;
-
- isert_conn->fr_pool_size = 0;
- for (i = 0; i < tag_num; i++) {
- fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
- if (!fr_desc) {
- isert_err("Failed to allocate fast_reg descriptor\n");
- ret = -ENOMEM;
- goto err;
- }
-
- ret = isert_create_fr_desc(device->ib_device,
- device->pd, fr_desc);
- if (ret) {
- isert_err("Failed to create fastreg descriptor err=%d\n",
- ret);
- kfree(fr_desc);
- goto err;
- }
-
- list_add_tail(&fr_desc->list, &isert_conn->fr_pool);
- isert_conn->fr_pool_size++;
- }
-
- isert_dbg("Creating conn %p fastreg pool size=%d",
- isert_conn, isert_conn->fr_pool_size);
-
- return 0;
-
-err:
- isert_conn_free_fastreg_pool(isert_conn);
- return ret;
-}
-
-static void
isert_init_conn(struct isert_conn *isert_conn)
{
isert_conn->state = ISER_CONN_INIT;
@@ -565,8 +407,6 @@ isert_init_conn(struct isert_conn *isert_conn)
init_completion(&isert_conn->login_req_comp);
kref_init(&isert_conn->kref);
mutex_init(&isert_conn->mutex);
- spin_lock_init(&isert_conn->pool_lock);
- INIT_LIST_HEAD(&isert_conn->fr_pool);
INIT_WORK(&isert_conn->release_work, isert_release_work);
}
@@ -739,9 +579,6 @@ isert_connect_release(struct isert_conn *isert_conn)
BUG_ON(!device);
- if (device->use_fastreg)
- isert_conn_free_fastreg_pool(isert_conn);
-
isert_free_rx_descriptors(isert_conn);
if (isert_conn->cm_id)
rdma_destroy_id(isert_conn->cm_id);
@@ -1080,7 +917,6 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
{
struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc;
- isert_cmd->iser_ib_op = ISER_IB_SEND;
tx_desc->tx_cqe.done = isert_send_done;
send_wr->wr_cqe = &tx_desc->tx_cqe;
@@ -1160,16 +996,6 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
}
if (!login->login_failed) {
if (login->login_complete) {
- if (!conn->sess->sess_ops->SessionType &&
- isert_conn->device->use_fastreg) {
- ret = isert_conn_create_fastreg_pool(isert_conn);
- if (ret) {
- isert_err("Conn: %p failed to create"
- " fastreg pool\n", isert_conn);
- return ret;
- }
- }
-
ret = isert_alloc_rx_descriptors(isert_conn);
if (ret)
return ret;
@@ -1633,97 +1459,26 @@ isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc)
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
}
-static int
-isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
- struct scatterlist *sg, u32 nents, u32 length, u32 offset,
- enum iser_ib_op_code op, struct isert_data_buf *data)
-{
- struct ib_device *ib_dev = isert_conn->cm_id->device;
-
- data->dma_dir = op == ISER_IB_RDMA_WRITE ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE;
-
- data->len = length - offset;
- data->offset = offset;
- data->sg_off = data->offset / PAGE_SIZE;
-
- data->sg = &sg[data->sg_off];
- data->nents = min_t(unsigned int, nents - data->sg_off,
- ISCSI_ISER_SG_TABLESIZE);
- data->len = min_t(unsigned int, data->len, ISCSI_ISER_SG_TABLESIZE *
- PAGE_SIZE);
-
- data->dma_nents = ib_dma_map_sg(ib_dev, data->sg, data->nents,
- data->dma_dir);
- if (unlikely(!data->dma_nents)) {
- isert_err("Cmd: unable to dma map SGs %p\n", sg);
- return -EINVAL;
- }
-
- isert_dbg("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
- isert_cmd, data->dma_nents, data->sg, data->nents, data->len);
-
- return 0;
-}
-
static void
-isert_unmap_data_buf(struct isert_conn *isert_conn, struct isert_data_buf *data)
+isert_rdma_rw_ctx_destroy(struct isert_cmd *cmd, struct isert_conn *conn)
{
- struct ib_device *ib_dev = isert_conn->cm_id->device;
-
- ib_dma_unmap_sg(ib_dev, data->sg, data->nents, data->dma_dir);
- memset(data, 0, sizeof(*data));
-}
-
-
-
-static void
-isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
-{
- isert_dbg("Cmd %p\n", isert_cmd);
+ struct se_cmd *se_cmd = &cmd->iscsi_cmd->se_cmd;
+ enum dma_data_direction dir = target_reverse_dma_direction(se_cmd);
- if (isert_cmd->data.sg) {
- isert_dbg("Cmd %p unmap_sg op\n", isert_cmd);
- isert_unmap_data_buf(isert_conn, &isert_cmd->data);
- }
-
- if (isert_cmd->rdma_wr) {
- isert_dbg("Cmd %p free send_wr\n", isert_cmd);
- kfree(isert_cmd->rdma_wr);
- isert_cmd->rdma_wr = NULL;
- }
-
- if (isert_cmd->ib_sge) {
- isert_dbg("Cmd %p free ib_sge\n", isert_cmd);
- kfree(isert_cmd->ib_sge);
- isert_cmd->ib_sge = NULL;
- }
-}
-
-static void
-isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
-{
- isert_dbg("Cmd %p\n", isert_cmd);
-
- if (isert_cmd->fr_desc) {
- isert_dbg("Cmd %p free fr_desc %p\n", isert_cmd, isert_cmd->fr_desc);
- if (isert_cmd->fr_desc->ind & ISERT_PROTECTED) {
- isert_unmap_data_buf(isert_conn, &isert_cmd->prot);
- isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED;
- }
- spin_lock_bh(&isert_conn->pool_lock);
- list_add_tail(&isert_cmd->fr_desc->list, &isert_conn->fr_pool);
- spin_unlock_bh(&isert_conn->pool_lock);
- isert_cmd->fr_desc = NULL;
- }
+ if (!cmd->rw.nr_ops)
+ return;
- if (isert_cmd->data.sg) {
- isert_dbg("Cmd %p unmap_sg op\n", isert_cmd);
- isert_unmap_data_buf(isert_conn, &isert_cmd->data);
+ if (isert_prot_cmd(conn, se_cmd)) {
+ rdma_rw_ctx_destroy_signature(&cmd->rw, conn->qp,
+ conn->cm_id->port_num, se_cmd->t_data_sg,
+ se_cmd->t_data_nents, se_cmd->t_prot_sg,
+ se_cmd->t_prot_nents, dir);
+ } else {
+ rdma_rw_ctx_destroy(&cmd->rw, conn->qp, conn->cm_id->port_num,
+ se_cmd->t_data_sg, se_cmd->t_data_nents, dir);
}
- isert_cmd->ib_sge = NULL;
- isert_cmd->rdma_wr = NULL;
+ cmd->rw.nr_ops = 0;
}
static void
@@ -1732,7 +1487,6 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
struct isert_conn *isert_conn = isert_cmd->conn;
struct iscsi_conn *conn = isert_conn->conn;
- struct isert_device *device = isert_conn->device;
struct iscsi_text_rsp *hdr;
isert_dbg("Cmd %p\n", isert_cmd);
@@ -1760,7 +1514,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
}
}
- device->unreg_rdma_mem(isert_cmd, isert_conn);
+ isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
transport_generic_free_cmd(&cmd->se_cmd, 0);
break;
case ISCSI_OP_SCSI_TMFUNC:
@@ -1894,14 +1648,9 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
isert_dbg("Cmd %p\n", isert_cmd);
- if (isert_cmd->fr_desc && isert_cmd->fr_desc->ind & ISERT_PROTECTED) {
- ret = isert_check_pi_status(cmd,
- isert_cmd->fr_desc->pi_ctx->sig_mr);
- isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED;
- }
+ ret = isert_check_pi_status(cmd, isert_cmd->rw.sig->sig_mr);
+ isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
- device->unreg_rdma_mem(isert_cmd, isert_conn);
- isert_cmd->rdma_wr_num = 0;
if (ret)
transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0);
else
@@ -1929,16 +1678,12 @@ isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
isert_dbg("Cmd %p\n", isert_cmd);
- if (isert_cmd->fr_desc && isert_cmd->fr_desc->ind & ISERT_PROTECTED) {
- ret = isert_check_pi_status(se_cmd,
- isert_cmd->fr_desc->pi_ctx->sig_mr);
- isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED;
- }
-
iscsit_stop_dataout_timer(cmd);
- device->unreg_rdma_mem(isert_cmd, isert_conn);
- cmd->write_data_done = isert_cmd->data.len;
- isert_cmd->rdma_wr_num = 0;
+
+ if (isert_prot_cmd(isert_conn, se_cmd))
+ ret = isert_check_pi_status(se_cmd, isert_cmd->rw.sig->sig_mr);
+ isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
+ cmd->write_data_done = 0;
isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
spin_lock_bh(&cmd->istate_lock);
@@ -2111,7 +1856,6 @@ isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
{
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = conn->context;
- struct isert_device *device = isert_conn->device;
spin_lock_bh(&conn->cmd_lock);
if (!list_empty(&cmd->i_conn_node))
@@ -2120,8 +1864,7 @@ isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
if (cmd->data_direction == DMA_TO_DEVICE)
iscsit_stop_dataout_timer(cmd);
-
- device->unreg_rdma_mem(isert_cmd, isert_conn);
+ isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
}
static enum target_prot_op
@@ -2274,234 +2017,6 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
return isert_post_response(isert_conn, isert_cmd);
}
-static int
-isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
- struct ib_sge *ib_sge, struct ib_rdma_wr *rdma_wr,
- u32 data_left, u32 offset)
-{
- struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
- struct scatterlist *sg_start, *tmp_sg;
- struct isert_device *device = isert_conn->device;
- struct ib_device *ib_dev = device->ib_device;
- u32 sg_off, page_off;
- int i = 0, sg_nents;
-
- sg_off = offset / PAGE_SIZE;
- sg_start = &cmd->se_cmd.t_data_sg[sg_off];
- sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge);
- page_off = offset % PAGE_SIZE;
-
- rdma_wr->wr.sg_list = ib_sge;
- rdma_wr->wr.wr_cqe = &isert_cmd->tx_desc.tx_cqe;
-
- /*
- * Perform mapping of TCM scatterlist memory ib_sge dma_addr.
- */
- for_each_sg(sg_start, tmp_sg, sg_nents, i) {
- isert_dbg("RDMA from SGL dma_addr: 0x%llx dma_len: %u, "
- "page_off: %u\n",
- (unsigned long long)tmp_sg->dma_address,
- tmp_sg->length, page_off);
-
- ib_sge->addr = ib_sg_dma_address(ib_dev, tmp_sg) + page_off;
- ib_sge->length = min_t(u32, data_left,
- ib_sg_dma_len(ib_dev, tmp_sg) - page_off);
- ib_sge->lkey = device->pd->local_dma_lkey;
-
- isert_dbg("RDMA ib_sge: addr: 0x%llx length: %u lkey: %x\n",
- ib_sge->addr, ib_sge->length, ib_sge->lkey);
- page_off = 0;
- data_left -= ib_sge->length;
- if (!data_left)
- break;
- ib_sge++;
- isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge);
- }
-
- rdma_wr->wr.num_sge = ++i;
- isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
- rdma_wr->wr.sg_list, rdma_wr->wr.num_sge);
-
- return rdma_wr->wr.num_sge;
-}
-
-static int
-isert_map_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn)
-{
- struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
- struct se_cmd *se_cmd = &cmd->se_cmd;
- struct isert_conn *isert_conn = conn->context;
- struct isert_data_buf *data = &isert_cmd->data;
- struct ib_rdma_wr *rdma_wr;
- struct ib_sge *ib_sge;
- u32 offset, data_len, data_left, rdma_write_max, va_offset = 0;
- int ret = 0, i, ib_sge_cnt;
-
- offset = isert_cmd->iser_ib_op == ISER_IB_RDMA_READ ?
- cmd->write_data_done : 0;
- ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg,
- se_cmd->t_data_nents, se_cmd->data_length,
- offset, isert_cmd->iser_ib_op,
- &isert_cmd->data);
- if (ret)
- return ret;
-
- data_left = data->len;
- offset = data->offset;
-
- ib_sge = kzalloc(sizeof(struct ib_sge) * data->nents, GFP_KERNEL);
- if (!ib_sge) {
- isert_warn("Unable to allocate ib_sge\n");
- ret = -ENOMEM;
- goto unmap_cmd;
- }
- isert_cmd->ib_sge = ib_sge;
-
- isert_cmd->rdma_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
- isert_cmd->rdma_wr = kzalloc(sizeof(struct ib_rdma_wr) *
- isert_cmd->rdma_wr_num, GFP_KERNEL);
- if (!isert_cmd->rdma_wr) {
- isert_dbg("Unable to allocate isert_cmd->rdma_wr\n");
- ret = -ENOMEM;
- goto unmap_cmd;
- }
-
- rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
-
- for (i = 0; i < isert_cmd->rdma_wr_num; i++) {
- rdma_wr = &isert_cmd->rdma_wr[i];
- data_len = min(data_left, rdma_write_max);
-
- rdma_wr->wr.send_flags = 0;
- if (isert_cmd->iser_ib_op == ISER_IB_RDMA_WRITE) {
- isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done;
-
- rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
- rdma_wr->remote_addr = isert_cmd->read_va + offset;
- rdma_wr->rkey = isert_cmd->read_stag;
- if (i + 1 == isert_cmd->rdma_wr_num)
- rdma_wr->wr.next = &isert_cmd->tx_desc.send_wr;
- else
- rdma_wr->wr.next = &isert_cmd->rdma_wr[i + 1].wr;
- } else {
- isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
-
- rdma_wr->wr.opcode = IB_WR_RDMA_READ;
- rdma_wr->remote_addr = isert_cmd->write_va + va_offset;
- rdma_wr->rkey = isert_cmd->write_stag;
- if (i + 1 == isert_cmd->rdma_wr_num)
- rdma_wr->wr.send_flags = IB_SEND_SIGNALED;
- else
- rdma_wr->wr.next = &isert_cmd->rdma_wr[i + 1].wr;
- }
-
- ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
- rdma_wr, data_len, offset);
- ib_sge += ib_sge_cnt;
-
- offset += data_len;
- va_offset += data_len;
- data_left -= data_len;
- }
-
- return 0;
-unmap_cmd:
- isert_unmap_data_buf(isert_conn, data);
-
- return ret;
-}
-
-static inline void
-isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
-{
- u32 rkey;
-
- memset(inv_wr, 0, sizeof(*inv_wr));
- inv_wr->wr_cqe = NULL;
- inv_wr->opcode = IB_WR_LOCAL_INV;
- inv_wr->ex.invalidate_rkey = mr->rkey;
-
- /* Bump the key */
- rkey = ib_inc_rkey(mr->rkey);
- ib_update_fast_reg_key(mr, rkey);
-}
-
-static int
-isert_fast_reg_mr(struct isert_conn *isert_conn,
- struct fast_reg_descriptor *fr_desc,
- struct isert_data_buf *mem,
- enum isert_indicator ind,
- struct ib_sge *sge)
-{
- struct isert_device *device = isert_conn->device;
- struct ib_device *ib_dev = device->ib_device;
- struct ib_mr *mr;
- struct ib_reg_wr reg_wr;
- struct ib_send_wr inv_wr, *bad_wr, *wr = NULL;
- int ret, n;
-
- if (mem->dma_nents == 1) {
- sge->lkey = device->pd->local_dma_lkey;
- sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]);
- sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]);
- isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n",
- sge->addr, sge->length, sge->lkey);
- return 0;
- }
-
- if (ind == ISERT_DATA_KEY_VALID)
- /* Registering data buffer */
- mr = fr_desc->data_mr;
- else
- /* Registering protection buffer */
- mr = fr_desc->pi_ctx->prot_mr;
-
- if (!(fr_desc->ind & ind)) {
- isert_inv_rkey(&inv_wr, mr);
- wr = &inv_wr;
- }
-
- n = ib_map_mr_sg(mr, mem->sg, mem->nents, PAGE_SIZE);
- if (unlikely(n != mem->nents)) {
- isert_err("failed to map mr sg (%d/%d)\n",
- n, mem->nents);
- return n < 0 ? n : -EINVAL;
- }
-
- isert_dbg("Use fr_desc %p sg_nents %d offset %u\n",
- fr_desc, mem->nents, mem->offset);
-
- reg_wr.wr.next = NULL;
- reg_wr.wr.opcode = IB_WR_REG_MR;
- reg_wr.wr.wr_cqe = NULL;
- reg_wr.wr.send_flags = 0;
- reg_wr.wr.num_sge = 0;
- reg_wr.mr = mr;
- reg_wr.key = mr->lkey;
- reg_wr.access = IB_ACCESS_LOCAL_WRITE;
-
- if (!wr)
- wr = &reg_wr.wr;
- else
- wr->next = &reg_wr.wr;
-
- ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
- if (ret) {
- isert_err("fast registration failed, ret:%d\n", ret);
- return ret;
- }
- fr_desc->ind &= ~ind;
-
- sge->lkey = mr->lkey;
- sge->addr = mr->iova;
- sge->length = mr->length;
-
- isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n",
- sge->addr, sge->length, sge->lkey);
-
- return ret;
-}
-
static inline void
isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs,
struct ib_sig_domain *domain)
@@ -2526,6 +2041,8 @@ isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs,
static int
isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
{
+ memset(sig_attrs, 0, sizeof(*sig_attrs));
+
switch (se_cmd->prot_op) {
case TARGET_PROT_DIN_INSERT:
case TARGET_PROT_DOUT_STRIP:
@@ -2547,228 +2064,59 @@ isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
return -EINVAL;
}
+ sig_attrs->check_mask =
+ (se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD ? 0xc0 : 0) |
+ (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) |
+ (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x0f : 0);
return 0;
}
-static inline u8
-isert_set_prot_checks(u8 prot_checks)
-{
- return (prot_checks & TARGET_DIF_CHECK_GUARD ? 0xc0 : 0) |
- (prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) |
- (prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x0f : 0);
-}
-
-static int
-isert_reg_sig_mr(struct isert_conn *isert_conn,
- struct isert_cmd *isert_cmd,
- struct fast_reg_descriptor *fr_desc)
-{
- struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd;
- struct ib_sig_handover_wr sig_wr;
- struct ib_send_wr inv_wr, *bad_wr, *wr = NULL;
- struct pi_context *pi_ctx = fr_desc->pi_ctx;
- struct ib_sig_attrs sig_attrs;
- int ret;
-
- memset(&sig_attrs, 0, sizeof(sig_attrs));
- ret = isert_set_sig_attrs(se_cmd, &sig_attrs);
- if (ret)
- goto err;
-
- sig_attrs.check_mask = isert_set_prot_checks(se_cmd->prot_checks);
-
- if (!(fr_desc->ind & ISERT_SIG_KEY_VALID)) {
- isert_inv_rkey(&inv_wr, pi_ctx->sig_mr);
- wr = &inv_wr;
- }
-
- memset(&sig_wr, 0, sizeof(sig_wr));
- sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
- sig_wr.wr.wr_cqe = NULL;
- sig_wr.wr.sg_list = &isert_cmd->ib_sg[DATA];
- sig_wr.wr.num_sge = 1;
- sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
- sig_wr.sig_attrs = &sig_attrs;
- sig_wr.sig_mr = pi_ctx->sig_mr;
- if (se_cmd->t_prot_sg)
- sig_wr.prot = &isert_cmd->ib_sg[PROT];
-
- if (!wr)
- wr = &sig_wr.wr;
- else
- wr->next = &sig_wr.wr;
-
- ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
- if (ret) {
- isert_err("fast registration failed, ret:%d\n", ret);
- goto err;
- }
- fr_desc->ind &= ~ISERT_SIG_KEY_VALID;
-
- isert_cmd->ib_sg[SIG].lkey = pi_ctx->sig_mr->lkey;
- isert_cmd->ib_sg[SIG].addr = 0;
- isert_cmd->ib_sg[SIG].length = se_cmd->data_length;
- if (se_cmd->prot_op != TARGET_PROT_DIN_STRIP &&
- se_cmd->prot_op != TARGET_PROT_DOUT_INSERT)
- /*
- * We have protection guards on the wire
- * so we need to set a larget transfer
- */
- isert_cmd->ib_sg[SIG].length += se_cmd->prot_length;
-
- isert_dbg("sig_sge: addr: 0x%llx length: %u lkey: %x\n",
- isert_cmd->ib_sg[SIG].addr, isert_cmd->ib_sg[SIG].length,
- isert_cmd->ib_sg[SIG].lkey);
-err:
- return ret;
-}
-
static int
-isert_handle_prot_cmd(struct isert_conn *isert_conn,
- struct isert_cmd *isert_cmd)
-{
- struct isert_device *device = isert_conn->device;
- struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd;
+isert_rdma_rw_ctx_post(struct isert_cmd *cmd, struct isert_conn *conn,
+ struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
+{
+ struct se_cmd *se_cmd = &cmd->iscsi_cmd->se_cmd;
+ enum dma_data_direction dir = target_reverse_dma_direction(se_cmd);
+ u8 port_num = conn->cm_id->port_num;
+ u64 addr;
+ u32 rkey, offset;
int ret;
- if (!isert_cmd->fr_desc->pi_ctx) {
- ret = isert_create_pi_ctx(isert_cmd->fr_desc,
- device->ib_device,
- device->pd);
- if (ret) {
- isert_err("conn %p failed to allocate pi_ctx\n",
- isert_conn);
- return ret;
- }
- }
-
- if (se_cmd->t_prot_sg) {
- ret = isert_map_data_buf(isert_conn, isert_cmd,
- se_cmd->t_prot_sg,
- se_cmd->t_prot_nents,
- se_cmd->prot_length,
- 0,
- isert_cmd->iser_ib_op,
- &isert_cmd->prot);
- if (ret) {
- isert_err("conn %p failed to map protection buffer\n",
- isert_conn);
- return ret;
- }
-
- memset(&isert_cmd->ib_sg[PROT], 0, sizeof(isert_cmd->ib_sg[PROT]));
- ret = isert_fast_reg_mr(isert_conn, isert_cmd->fr_desc,
- &isert_cmd->prot,
- ISERT_PROT_KEY_VALID,
- &isert_cmd->ib_sg[PROT]);
- if (ret) {
- isert_err("conn %p failed to fast reg mr\n",
- isert_conn);
- goto unmap_prot_cmd;
- }
- }
-
- ret = isert_reg_sig_mr(isert_conn, isert_cmd, isert_cmd->fr_desc);
- if (ret) {
- isert_err("conn %p failed to fast reg mr\n",
- isert_conn);
- goto unmap_prot_cmd;
- }
- isert_cmd->fr_desc->ind |= ISERT_PROTECTED;
-
- return 0;
-
-unmap_prot_cmd:
- if (se_cmd->t_prot_sg)
- isert_unmap_data_buf(isert_conn, &isert_cmd->prot);
-
- return ret;
-}
-
-static int
-isert_reg_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn)
-{
- struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
- struct se_cmd *se_cmd = &cmd->se_cmd;
- struct isert_conn *isert_conn = conn->context;
- struct fast_reg_descriptor *fr_desc = NULL;
- struct ib_rdma_wr *rdma_wr;
- struct ib_sge *ib_sg;
- u32 offset;
- int ret = 0;
- unsigned long flags;
-
- offset = isert_cmd->iser_ib_op == ISER_IB_RDMA_READ ?
- cmd->write_data_done : 0;
- ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg,
- se_cmd->t_data_nents, se_cmd->data_length,
- offset, isert_cmd->iser_ib_op,
- &isert_cmd->data);
- if (ret)
- return ret;
-
- if (isert_cmd->data.dma_nents != 1 ||
- isert_prot_cmd(isert_conn, se_cmd)) {
- spin_lock_irqsave(&isert_conn->pool_lock, flags);
- fr_desc = list_first_entry(&isert_conn->fr_pool,
- struct fast_reg_descriptor, list);
- list_del(&fr_desc->list);
- spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
- isert_cmd->fr_desc = fr_desc;
- }
-
- ret = isert_fast_reg_mr(isert_conn, fr_desc, &isert_cmd->data,
- ISERT_DATA_KEY_VALID, &isert_cmd->ib_sg[DATA]);
- if (ret)
- goto unmap_cmd;
-
- if (isert_prot_cmd(isert_conn, se_cmd)) {
- ret = isert_handle_prot_cmd(isert_conn, isert_cmd);
- if (ret)
- goto unmap_cmd;
-
- ib_sg = &isert_cmd->ib_sg[SIG];
+ if (dir == DMA_FROM_DEVICE) {
+ addr = cmd->write_va;
+ rkey = cmd->write_stag;
+ offset = cmd->iscsi_cmd->write_data_done;
} else {
- ib_sg = &isert_cmd->ib_sg[DATA];
+ addr = cmd->read_va;
+ rkey = cmd->read_stag;
+ offset = 0;
}
- memcpy(&isert_cmd->s_ib_sge, ib_sg, sizeof(*ib_sg));
- isert_cmd->ib_sge = &isert_cmd->s_ib_sge;
- isert_cmd->rdma_wr_num = 1;
- memset(&isert_cmd->s_rdma_wr, 0, sizeof(isert_cmd->s_rdma_wr));
- isert_cmd->rdma_wr = &isert_cmd->s_rdma_wr;
+ if (isert_prot_cmd(conn, se_cmd)) {
+ struct ib_sig_attrs sig_attrs;
- rdma_wr = &isert_cmd->s_rdma_wr;
- rdma_wr->wr.sg_list = &isert_cmd->s_ib_sge;
- rdma_wr->wr.num_sge = 1;
- rdma_wr->wr.wr_cqe = &isert_cmd->tx_desc.tx_cqe;
- if (isert_cmd->iser_ib_op == ISER_IB_RDMA_WRITE) {
- isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done;
+ ret = isert_set_sig_attrs(se_cmd, &sig_attrs);
+ if (ret)
+ return ret;
- rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
- rdma_wr->remote_addr = isert_cmd->read_va;
- rdma_wr->rkey = isert_cmd->read_stag;
- rdma_wr->wr.send_flags = !isert_prot_cmd(isert_conn, se_cmd) ?
- 0 : IB_SEND_SIGNALED;
+ WARN_ON_ONCE(offset);
+ ret = rdma_rw_ctx_signature_init(&cmd->rw, conn->qp, port_num,
+ se_cmd->t_data_sg, se_cmd->t_data_nents,
+ se_cmd->t_prot_sg, se_cmd->t_prot_nents,
+ &sig_attrs, addr, rkey, dir);
} else {
- isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
-
- rdma_wr->wr.opcode = IB_WR_RDMA_READ;
- rdma_wr->remote_addr = isert_cmd->write_va;
- rdma_wr->rkey = isert_cmd->write_stag;
- rdma_wr->wr.send_flags = IB_SEND_SIGNALED;
+ ret = rdma_rw_ctx_init(&cmd->rw, conn->qp, port_num,
+ se_cmd->t_data_sg, se_cmd->t_data_nents,
+ offset, addr, rkey, dir);
}
-
- return 0;
-
-unmap_cmd:
- if (fr_desc) {
- spin_lock_irqsave(&isert_conn->pool_lock, flags);
- list_add_tail(&fr_desc->list, &isert_conn->fr_pool);
- spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
+ if (ret < 0) {
+ isert_err("Cmd: %p failed to prepare RDMA res\n", cmd);
+ return ret;
}
- isert_unmap_data_buf(isert_conn, &isert_cmd->data);
+ ret = rdma_rw_ctx_post(&cmd->rw, conn->qp, port_num, cqe, chain_wr);
+ if (ret < 0)
+ isert_err("Cmd: %p failed to post RDMA res\n", cmd);
return ret;
}
@@ -2778,21 +2126,17 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
struct se_cmd *se_cmd = &cmd->se_cmd;
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = conn->context;
- struct isert_device *device = isert_conn->device;
- struct ib_send_wr *wr_failed;
+ struct ib_cqe *cqe = NULL;
+ struct ib_send_wr *chain_wr = NULL;
int rc;
isert_dbg("Cmd: %p RDMA_WRITE data_length: %u\n",
isert_cmd, se_cmd->data_length);
- isert_cmd->iser_ib_op = ISER_IB_RDMA_WRITE;
- rc = device->reg_rdma_mem(isert_cmd, conn);
- if (rc) {
- isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
- return rc;
- }
-
- if (!isert_prot_cmd(isert_conn, se_cmd)) {
+ if (isert_prot_cmd(isert_conn, se_cmd)) {
+ isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done;
+ cqe = &isert_cmd->tx_desc.tx_cqe;
+ } else {
/*
* Build isert_conn->tx_desc for iSCSI response PDU and attach
*/
@@ -2803,56 +2147,35 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
isert_init_send_wr(isert_conn, isert_cmd,
&isert_cmd->tx_desc.send_wr);
- isert_cmd->s_rdma_wr.wr.next = &isert_cmd->tx_desc.send_wr;
- isert_cmd->rdma_wr_num += 1;
rc = isert_post_recv(isert_conn, isert_cmd->rx_desc);
if (rc) {
isert_err("ib_post_recv failed with %d\n", rc);
return rc;
}
- }
- rc = ib_post_send(isert_conn->qp, &isert_cmd->rdma_wr->wr, &wr_failed);
- if (rc)
- isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
-
- if (!isert_prot_cmd(isert_conn, se_cmd))
- isert_dbg("Cmd: %p posted RDMA_WRITE + Response for iSER Data "
- "READ\n", isert_cmd);
- else
- isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n",
- isert_cmd);
+ chain_wr = &isert_cmd->tx_desc.send_wr;
+ }
+ isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr);
+ isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", isert_cmd);
return 1;
}
static int
isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
{
- struct se_cmd *se_cmd = &cmd->se_cmd;
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
- struct isert_conn *isert_conn = conn->context;
- struct isert_device *device = isert_conn->device;
- struct ib_send_wr *wr_failed;
- int rc;
isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
- isert_cmd, se_cmd->data_length, cmd->write_data_done);
- isert_cmd->iser_ib_op = ISER_IB_RDMA_READ;
- rc = device->reg_rdma_mem(isert_cmd, conn);
- if (rc) {
- isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
- return rc;
- }
+ isert_cmd, cmd->se_cmd.data_length, cmd->write_data_done);
- rc = ib_post_send(isert_conn->qp, &isert_cmd->rdma_wr->wr, &wr_failed);
- if (rc)
- isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
+ isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
+ isert_rdma_rw_ctx_post(isert_cmd, conn->context,
+ &isert_cmd->tx_desc.tx_cqe, NULL);
isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
isert_cmd);
-
return 0;
}
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 147900cbb578..e512ba941f2f 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -3,6 +3,7 @@
#include <linux/in6.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
+#include <rdma/rw.h>
#include <scsi/iser.h>
@@ -53,10 +54,7 @@
#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
-#define ISERT_INFLIGHT_DATAOUTS 8
-
-#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
- (1 + ISERT_INFLIGHT_DATAOUTS) + \
+#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX + \
ISERT_MAX_TX_MISC_PDUS + \
ISERT_MAX_RX_MISC_PDUS)
@@ -71,13 +69,6 @@ enum isert_desc_type {
ISCSI_TX_DATAIN
};
-enum iser_ib_op_code {
- ISER_IB_RECV,
- ISER_IB_SEND,
- ISER_IB_RDMA_WRITE,
- ISER_IB_RDMA_READ,
-};
-
enum iser_conn_state {
ISER_CONN_INIT,
ISER_CONN_UP,
@@ -118,42 +109,6 @@ static inline struct iser_tx_desc *cqe_to_tx_desc(struct ib_cqe *cqe)
return container_of(cqe, struct iser_tx_desc, tx_cqe);
}
-
-enum isert_indicator {
- ISERT_PROTECTED = 1 << 0,
- ISERT_DATA_KEY_VALID = 1 << 1,
- ISERT_PROT_KEY_VALID = 1 << 2,
- ISERT_SIG_KEY_VALID = 1 << 3,
-};
-
-struct pi_context {
- struct ib_mr *prot_mr;
- struct ib_mr *sig_mr;
-};
-
-struct fast_reg_descriptor {
- struct list_head list;
- struct ib_mr *data_mr;
- u8 ind;
- struct pi_context *pi_ctx;
-};
-
-struct isert_data_buf {
- struct scatterlist *sg;
- int nents;
- u32 sg_off;
- u32 len; /* cur_rdma_length */
- u32 offset;
- unsigned int dma_nents;
- enum dma_data_direction dma_dir;
-};
-
-enum {
- DATA = 0,
- PROT = 1,
- SIG = 2,
-};
-
struct isert_cmd {
uint32_t read_stag;
uint32_t write_stag;
@@ -166,16 +121,7 @@ struct isert_cmd {
struct iscsi_cmd *iscsi_cmd;
struct iser_tx_desc tx_desc;
struct iser_rx_desc *rx_desc;
- enum iser_ib_op_code iser_ib_op;
- struct ib_sge *ib_sge;
- struct ib_sge s_ib_sge;
- int rdma_wr_num;
- struct ib_rdma_wr *rdma_wr;
- struct ib_rdma_wr s_rdma_wr;
- struct ib_sge ib_sg[3];
- struct isert_data_buf data;
- struct isert_data_buf prot;
- struct fast_reg_descriptor *fr_desc;
+ struct rdma_rw_ctx rw;
struct work_struct comp_work;
struct scatterlist sg;
};
@@ -210,10 +156,6 @@ struct isert_conn {
struct isert_device *device;
struct mutex mutex;
struct kref kref;
- struct list_head fr_pool;
- int fr_pool_size;
- /* lock to protect fastreg pool */
- spinlock_t pool_lock;
struct work_struct release_work;
bool logout_posted;
bool snd_w_inv;
@@ -236,7 +178,6 @@ struct isert_comp {
};
struct isert_device {
- int use_fastreg;
bool pi_capable;
int refcount;
struct ib_device *ib_device;
@@ -244,10 +185,6 @@ struct isert_device {
struct isert_comp *comps;
int comps_used;
struct list_head dev_node;
- int (*reg_rdma_mem)(struct isert_cmd *isert_cmd,
- struct iscsi_conn *conn);
- void (*unreg_rdma_mem)(struct isert_cmd *isert_cmd,
- struct isert_conn *isert_conn);
};
struct isert_np {
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 369a75e1f44e..646de170ec12 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -70,6 +70,7 @@ static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
+static bool never_register;
static int topspin_workarounds = 1;
module_param(srp_sg_tablesize, uint, 0444);
@@ -99,6 +100,9 @@ module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
"Use memory registration even for contiguous memory regions");
+module_param(never_register, bool, 0444);
+MODULE_PARM_DESC(never_register, "Never register memory");
+
static const struct kernel_param_ops srp_tmo_ops;
static int srp_reconnect_delay = 10;
@@ -316,7 +320,7 @@ static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
struct ib_fmr_pool_param fmr_param;
memset(&fmr_param, 0, sizeof(fmr_param));
- fmr_param.pool_size = target->scsi_host->can_queue;
+ fmr_param.pool_size = target->mr_pool_size;
fmr_param.dirty_watermark = fmr_param.pool_size / 4;
fmr_param.cache = 1;
fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
@@ -441,23 +445,22 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
{
struct srp_device *dev = target->srp_host->srp_dev;
- return srp_create_fr_pool(dev->dev, dev->pd,
- target->scsi_host->can_queue,
+ return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
dev->max_pages_per_mr);
}
/**
* srp_destroy_qp() - destroy an RDMA queue pair
- * @ch: SRP RDMA channel.
+ * @qp: RDMA queue pair.
*
* Drain the qp before destroying it. This avoids that the receive
* completion handler can access the queue pair while it is
* being destroyed.
*/
-static void srp_destroy_qp(struct srp_rdma_ch *ch)
+static void srp_destroy_qp(struct ib_qp *qp)
{
- ib_drain_rq(ch->qp);
- ib_destroy_qp(ch->qp);
+ ib_drain_rq(qp);
+ ib_destroy_qp(qp);
}
static int srp_create_ch_ib(struct srp_rdma_ch *ch)
@@ -469,7 +472,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
struct ib_qp *qp;
struct ib_fmr_pool *fmr_pool = NULL;
struct srp_fr_pool *fr_pool = NULL;
- const int m = dev->use_fast_reg ? 3 : 1;
+ const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
int ret;
init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
@@ -530,7 +533,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
}
if (ch->qp)
- srp_destroy_qp(ch);
+ srp_destroy_qp(ch->qp);
if (ch->recv_cq)
ib_free_cq(ch->recv_cq);
if (ch->send_cq)
@@ -554,7 +557,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
return 0;
err_qp:
- srp_destroy_qp(ch);
+ srp_destroy_qp(qp);
err_send_cq:
ib_free_cq(send_cq);
@@ -597,7 +600,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
ib_destroy_fmr_pool(ch->fmr_pool);
}
- srp_destroy_qp(ch);
+ srp_destroy_qp(ch->qp);
ib_free_cq(ch->send_cq);
ib_free_cq(ch->recv_cq);
@@ -850,7 +853,7 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch)
for (i = 0; i < target->req_ring_size; ++i) {
req = &ch->req_ring[i];
- mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
+ mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
GFP_KERNEL);
if (!mr_list)
goto out;
@@ -1112,7 +1115,7 @@ static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
}
/**
- * srp_free_req() - Unmap data and add request to the free request list.
+ * srp_free_req() - Unmap data and adjust ch->req_lim.
* @ch: SRP RDMA channel.
* @req: Request to be freed.
* @scmnd: SCSI command associated with @req.
@@ -1299,9 +1302,16 @@ static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
srp_handle_qp_err(cq, wc, "FAST REG");
}
+/*
+ * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
+ * where to start in the first element. If sg_offset_p != NULL then
+ * *sg_offset_p is updated to the offset in state->sg[retval] of the first
+ * byte that has not yet been mapped.
+ */
static int srp_map_finish_fr(struct srp_map_state *state,
struct srp_request *req,
- struct srp_rdma_ch *ch, int sg_nents)
+ struct srp_rdma_ch *ch, int sg_nents,
+ unsigned int *sg_offset_p)
{
struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
@@ -1316,13 +1326,14 @@ static int srp_map_finish_fr(struct srp_map_state *state,
WARN_ON_ONCE(!dev->use_fast_reg);
- if (sg_nents == 0)
- return 0;
-
if (sg_nents == 1 && target->global_mr) {
- srp_map_desc(state, sg_dma_address(state->sg),
- sg_dma_len(state->sg),
+ unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
+
+ srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
+ sg_dma_len(state->sg) - sg_offset,
target->global_mr->rkey);
+ if (sg_offset_p)
+ *sg_offset_p = 0;
return 1;
}
@@ -1333,9 +1344,17 @@ static int srp_map_finish_fr(struct srp_map_state *state,
rkey = ib_inc_rkey(desc->mr->rkey);
ib_update_fast_reg_key(desc->mr, rkey);
- n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size);
- if (unlikely(n < 0))
+ n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
+ dev->mr_page_size);
+ if (unlikely(n < 0)) {
+ srp_fr_pool_put(ch->fr_pool, &desc, 1);
+ pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
+ dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
+ sg_offset_p ? *sg_offset_p : -1, n);
return n;
+ }
+
+ WARN_ON_ONCE(desc->mr->length == 0);
req->reg_cqe.done = srp_reg_mr_err_done;
@@ -1357,8 +1376,10 @@ static int srp_map_finish_fr(struct srp_map_state *state,
desc->mr->length, desc->mr->rkey);
err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
- if (unlikely(err))
+ if (unlikely(err)) {
+ WARN_ON_ONCE(err == -ENOMEM);
return err;
+ }
return n;
}
@@ -1398,7 +1419,7 @@ static int srp_map_sg_entry(struct srp_map_state *state,
/*
* If the last entry of the MR wasn't a full page, then we need to
* close it out and start a new one -- we can only merge at page
- * boundries.
+ * boundaries.
*/
ret = 0;
if (len != dev->mr_page_size)
@@ -1413,10 +1434,9 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
struct scatterlist *sg;
int i, ret;
- state->desc = req->indirect_desc;
state->pages = req->map_page;
state->fmr.next = req->fmr_list;
- state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
+ state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
for_each_sg(scat, sg, count, i) {
ret = srp_map_sg_entry(state, ch, sg, i);
@@ -1428,8 +1448,6 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
if (ret)
return ret;
- req->nmdesc = state->nmdesc;
-
return 0;
}
@@ -1437,15 +1455,20 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
struct srp_request *req, struct scatterlist *scat,
int count)
{
+ unsigned int sg_offset = 0;
+
state->desc = req->indirect_desc;
state->fr.next = req->fr_list;
- state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
+ state->fr.end = req->fr_list + ch->target->mr_per_cmd;
state->sg = scat;
+ if (count == 0)
+ return 0;
+
while (count) {
int i, n;
- n = srp_map_finish_fr(state, req, ch, count);
+ n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
if (unlikely(n < 0))
return n;
@@ -1454,8 +1477,6 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
state->sg = sg_next(state->sg);
}
- req->nmdesc = state->nmdesc;
-
return 0;
}
@@ -1475,8 +1496,6 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
target->global_mr->rkey);
}
- req->nmdesc = state->nmdesc;
-
return 0;
}
@@ -1509,14 +1528,15 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
if (dev->use_fast_reg) {
state.sg = idb_sg;
- sg_set_buf(idb_sg, req->indirect_desc, idb_len);
+ sg_init_one(idb_sg, req->indirect_desc, idb_len);
idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
#ifdef CONFIG_NEED_SG_DMA_LENGTH
idb_sg->dma_length = idb_sg->length; /* hack^2 */
#endif
- ret = srp_map_finish_fr(&state, req, ch, 1);
+ ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
if (ret < 0)
return ret;
+ WARN_ON_ONCE(ret < 1);
} else if (dev->use_fmr) {
state.pages = idb_pages;
state.pages[0] = (req->indirect_dma_addr &
@@ -1534,6 +1554,41 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
return 0;
}
+#if defined(DYNAMIC_DATA_DEBUG)
+static void srp_check_mapping(struct srp_map_state *state,
+ struct srp_rdma_ch *ch, struct srp_request *req,
+ struct scatterlist *scat, int count)
+{
+ struct srp_device *dev = ch->target->srp_host->srp_dev;
+ struct srp_fr_desc **pfr;
+ u64 desc_len = 0, mr_len = 0;
+ int i;
+
+ for (i = 0; i < state->ndesc; i++)
+ desc_len += be32_to_cpu(req->indirect_desc[i].len);
+ if (dev->use_fast_reg)
+ for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
+ mr_len += (*pfr)->mr->length;
+ else if (dev->use_fmr)
+ for (i = 0; i < state->nmdesc; i++)
+ mr_len += be32_to_cpu(req->indirect_desc[i].len);
+ if (desc_len != scsi_bufflen(req->scmnd) ||
+ mr_len > scsi_bufflen(req->scmnd))
+ pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
+ scsi_bufflen(req->scmnd), desc_len, mr_len,
+ state->ndesc, state->nmdesc);
+}
+#endif
+
+/**
+ * srp_map_data() - map SCSI data buffer onto an SRP request
+ * @scmnd: SCSI command to map
+ * @ch: SRP RDMA channel
+ * @req: SRP request
+ *
+ * Returns the length in bytes of the SRP_CMD IU or a negative value if
+ * mapping failed.
+ */
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
struct srp_request *req)
{
@@ -1601,11 +1656,23 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
memset(&state, 0, sizeof(state));
if (dev->use_fast_reg)
- srp_map_sg_fr(&state, ch, req, scat, count);
+ ret = srp_map_sg_fr(&state, ch, req, scat, count);
else if (dev->use_fmr)
- srp_map_sg_fmr(&state, ch, req, scat, count);
+ ret = srp_map_sg_fmr(&state, ch, req, scat, count);
else
- srp_map_sg_dma(&state, ch, req, scat, count);
+ ret = srp_map_sg_dma(&state, ch, req, scat, count);
+ req->nmdesc = state.nmdesc;
+ if (ret < 0)
+ goto unmap;
+
+#if defined(DYNAMIC_DEBUG)
+ {
+ DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
+ "Memory mapping consistency check");
+ if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
+ srp_check_mapping(&state, ch, req, scat, count);
+ }
+#endif
/* We've mapped the request, now pull as much of the indirect
* descriptor table as we can into the command buffer. If this
@@ -1628,7 +1695,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
!target->allow_ext_sg)) {
shost_printk(KERN_ERR, target->scsi_host,
"Could not fit S/G list into SRP_CMD\n");
- return -EIO;
+ ret = -EIO;
+ goto unmap;
}
count = min(state.ndesc, target->cmd_sg_cnt);
@@ -1646,7 +1714,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
idb_len, &idb_rkey);
if (ret < 0)
- return ret;
+ goto unmap;
req->nmdesc++;
} else {
idb_rkey = cpu_to_be32(target->global_mr->rkey);
@@ -1672,6 +1740,12 @@ map_complete:
cmd->buf_fmt = fmt;
return len;
+
+unmap:
+ srp_unmap_data(scmnd, ch, req);
+ if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
+ ret = -E2BIG;
+ return ret;
}
/*
@@ -2564,6 +2638,20 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
}
+static int srp_slave_alloc(struct scsi_device *sdev)
+{
+ struct Scsi_Host *shost = sdev->host;
+ struct srp_target_port *target = host_to_target(shost);
+ struct srp_device *srp_dev = target->srp_host->srp_dev;
+ struct ib_device *ibdev = srp_dev->dev;
+
+ if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+ blk_queue_virt_boundary(sdev->request_queue,
+ ~srp_dev->mr_page_mask);
+
+ return 0;
+}
+
static int srp_slave_configure(struct scsi_device *sdev)
{
struct Scsi_Host *shost = sdev->host;
@@ -2755,6 +2843,7 @@ static struct scsi_host_template srp_template = {
.module = THIS_MODULE,
.name = "InfiniBand SRP initiator",
.proc_name = DRV_NAME,
+ .slave_alloc = srp_slave_alloc,
.slave_configure = srp_slave_configure,
.info = srp_target_info,
.queuecommand = srp_queuecommand,
@@ -2829,7 +2918,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
goto out;
}
- pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
+ pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
dev_name(&target->scsi_host->shost_gendev),
srp_sdev_count(target->scsi_host));
@@ -3161,6 +3250,7 @@ static ssize_t srp_create_target(struct device *dev,
struct srp_device *srp_dev = host->srp_dev;
struct ib_device *ibdev = srp_dev->dev;
int ret, node_idx, node, cpu, i;
+ unsigned int max_sectors_per_mr, mr_per_cmd = 0;
bool multich = false;
target_host = scsi_host_alloc(&srp_template,
@@ -3217,7 +3307,33 @@ static ssize_t srp_create_target(struct device *dev,
target->sg_tablesize = target->cmd_sg_cnt;
}
+ if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
+ /*
+ * FR and FMR can only map one HCA page per entry. If the
+ * start address is not aligned on a HCA page boundary two
+ * entries will be used for the head and the tail although
+ * these two entries combined contain at most one HCA page of
+ * data. Hence the "+ 1" in the calculation below.
+ *
+ * The indirect data buffer descriptor is contiguous so the
+ * memory for that buffer will only be registered if
+ * register_always is true. Hence add one to mr_per_cmd if
+ * register_always has been set.
+ */
+ max_sectors_per_mr = srp_dev->max_pages_per_mr <<
+ (ilog2(srp_dev->mr_page_size) - 9);
+ mr_per_cmd = register_always +
+ (target->scsi_host->max_sectors + 1 +
+ max_sectors_per_mr - 1) / max_sectors_per_mr;
+ pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
+ target->scsi_host->max_sectors,
+ srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
+ max_sectors_per_mr, mr_per_cmd);
+ }
+
target_host->sg_tablesize = target->sg_tablesize;
+ target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
+ target->mr_per_cmd = mr_per_cmd;
target->indirect_size = target->sg_tablesize *
sizeof (struct srp_direct_buf);
target->max_iu_len = sizeof (struct srp_cmd) +
@@ -3414,17 +3530,6 @@ static void srp_add_one(struct ib_device *device)
if (!srp_dev)
return;
- srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
- device->map_phys_fmr && device->unmap_fmr);
- srp_dev->has_fr = (device->attrs.device_cap_flags &
- IB_DEVICE_MEM_MGT_EXTENSIONS);
- if (!srp_dev->has_fmr && !srp_dev->has_fr)
- dev_warn(&device->dev, "neither FMR nor FR is supported\n");
-
- srp_dev->use_fast_reg = (srp_dev->has_fr &&
- (!srp_dev->has_fmr || prefer_fr));
- srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
-
/*
* Use the smallest page size supported by the HCA, down to a
* minimum of 4096 bytes. We're unlikely to build large sglists
@@ -3435,8 +3540,25 @@ static void srp_add_one(struct ib_device *device)
srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
max_pages_per_mr = device->attrs.max_mr_size;
do_div(max_pages_per_mr, srp_dev->mr_page_size);
+ pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
+ device->attrs.max_mr_size, srp_dev->mr_page_size,
+ max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
max_pages_per_mr);
+
+ srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+ device->map_phys_fmr && device->unmap_fmr);
+ srp_dev->has_fr = (device->attrs.device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS);
+ if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
+ dev_warn(&device->dev, "neither FMR nor FR is supported\n");
+ } else if (!never_register &&
+ device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
+ srp_dev->use_fast_reg = (srp_dev->has_fr &&
+ (!srp_dev->has_fmr || prefer_fr));
+ srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
+ }
+
if (srp_dev->use_fast_reg) {
srp_dev->max_pages_per_mr =
min_t(u32, srp_dev->max_pages_per_mr,
@@ -3456,7 +3578,8 @@ static void srp_add_one(struct ib_device *device)
if (IS_ERR(srp_dev->pd))
goto free_dev;
- if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
+ if (never_register || !register_always ||
+ (!srp_dev->has_fmr && !srp_dev->has_fr)) {
srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 9e05ce4a04fd..26bb9b0a7a63 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -202,6 +202,8 @@ struct srp_target_port {
char target_name[32];
unsigned int scsi_id;
unsigned int sg_tablesize;
+ int mr_pool_size;
+ int mr_per_cmd;
int queue_size;
int req_ring_size;
int comp_vector;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 8b42401d4795..2843f1ae75bd 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -765,52 +765,6 @@ static int srpt_post_recv(struct srpt_device *sdev,
}
/**
- * srpt_post_send() - Post an IB send request.
- *
- * Returns zero upon success and a non-zero value upon failure.
- */
-static int srpt_post_send(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx, int len)
-{
- struct ib_sge list;
- struct ib_send_wr wr, *bad_wr;
- struct srpt_device *sdev = ch->sport->sdev;
- int ret;
-
- atomic_inc(&ch->req_lim);
-
- ret = -ENOMEM;
- if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) {
- pr_warn("IB send queue full (needed 1)\n");
- goto out;
- }
-
- ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, len,
- DMA_TO_DEVICE);
-
- list.addr = ioctx->ioctx.dma;
- list.length = len;
- list.lkey = sdev->pd->local_dma_lkey;
-
- ioctx->ioctx.cqe.done = srpt_send_done;
- wr.next = NULL;
- wr.wr_cqe = &ioctx->ioctx.cqe;
- wr.sg_list = &list;
- wr.num_sge = 1;
- wr.opcode = IB_WR_SEND;
- wr.send_flags = IB_SEND_SIGNALED;
-
- ret = ib_post_send(ch->qp, &wr, &bad_wr);
-
-out:
- if (ret < 0) {
- atomic_inc(&ch->sq_wr_avail);
- atomic_dec(&ch->req_lim);
- }
- return ret;
-}
-
-/**
* srpt_zerolength_write() - Perform a zero-length RDMA write.
*
* A quote from the InfiniBand specification: C9-88: For an HCA responder
@@ -843,6 +797,110 @@ static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
}
}
+static int srpt_alloc_rw_ctxs(struct srpt_send_ioctx *ioctx,
+ struct srp_direct_buf *db, int nbufs, struct scatterlist **sg,
+ unsigned *sg_cnt)
+{
+ enum dma_data_direction dir = target_reverse_dma_direction(&ioctx->cmd);
+ struct srpt_rdma_ch *ch = ioctx->ch;
+ struct scatterlist *prev = NULL;
+ unsigned prev_nents;
+ int ret, i;
+
+ if (nbufs == 1) {
+ ioctx->rw_ctxs = &ioctx->s_rw_ctx;
+ } else {
+ ioctx->rw_ctxs = kmalloc_array(nbufs, sizeof(*ioctx->rw_ctxs),
+ GFP_KERNEL);
+ if (!ioctx->rw_ctxs)
+ return -ENOMEM;
+ }
+
+ for (i = ioctx->n_rw_ctx; i < nbufs; i++, db++) {
+ struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
+ u64 remote_addr = be64_to_cpu(db->va);
+ u32 size = be32_to_cpu(db->len);
+ u32 rkey = be32_to_cpu(db->key);
+
+ ret = target_alloc_sgl(&ctx->sg, &ctx->nents, size, false,
+ i < nbufs - 1);
+ if (ret)
+ goto unwind;
+
+ ret = rdma_rw_ctx_init(&ctx->rw, ch->qp, ch->sport->port,
+ ctx->sg, ctx->nents, 0, remote_addr, rkey, dir);
+ if (ret < 0) {
+ target_free_sgl(ctx->sg, ctx->nents);
+ goto unwind;
+ }
+
+ ioctx->n_rdma += ret;
+ ioctx->n_rw_ctx++;
+
+ if (prev) {
+ sg_unmark_end(&prev[prev_nents - 1]);
+ sg_chain(prev, prev_nents + 1, ctx->sg);
+ } else {
+ *sg = ctx->sg;
+ }
+
+ prev = ctx->sg;
+ prev_nents = ctx->nents;
+
+ *sg_cnt += ctx->nents;
+ }
+
+ return 0;
+
+unwind:
+ while (--i >= 0) {
+ struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
+
+ rdma_rw_ctx_destroy(&ctx->rw, ch->qp, ch->sport->port,
+ ctx->sg, ctx->nents, dir);
+ target_free_sgl(ctx->sg, ctx->nents);
+ }
+ if (ioctx->rw_ctxs != &ioctx->s_rw_ctx)
+ kfree(ioctx->rw_ctxs);
+ return ret;
+}
+
+static void srpt_free_rw_ctxs(struct srpt_rdma_ch *ch,
+ struct srpt_send_ioctx *ioctx)
+{
+ enum dma_data_direction dir = target_reverse_dma_direction(&ioctx->cmd);
+ int i;
+
+ for (i = 0; i < ioctx->n_rw_ctx; i++) {
+ struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
+
+ rdma_rw_ctx_destroy(&ctx->rw, ch->qp, ch->sport->port,
+ ctx->sg, ctx->nents, dir);
+ target_free_sgl(ctx->sg, ctx->nents);
+ }
+
+ if (ioctx->rw_ctxs != &ioctx->s_rw_ctx)
+ kfree(ioctx->rw_ctxs);
+}
+
+static inline void *srpt_get_desc_buf(struct srp_cmd *srp_cmd)
+{
+ /*
+ * The pointer computations below will only be compiled correctly
+ * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
+ * whether srp_cmd::add_data has been declared as a byte pointer.
+ */
+ BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0) &&
+ !__same_type(srp_cmd->add_data[0], (u8)0));
+
+ /*
+ * According to the SRP spec, the lower two bits of the 'ADDITIONAL
+ * CDB LENGTH' field are reserved and the size in bytes of this field
+ * is four times the value specified in bits 3..7. Hence the "& ~3".
+ */
+ return srp_cmd->add_data + (srp_cmd->add_cdb_len & ~3);
+}
+
/**
* srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request.
* @ioctx: Pointer to the I/O context associated with the request.
@@ -858,94 +916,59 @@ static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
* -ENOMEM when memory allocation fails and zero upon success.
*/
static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
- struct srp_cmd *srp_cmd,
- enum dma_data_direction *dir, u64 *data_len)
+ struct srp_cmd *srp_cmd, enum dma_data_direction *dir,
+ struct scatterlist **sg, unsigned *sg_cnt, u64 *data_len)
{
- struct srp_indirect_buf *idb;
- struct srp_direct_buf *db;
- unsigned add_cdb_offset;
- int ret;
-
- /*
- * The pointer computations below will only be compiled correctly
- * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
- * whether srp_cmd::add_data has been declared as a byte pointer.
- */
- BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0)
- && !__same_type(srp_cmd->add_data[0], (u8)0));
-
BUG_ON(!dir);
BUG_ON(!data_len);
- ret = 0;
- *data_len = 0;
-
/*
* The lower four bits of the buffer format field contain the DATA-IN
* buffer descriptor format, and the highest four bits contain the
* DATA-OUT buffer descriptor format.
*/
- *dir = DMA_NONE;
if (srp_cmd->buf_fmt & 0xf)
/* DATA-IN: transfer data from target to initiator (read). */
*dir = DMA_FROM_DEVICE;
else if (srp_cmd->buf_fmt >> 4)
/* DATA-OUT: transfer data from initiator to target (write). */
*dir = DMA_TO_DEVICE;
+ else
+ *dir = DMA_NONE;
+
+ /* initialize data_direction early as srpt_alloc_rw_ctxs needs it */
+ ioctx->cmd.data_direction = *dir;
- /*
- * According to the SRP spec, the lower two bits of the 'ADDITIONAL
- * CDB LENGTH' field are reserved and the size in bytes of this field
- * is four times the value specified in bits 3..7. Hence the "& ~3".
- */
- add_cdb_offset = srp_cmd->add_cdb_len & ~3;
if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
- ioctx->n_rbuf = 1;
- ioctx->rbufs = &ioctx->single_rbuf;
+ struct srp_direct_buf *db = srpt_get_desc_buf(srp_cmd);
- db = (struct srp_direct_buf *)(srp_cmd->add_data
- + add_cdb_offset);
- memcpy(ioctx->rbufs, db, sizeof(*db));
*data_len = be32_to_cpu(db->len);
+ return srpt_alloc_rw_ctxs(ioctx, db, 1, sg, sg_cnt);
} else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) ||
((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) {
- idb = (struct srp_indirect_buf *)(srp_cmd->add_data
- + add_cdb_offset);
+ struct srp_indirect_buf *idb = srpt_get_desc_buf(srp_cmd);
+ int nbufs = be32_to_cpu(idb->table_desc.len) /
+ sizeof(struct srp_direct_buf);
- ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof(*db);
-
- if (ioctx->n_rbuf >
+ if (nbufs >
(srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
pr_err("received unsupported SRP_CMD request"
" type (%u out + %u in != %u / %zu)\n",
srp_cmd->data_out_desc_cnt,
srp_cmd->data_in_desc_cnt,
be32_to_cpu(idb->table_desc.len),
- sizeof(*db));
- ioctx->n_rbuf = 0;
- ret = -EINVAL;
- goto out;
- }
-
- if (ioctx->n_rbuf == 1)
- ioctx->rbufs = &ioctx->single_rbuf;
- else {
- ioctx->rbufs =
- kmalloc(ioctx->n_rbuf * sizeof(*db), GFP_ATOMIC);
- if (!ioctx->rbufs) {
- ioctx->n_rbuf = 0;
- ret = -ENOMEM;
- goto out;
- }
+ sizeof(struct srp_direct_buf));
+ return -EINVAL;
}
- db = idb->desc_list;
- memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof(*db));
*data_len = be32_to_cpu(idb->len);
+ return srpt_alloc_rw_ctxs(ioctx, idb->desc_list, nbufs,
+ sg, sg_cnt);
+ } else {
+ *data_len = 0;
+ return 0;
}
-out:
- return ret;
}
/**
@@ -1049,217 +1072,6 @@ static int srpt_ch_qp_err(struct srpt_rdma_ch *ch)
}
/**
- * srpt_unmap_sg_to_ib_sge() - Unmap an IB SGE list.
- */
-static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx)
-{
- struct scatterlist *sg;
- enum dma_data_direction dir;
-
- BUG_ON(!ch);
- BUG_ON(!ioctx);
- BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs);
-
- while (ioctx->n_rdma)
- kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list);
-
- kfree(ioctx->rdma_wrs);
- ioctx->rdma_wrs = NULL;
-
- if (ioctx->mapped_sg_count) {
- sg = ioctx->sg;
- WARN_ON(!sg);
- dir = ioctx->cmd.data_direction;
- BUG_ON(dir == DMA_NONE);
- ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt,
- target_reverse_dma_direction(&ioctx->cmd));
- ioctx->mapped_sg_count = 0;
- }
-}
-
-/**
- * srpt_map_sg_to_ib_sge() - Map an SG list to an IB SGE list.
- */
-static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx)
-{
- struct ib_device *dev = ch->sport->sdev->device;
- struct se_cmd *cmd;
- struct scatterlist *sg, *sg_orig;
- int sg_cnt;
- enum dma_data_direction dir;
- struct ib_rdma_wr *riu;
- struct srp_direct_buf *db;
- dma_addr_t dma_addr;
- struct ib_sge *sge;
- u64 raddr;
- u32 rsize;
- u32 tsize;
- u32 dma_len;
- int count, nrdma;
- int i, j, k;
-
- BUG_ON(!ch);
- BUG_ON(!ioctx);
- cmd = &ioctx->cmd;
- dir = cmd->data_direction;
- BUG_ON(dir == DMA_NONE);
-
- ioctx->sg = sg = sg_orig = cmd->t_data_sg;
- ioctx->sg_cnt = sg_cnt = cmd->t_data_nents;
-
- count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt,
- target_reverse_dma_direction(cmd));
- if (unlikely(!count))
- return -EAGAIN;
-
- ioctx->mapped_sg_count = count;
-
- if (ioctx->rdma_wrs && ioctx->n_rdma_wrs)
- nrdma = ioctx->n_rdma_wrs;
- else {
- nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
- + ioctx->n_rbuf;
-
- ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs),
- GFP_KERNEL);
- if (!ioctx->rdma_wrs)
- goto free_mem;
-
- ioctx->n_rdma_wrs = nrdma;
- }
-
- db = ioctx->rbufs;
- tsize = cmd->data_length;
- dma_len = ib_sg_dma_len(dev, &sg[0]);
- riu = ioctx->rdma_wrs;
-
- /*
- * For each remote desc - calculate the #ib_sge.
- * If #ib_sge < SRPT_DEF_SG_PER_WQE per rdma operation then
- * each remote desc rdma_iu is required a rdma wr;
- * else
- * we need to allocate extra rdma_iu to carry extra #ib_sge in
- * another rdma wr
- */
- for (i = 0, j = 0;
- j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
- rsize = be32_to_cpu(db->len);
- raddr = be64_to_cpu(db->va);
- riu->remote_addr = raddr;
- riu->rkey = be32_to_cpu(db->key);
- riu->wr.num_sge = 0;
-
- /* calculate how many sge required for this remote_buf */
- while (rsize > 0 && tsize > 0) {
-
- if (rsize >= dma_len) {
- tsize -= dma_len;
- rsize -= dma_len;
- raddr += dma_len;
-
- if (tsize > 0) {
- ++j;
- if (j < count) {
- sg = sg_next(sg);
- dma_len = ib_sg_dma_len(
- dev, sg);
- }
- }
- } else {
- tsize -= rsize;
- dma_len -= rsize;
- rsize = 0;
- }
-
- ++riu->wr.num_sge;
-
- if (rsize > 0 &&
- riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) {
- ++ioctx->n_rdma;
- riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
- sizeof(*riu->wr.sg_list),
- GFP_KERNEL);
- if (!riu->wr.sg_list)
- goto free_mem;
-
- ++riu;
- riu->wr.num_sge = 0;
- riu->remote_addr = raddr;
- riu->rkey = be32_to_cpu(db->key);
- }
- }
-
- ++ioctx->n_rdma;
- riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
- sizeof(*riu->wr.sg_list),
- GFP_KERNEL);
- if (!riu->wr.sg_list)
- goto free_mem;
- }
-
- db = ioctx->rbufs;
- tsize = cmd->data_length;
- riu = ioctx->rdma_wrs;
- sg = sg_orig;
- dma_len = ib_sg_dma_len(dev, &sg[0]);
- dma_addr = ib_sg_dma_address(dev, &sg[0]);
-
- /* this second loop is really mapped sg_addres to rdma_iu->ib_sge */
- for (i = 0, j = 0;
- j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
- rsize = be32_to_cpu(db->len);
- sge = riu->wr.sg_list;
- k = 0;
-
- while (rsize > 0 && tsize > 0) {
- sge->addr = dma_addr;
- sge->lkey = ch->sport->sdev->pd->local_dma_lkey;
-
- if (rsize >= dma_len) {
- sge->length =
- (tsize < dma_len) ? tsize : dma_len;
- tsize -= dma_len;
- rsize -= dma_len;
-
- if (tsize > 0) {
- ++j;
- if (j < count) {
- sg = sg_next(sg);
- dma_len = ib_sg_dma_len(
- dev, sg);
- dma_addr = ib_sg_dma_address(
- dev, sg);
- }
- }
- } else {
- sge->length = (tsize < rsize) ? tsize : rsize;
- tsize -= rsize;
- dma_len -= rsize;
- dma_addr += rsize;
- rsize = 0;
- }
-
- ++k;
- if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) {
- ++riu;
- sge = riu->wr.sg_list;
- k = 0;
- } else if (rsize > 0 && tsize > 0)
- ++sge;
- }
- }
-
- return 0;
-
-free_mem:
- srpt_unmap_sg_to_ib_sge(ch, ioctx);
-
- return -ENOMEM;
-}
-
-/**
* srpt_get_send_ioctx() - Obtain an I/O context for sending to the initiator.
*/
static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
@@ -1284,12 +1096,8 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
BUG_ON(ioctx->ch != ch);
spin_lock_init(&ioctx->spinlock);
ioctx->state = SRPT_STATE_NEW;
- ioctx->n_rbuf = 0;
- ioctx->rbufs = NULL;
ioctx->n_rdma = 0;
- ioctx->n_rdma_wrs = 0;
- ioctx->rdma_wrs = NULL;
- ioctx->mapped_sg_count = 0;
+ ioctx->n_rw_ctx = 0;
init_completion(&ioctx->tx_done);
ioctx->queue_status_only = false;
/*
@@ -1359,7 +1167,6 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
* SRP_RSP sending failed or the SRP_RSP send completion has
* not been received in time.
*/
- srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
transport_generic_free_cmd(&ioctx->cmd, 0);
break;
case SRPT_STATE_MGMT_RSP_SENT:
@@ -1387,6 +1194,7 @@ static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
WARN_ON(ioctx->n_rdma <= 0);
atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
+ ioctx->n_rdma = 0;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
@@ -1403,23 +1211,6 @@ static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
__LINE__, srpt_get_cmd_state(ioctx));
}
-static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct srpt_send_ioctx *ioctx =
- container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
-
- if (unlikely(wc->status != IB_WC_SUCCESS)) {
- /*
- * Note: if an RDMA write error completion is received that
- * means that a SEND also has been posted. Defer further
- * processing of the associated command until the send error
- * completion has been received.
- */
- pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
- ioctx, wc->status);
- }
-}
-
/**
* srpt_build_cmd_rsp() - Build an SRP_RSP response.
* @ch: RDMA channel through which the request has been received.
@@ -1537,6 +1328,8 @@ static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
{
struct se_cmd *cmd;
struct srp_cmd *srp_cmd;
+ struct scatterlist *sg = NULL;
+ unsigned sg_cnt = 0;
u64 data_len;
enum dma_data_direction dir;
int rc;
@@ -1563,16 +1356,21 @@ static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
break;
}
- if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
- pr_err("0x%llx: parsing SRP descriptor table failed.\n",
- srp_cmd->tag);
+ rc = srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &sg, &sg_cnt,
+ &data_len);
+ if (rc) {
+ if (rc != -EAGAIN) {
+ pr_err("0x%llx: parsing SRP descriptor table failed.\n",
+ srp_cmd->tag);
+ }
goto release_ioctx;
}
- rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb,
+ rc = target_submit_cmd_map_sgls(cmd, ch->sess, srp_cmd->cdb,
&send_ioctx->sense_data[0],
scsilun_to_int(&srp_cmd->lun), data_len,
- TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
+ TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF,
+ sg, sg_cnt, NULL, 0, NULL, 0);
if (rc != 0) {
pr_debug("target_submit_cmd() returned %d for tag %#llx\n", rc,
srp_cmd->tag);
@@ -1664,23 +1462,21 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
recv_ioctx->ioctx.dma, srp_max_req_size,
DMA_FROM_DEVICE);
- if (unlikely(ch->state == CH_CONNECTING)) {
- list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
- goto out;
- }
+ if (unlikely(ch->state == CH_CONNECTING))
+ goto out_wait;
if (unlikely(ch->state != CH_LIVE))
- goto out;
+ return;
srp_cmd = recv_ioctx->ioctx.buf;
if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT) {
- if (!send_ioctx)
+ if (!send_ioctx) {
+ if (!list_empty(&ch->cmd_wait_list))
+ goto out_wait;
send_ioctx = srpt_get_send_ioctx(ch);
- if (unlikely(!send_ioctx)) {
- list_add_tail(&recv_ioctx->wait_list,
- &ch->cmd_wait_list);
- goto out;
}
+ if (unlikely(!send_ioctx))
+ goto out_wait;
}
switch (srp_cmd->opcode) {
@@ -1709,8 +1505,10 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
}
srpt_post_recv(ch->sport->sdev, recv_ioctx);
-out:
return;
+
+out_wait:
+ list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
}
static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1779,14 +1577,13 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
state != SRPT_STATE_MGMT_RSP_SENT);
- atomic_inc(&ch->sq_wr_avail);
+ atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail);
if (wc->status != IB_WC_SUCCESS)
pr_info("sending response for ioctx 0x%p failed"
" with status %d\n", ioctx, wc->status);
if (state != SRPT_STATE_DONE) {
- srpt_unmap_sg_to_ib_sge(ch, ioctx);
transport_generic_free_cmd(&ioctx->cmd, 0);
} else {
pr_err("IB completion has been received too late for"
@@ -1832,8 +1629,18 @@ retry:
qp_init->srq = sdev->srq;
qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
qp_init->qp_type = IB_QPT_RC;
- qp_init->cap.max_send_wr = srp_sq_size;
- qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
+ /*
+ * We divide up our send queue size into half SEND WRs to send the
+ * completions, and half R/W contexts to actually do the RDMA
+ * READ/WRITE transfers. Note that we need to allocate CQ slots for
+ * both both, as RDMA contexts will also post completions for the
+ * RDMA READ case.
+ */
+ qp_init->cap.max_send_wr = srp_sq_size / 2;
+ qp_init->cap.max_rdma_ctxs = srp_sq_size / 2;
+ qp_init->cap.max_send_sge = max(sdev->device->attrs.max_sge_rd,
+ sdev->device->attrs.max_sge);
+ qp_init->port_num = ch->sport->port;
ch->qp = ib_create_qp(sdev->pd, qp_init);
if (IS_ERR(ch->qp)) {
@@ -2386,95 +2193,6 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
return ret;
}
-/**
- * srpt_perform_rdmas() - Perform IB RDMA.
- *
- * Returns zero upon success or a negative number upon failure.
- */
-static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx)
-{
- struct ib_send_wr *bad_wr;
- int sq_wr_avail, ret, i;
- enum dma_data_direction dir;
- const int n_rdma = ioctx->n_rdma;
-
- dir = ioctx->cmd.data_direction;
- if (dir == DMA_TO_DEVICE) {
- /* write */
- ret = -ENOMEM;
- sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail);
- if (sq_wr_avail < 0) {
- pr_warn("IB send queue full (needed %d)\n",
- n_rdma);
- goto out;
- }
- }
-
- for (i = 0; i < n_rdma; i++) {
- struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr;
-
- wr->opcode = (dir == DMA_FROM_DEVICE) ?
- IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
-
- if (i == n_rdma - 1) {
- /* only get completion event for the last rdma read */
- if (dir == DMA_TO_DEVICE) {
- wr->send_flags = IB_SEND_SIGNALED;
- ioctx->rdma_cqe.done = srpt_rdma_read_done;
- } else {
- ioctx->rdma_cqe.done = srpt_rdma_write_done;
- }
- wr->wr_cqe = &ioctx->rdma_cqe;
- wr->next = NULL;
- } else {
- wr->wr_cqe = NULL;
- wr->next = &ioctx->rdma_wrs[i + 1].wr;
- }
- }
-
- ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr);
- if (ret)
- pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
- __func__, __LINE__, ret, i, n_rdma);
-out:
- if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
- atomic_add(n_rdma, &ch->sq_wr_avail);
- return ret;
-}
-
-/**
- * srpt_xfer_data() - Start data transfer from initiator to target.
- */
-static int srpt_xfer_data(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx)
-{
- int ret;
-
- ret = srpt_map_sg_to_ib_sge(ch, ioctx);
- if (ret) {
- pr_err("%s[%d] ret=%d\n", __func__, __LINE__, ret);
- goto out;
- }
-
- ret = srpt_perform_rdmas(ch, ioctx);
- if (ret) {
- if (ret == -EAGAIN || ret == -ENOMEM)
- pr_info("%s[%d] queue full -- ret=%d\n",
- __func__, __LINE__, ret);
- else
- pr_err("%s[%d] fatal error -- ret=%d\n",
- __func__, __LINE__, ret);
- goto out_unmap;
- }
-
-out:
- return ret;
-out_unmap:
- srpt_unmap_sg_to_ib_sge(ch, ioctx);
- goto out;
-}
-
static int srpt_write_pending_status(struct se_cmd *se_cmd)
{
struct srpt_send_ioctx *ioctx;
@@ -2491,11 +2209,42 @@ static int srpt_write_pending(struct se_cmd *se_cmd)
struct srpt_send_ioctx *ioctx =
container_of(se_cmd, struct srpt_send_ioctx, cmd);
struct srpt_rdma_ch *ch = ioctx->ch;
+ struct ib_send_wr *first_wr = NULL, *bad_wr;
+ struct ib_cqe *cqe = &ioctx->rdma_cqe;
enum srpt_command_state new_state;
+ int ret, i;
new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA);
WARN_ON(new_state == SRPT_STATE_DONE);
- return srpt_xfer_data(ch, ioctx);
+
+ if (atomic_sub_return(ioctx->n_rdma, &ch->sq_wr_avail) < 0) {
+ pr_warn("%s: IB send queue full (needed %d)\n",
+ __func__, ioctx->n_rdma);
+ ret = -ENOMEM;
+ goto out_undo;
+ }
+
+ cqe->done = srpt_rdma_read_done;
+ for (i = ioctx->n_rw_ctx - 1; i >= 0; i--) {
+ struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
+
+ first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp, ch->sport->port,
+ cqe, first_wr);
+ cqe = NULL;
+ }
+
+ ret = ib_post_send(ch->qp, first_wr, &bad_wr);
+ if (ret) {
+ pr_err("%s: ib_post_send() returned %d for %d (avail: %d)\n",
+ __func__, ret, ioctx->n_rdma,
+ atomic_read(&ch->sq_wr_avail));
+ goto out_undo;
+ }
+
+ return 0;
+out_undo:
+ atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
+ return ret;
}
static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status)
@@ -2517,17 +2266,17 @@ static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status)
*/
static void srpt_queue_response(struct se_cmd *cmd)
{
- struct srpt_rdma_ch *ch;
- struct srpt_send_ioctx *ioctx;
+ struct srpt_send_ioctx *ioctx =
+ container_of(cmd, struct srpt_send_ioctx, cmd);
+ struct srpt_rdma_ch *ch = ioctx->ch;
+ struct srpt_device *sdev = ch->sport->sdev;
+ struct ib_send_wr send_wr, *first_wr = NULL, *bad_wr;
+ struct ib_sge sge;
enum srpt_command_state state;
unsigned long flags;
- int ret;
- enum dma_data_direction dir;
- int resp_len;
+ int resp_len, ret, i;
u8 srp_tm_status;
- ioctx = container_of(cmd, struct srpt_send_ioctx, cmd);
- ch = ioctx->ch;
BUG_ON(!ch);
spin_lock_irqsave(&ioctx->spinlock, flags);
@@ -2554,17 +2303,19 @@ static void srpt_queue_response(struct se_cmd *cmd)
return;
}
- dir = ioctx->cmd.data_direction;
-
/* For read commands, transfer the data to the initiator. */
- if (dir == DMA_FROM_DEVICE && ioctx->cmd.data_length &&
+ if (ioctx->cmd.data_direction == DMA_FROM_DEVICE &&
+ ioctx->cmd.data_length &&
!ioctx->queue_status_only) {
- ret = srpt_xfer_data(ch, ioctx);
- if (ret) {
- pr_err("xfer_data failed for tag %llu\n",
- ioctx->cmd.tag);
- return;
+ for (i = ioctx->n_rw_ctx - 1; i >= 0; i--) {
+ struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
+
+ first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp,
+ ch->sport->port, NULL,
+ first_wr ? first_wr : &send_wr);
}
+ } else {
+ first_wr = &send_wr;
}
if (state != SRPT_STATE_MGMT)
@@ -2576,14 +2327,46 @@ static void srpt_queue_response(struct se_cmd *cmd)
resp_len = srpt_build_tskmgmt_rsp(ch, ioctx, srp_tm_status,
ioctx->cmd.tag);
}
- ret = srpt_post_send(ch, ioctx, resp_len);
- if (ret) {
- pr_err("sending cmd response failed for tag %llu\n",
- ioctx->cmd.tag);
- srpt_unmap_sg_to_ib_sge(ch, ioctx);
- srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
- target_put_sess_cmd(&ioctx->cmd);
+
+ atomic_inc(&ch->req_lim);
+
+ if (unlikely(atomic_sub_return(1 + ioctx->n_rdma,
+ &ch->sq_wr_avail) < 0)) {
+ pr_warn("%s: IB send queue full (needed %d)\n",
+ __func__, ioctx->n_rdma);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, resp_len,
+ DMA_TO_DEVICE);
+
+ sge.addr = ioctx->ioctx.dma;
+ sge.length = resp_len;
+ sge.lkey = sdev->pd->local_dma_lkey;
+
+ ioctx->ioctx.cqe.done = srpt_send_done;
+ send_wr.next = NULL;
+ send_wr.wr_cqe = &ioctx->ioctx.cqe;
+ send_wr.sg_list = &sge;
+ send_wr.num_sge = 1;
+ send_wr.opcode = IB_WR_SEND;
+ send_wr.send_flags = IB_SEND_SIGNALED;
+
+ ret = ib_post_send(ch->qp, first_wr, &bad_wr);
+ if (ret < 0) {
+ pr_err("%s: sending cmd response failed for tag %llu (%d)\n",
+ __func__, ioctx->cmd.tag, ret);
+ goto out;
}
+
+ return;
+
+out:
+ atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail);
+ atomic_dec(&ch->req_lim);
+ srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
+ target_put_sess_cmd(&ioctx->cmd);
}
static int srpt_queue_data_in(struct se_cmd *cmd)
@@ -2599,10 +2382,6 @@ static void srpt_queue_tm_rsp(struct se_cmd *cmd)
static void srpt_aborted_task(struct se_cmd *cmd)
{
- struct srpt_send_ioctx *ioctx = container_of(cmd,
- struct srpt_send_ioctx, cmd);
-
- srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
}
static int srpt_queue_status(struct se_cmd *cmd)
@@ -2903,12 +2682,10 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
unsigned long flags;
WARN_ON(ioctx->state != SRPT_STATE_DONE);
- WARN_ON(ioctx->mapped_sg_count != 0);
- if (ioctx->n_rbuf > 1) {
- kfree(ioctx->rbufs);
- ioctx->rbufs = NULL;
- ioctx->n_rbuf = 0;
+ if (ioctx->n_rw_ctx) {
+ srpt_free_rw_ctxs(ch, ioctx);
+ ioctx->n_rw_ctx = 0;
}
spin_lock_irqsave(&ch->spinlock, flags);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index af9b8b527340..fee6bfd7ca21 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -42,6 +42,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_sa.h>
#include <rdma/ib_cm.h>
+#include <rdma/rw.h>
#include <scsi/srp.h>
@@ -105,7 +106,6 @@ enum {
SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
SRPT_DEF_SG_TABLESIZE = 128,
- SRPT_DEF_SG_PER_WQE = 16,
MIN_SRPT_SQ_SIZE = 16,
DEF_SRPT_SQ_SIZE = 4096,
@@ -174,21 +174,17 @@ struct srpt_recv_ioctx {
struct srpt_ioctx ioctx;
struct list_head wait_list;
};
+
+struct srpt_rw_ctx {
+ struct rdma_rw_ctx rw;
+ struct scatterlist *sg;
+ unsigned int nents;
+};
/**
* struct srpt_send_ioctx - SRPT send I/O context.
* @ioctx: See above.
* @ch: Channel pointer.
- * @free_list: Node in srpt_rdma_ch.free_list.
- * @n_rbuf: Number of data buffers in the received SRP command.
- * @rbufs: Pointer to SRP data buffer array.
- * @single_rbuf: SRP data buffer if the command has only a single buffer.
- * @sg: Pointer to sg-list associated with this I/O context.
- * @sg_cnt: SG-list size.
- * @mapped_sg_count: ib_dma_map_sg() return value.
- * @n_rdma_wrs: Number of elements in the rdma_wrs array.
- * @rdma_wrs: Array with information about the RDMA mapping.
- * @tag: Tag of the received SRP information unit.
* @spinlock: Protects 'state'.
* @state: I/O context state.
* @cmd: Target core command data structure.
@@ -197,21 +193,18 @@ struct srpt_recv_ioctx {
struct srpt_send_ioctx {
struct srpt_ioctx ioctx;
struct srpt_rdma_ch *ch;
- struct ib_rdma_wr *rdma_wrs;
+
+ struct srpt_rw_ctx s_rw_ctx;
+ struct srpt_rw_ctx *rw_ctxs;
+
struct ib_cqe rdma_cqe;
- struct srp_direct_buf *rbufs;
- struct srp_direct_buf single_rbuf;
- struct scatterlist *sg;
struct list_head free_list;
spinlock_t spinlock;
enum srpt_command_state state;
struct se_cmd cmd;
struct completion tx_done;
- int sg_cnt;
- int mapped_sg_count;
- u16 n_rdma_wrs;
u8 n_rdma;
- u8 n_rbuf;
+ u8 n_rw_ctx;
bool queue_status_only;
u8 sense_data[TRANSPORT_SENSE_BUFFER];
};