Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--  drivers/infiniband/hw/bnxt_re/Makefile          |   3
-rw-r--r--  drivers/infiniband/hw/bnxt_re/bnxt_re.h         |  47
-rw-r--r--  drivers/infiniband/hw/bnxt_re/debugfs.c         | 138
-rw-r--r--  drivers/infiniband/hw/bnxt_re/debugfs.h         |  21
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c        | 130
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h        |   4
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c            | 453
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c        |  73
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.h        |  23
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c      |  19
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.h      |   2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.h       |  13
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.c        |  35
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.h        |   2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/roce_hsi.h        |  57
-rw-r--r--  drivers/infiniband/hw/efa/efa_admin_cmds_defs.h |  63
-rw-r--r--  drivers/infiniband/hw/efa/efa_admin_defs.h      |   4
-rw-r--r--  drivers/infiniband/hw/efa/efa_com_cmd.c         |   6
-rw-r--r--  drivers/infiniband/hw/efa/efa_com_cmd.h         |   4
-rw-r--r--  drivers/infiniband/hw/efa/efa_io_defs.h         | 106
-rw-r--r--  drivers/infiniband/hw/efa/efa_verbs.c           |  51
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c               |   2
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.h               |   1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c         |   4
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_debugfs.c    |   3
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h     |  14
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.c        |  48
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c      | 257
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h      |   8
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c       |   7
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c         |  11
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c         |  77
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_srq.c        |   4
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c               |  93
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.h               |   4
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c                |   8
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c               |  78
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h            |   3
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c                 |  51
 39 files changed, 1570 insertions(+), 357 deletions(-)
diff --git a/drivers/infiniband/hw/bnxt_re/Makefile b/drivers/infiniband/hw/bnxt_re/Makefile
index ee9bb1be61ea..f63417d2ccc6 100644
--- a/drivers/infiniband/hw/bnxt_re/Makefile
+++ b/drivers/infiniband/hw/bnxt_re/Makefile
@@ -4,4 +4,5 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/broadcom/bnxt
obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re.o
bnxt_re-y := main.o ib_verbs.o \
qplib_res.o qplib_rcfw.o \
- qplib_sp.o qplib_fp.o hw_counters.o
+ qplib_sp.o qplib_fp.o hw_counters.o \
+ debugfs.o
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index e94518b12f86..2975b11b79bf 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -154,8 +154,25 @@ struct bnxt_re_pacing {
#define BNXT_RE_GRC_FIFO_REG_BASE 0x2000
+#define BNXT_RE_MIN_MSIX 2
+#define BNXT_RE_MAX_MSIX BNXT_MAX_ROCE_MSIX
+struct bnxt_re_nq_record {
+ struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX];
+ struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX];
+ int num_msix;
+ /* serialize NQ access */
+ struct mutex load_lock;
+};
+
#define MAX_CQ_HASH_BITS (16)
#define MAX_SRQ_HASH_BITS (16)
+
+static inline bool bnxt_re_chip_gen_p7(u16 chip_num)
+{
+ return (chip_num == CHIP_NUM_58818 ||
+ chip_num == CHIP_NUM_57608);
+}
+
struct bnxt_re_dev {
struct ib_device ibdev;
struct list_head list;
@@ -174,27 +191,24 @@ struct bnxt_re_dev {
unsigned int version, major, minor;
struct bnxt_qplib_chip_ctx *chip_ctx;
struct bnxt_en_dev *en_dev;
- int num_msix;
int id;
struct delayed_work worker;
u8 cur_prio_map;
- /* FP Notification Queue (CQ & SRQ) */
- struct tasklet_struct nq_task;
-
/* RCFW Channel */
struct bnxt_qplib_rcfw rcfw;
- /* NQ */
- struct bnxt_qplib_nq nq[BNXT_MAX_ROCE_MSIX];
+ /* NQ record */
+ struct bnxt_re_nq_record *nqr;
/* Device Resources */
struct bnxt_qplib_dev_attr dev_attr;
struct bnxt_qplib_ctx qplib_ctx;
struct bnxt_qplib_res qplib_res;
struct bnxt_qplib_dpi dpi_privileged;
+ struct bnxt_qplib_cq_coal_param cq_coalescing;
struct mutex qp_lock; /* protect qp list */
struct list_head qp_list;
@@ -213,6 +227,8 @@ struct bnxt_re_dev {
struct delayed_work dbq_pacing_work;
DECLARE_HASHTABLE(cq_hash, MAX_CQ_HASH_BITS);
DECLARE_HASHTABLE(srq_hash, MAX_SRQ_HASH_BITS);
+ struct dentry *dbg_root;
+ struct dentry *qp_debugfs;
};
#define to_bnxt_re_dev(ptr, member) \
@@ -239,4 +255,23 @@ static inline void bnxt_re_set_pacing_dev_state(struct bnxt_re_dev *rdev)
rdev->qplib_res.pacing_data->dev_err_state =
test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
}
+
+static inline int bnxt_re_read_context_allowed(struct bnxt_re_dev *rdev)
+{
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ||
+ rdev->rcfw.res->cctx->hwrm_intf_ver < HWRM_VERSION_READ_CTX)
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+#define BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5 1088
+#define BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P5 128
+#define BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P5 128
+#define BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P5 192
+
+#define BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 1088
+#define BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P7 192
+#define BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 192
+#define BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 192
+
#endif
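
The read-context gate and the per-generation context sizes added above are consumed by the restrack raw dumpers introduced in main.c later in this series. A minimal sketch (not part of the patch) of how a caller sizes its buffer, using the existing bnxt_qplib_is_chip_gen_p7() helper from qplib_res.h:

/* Sketch only: pick the QPC read buffer size for the running chip. */
static int bnxt_re_qpc_read_size(struct bnxt_re_dev *rdev)
{
	int rc;

	rc = bnxt_re_read_context_allowed(rdev);
	if (rc)
		return rc;	/* -EOPNOTSUPP on pre-P5 chips or old firmware */

	return bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ?
	       BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 :
	       BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5;
}
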
diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c
new file mode 100644
index 000000000000..7c47039044ef
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright (c) 2024, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * Description: Debugfs component of the bnxt_re driver
+ */
+
+#include <linux/debugfs.h>
+#include <linux/pci.h>
+#include <rdma/ib_addr.h>
+
+#include "bnxt_ulp.h"
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+#include "qplib_rcfw.h"
+#include "bnxt_re.h"
+#include "ib_verbs.h"
+#include "debugfs.h"
+
+static struct dentry *bnxt_re_debugfs_root;
+
+static inline const char *bnxt_re_qp_state_str(u8 state)
+{
+ switch (state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RESET:
+ return "RST";
+ case CMDQ_MODIFY_QP_NEW_STATE_INIT:
+ return "INIT";
+ case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+ return "RTR";
+ case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+ return "RTS";
+ case CMDQ_MODIFY_QP_NEW_STATE_SQE:
+ return "SQER";
+ case CMDQ_MODIFY_QP_NEW_STATE_SQD:
+ return "SQD";
+ case CMDQ_MODIFY_QP_NEW_STATE_ERR:
+ return "ERR";
+ default:
+ return "Invalid QP state";
+ }
+}
+
+static inline const char *bnxt_re_qp_type_str(u8 type)
+{
+ switch (type) {
+ case CMDQ_CREATE_QP1_TYPE_GSI: return "QP1";
+ case CMDQ_CREATE_QP_TYPE_GSI: return "QP1";
+ case CMDQ_CREATE_QP_TYPE_RC: return "RC";
+ case CMDQ_CREATE_QP_TYPE_UD: return "UD";
+ case CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE: return "RAW_ETHERTYPE";
+ default: return "Invalid transport type";
+ }
+}
+
+static ssize_t qp_info_read(struct file *filep,
+ char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct bnxt_re_qp *qp = filep->private_data;
+ char *buf;
+ int len;
+
+ if (*ppos)
+ return 0;
+
+ buf = kasprintf(GFP_KERNEL,
+ "QPN\t\t: %d\n"
+ "transport\t: %s\n"
+ "state\t\t: %s\n"
+ "mtu\t\t: %d\n"
+ "timeout\t\t: %d\n"
+ "remote QPN\t: %d\n",
+ qp->qplib_qp.id,
+ bnxt_re_qp_type_str(qp->qplib_qp.type),
+ bnxt_re_qp_state_str(qp->qplib_qp.state),
+ qp->qplib_qp.mtu,
+ qp->qplib_qp.timeout,
+ qp->qplib_qp.dest_qpn);
+ if (!buf)
+ return -ENOMEM;
+ if (count < strlen(buf)) {
+ kfree(buf);
+ return -ENOSPC;
+ }
+ len = simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+ kfree(buf);
+ return len;
+}
+
+static const struct file_operations debugfs_qp_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = qp_info_read,
+};
+
+void bnxt_re_debug_add_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
+{
+ char resn[32];
+
+ sprintf(resn, "0x%x", qp->qplib_qp.id);
+ qp->dentry = debugfs_create_file(resn, 0400, rdev->qp_debugfs, qp, &debugfs_qp_fops);
+}
+
+void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
+{
+ debugfs_remove(qp->dentry);
+}
+
+void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev)
+{
+ struct pci_dev *pdev = rdev->en_dev->pdev;
+
+ rdev->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), bnxt_re_debugfs_root);
+
+ rdev->qp_debugfs = debugfs_create_dir("QPs", rdev->dbg_root);
+}
+
+void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev)
+{
+ debugfs_remove_recursive(rdev->qp_debugfs);
+
+ debugfs_remove_recursive(rdev->dbg_root);
+ rdev->dbg_root = NULL;
+}
+
+void bnxt_re_register_debugfs(void)
+{
+ bnxt_re_debugfs_root = debugfs_create_dir("bnxt_re", NULL);
+}
+
+void bnxt_re_unregister_debugfs(void)
+{
+ debugfs_remove(bnxt_re_debugfs_root);
+}
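
With the hooks above, each QP gets a read-only file named after its hexadecimal id under <debugfs>/bnxt_re/<PCI device>/QPs/. A throwaway userspace reader, purely illustrative (the debugfs mount point, PCI device name and QP id below are hypothetical):

#include <stdio.h>

int main(void)
{
	/* Hypothetical path; adjust to your debugfs mount and device. */
	FILE *f = fopen("/sys/kernel/debug/bnxt_re/0000:3b:00.0/QPs/0x2", "r");
	char line[128];

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* QPN, transport, state, mtu, timeout, remote QPN */
	fclose(f);
	return 0;
}
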
diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.h b/drivers/infiniband/hw/bnxt_re/debugfs.h
new file mode 100644
index 000000000000..cd3be0a9ec7e
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.h
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright (c) 2024, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * Description: Debugfs header
+ */
+
+#ifndef __BNXT_RE_DEBUGFS__
+#define __BNXT_RE_DEBUGFS__
+
+void bnxt_re_debug_add_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp);
+void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp);
+
+void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev);
+void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev);
+
+void bnxt_re_register_debugfs(void);
+void bnxt_re_unregister_debugfs(void);
+
+#endif
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index e66ae9f22c71..82023394e330 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -62,6 +62,7 @@
#include "bnxt_re.h"
#include "ib_verbs.h"
+#include "debugfs.h"
#include <rdma/uverbs_types.h>
#include <rdma/uverbs_std_types.h>
@@ -94,9 +95,9 @@ static int __from_ib_access_flags(int iflags)
return qflags;
};
-static enum ib_access_flags __to_ib_access_flags(int qflags)
+static int __to_ib_access_flags(int qflags)
{
- enum ib_access_flags iflags = 0;
+ int iflags = 0;
if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
iflags |= IB_ACCESS_LOCAL_WRITE;
@@ -113,7 +114,49 @@ static enum ib_access_flags __to_ib_access_flags(int qflags)
if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
iflags |= IB_ACCESS_ON_DEMAND;
return iflags;
-};
+}
+
+static u8 __qp_access_flags_from_ib(struct bnxt_qplib_chip_ctx *cctx, int iflags)
+{
+ u8 qflags = 0;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(cctx))
+ /* For Wh+ */
+ return (u8)__from_ib_access_flags(iflags);
+
+ /* For P5, P7 and later chips */
+ if (iflags & IB_ACCESS_LOCAL_WRITE)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_WRITE)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_READ)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ;
+ if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC;
+
+ return qflags;
+}
+
+static int __qp_access_flags_to_ib(struct bnxt_qplib_chip_ctx *cctx, u8 qflags)
+{
+ int iflags = 0;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(cctx))
+ /* For Wh+ */
+ return __to_ib_access_flags(qflags);
+
+ /* For P5, P7 and later chips */
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE)
+ iflags |= IB_ACCESS_LOCAL_WRITE;
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE)
+ iflags |= IB_ACCESS_REMOTE_WRITE;
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_READ)
+ iflags |= IB_ACCESS_REMOTE_READ;
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC)
+ iflags |= IB_ACCESS_REMOTE_ATOMIC;
+
+ return iflags;
+}
static void bnxt_re_check_and_set_relaxed_ordering(struct bnxt_re_dev *rdev,
struct bnxt_qplib_mrw *qplib_mr)
@@ -211,6 +254,22 @@ int bnxt_re_query_device(struct ib_device *ibdev,
return 0;
}
+int bnxt_re_modify_device(struct ib_device *ibdev,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify)
+{
+ ibdev_dbg(ibdev, "Modify device with mask 0x%x", device_modify_mask);
+
+ if (device_modify_mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (!(device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC))
+ return 0;
+
+ memcpy(ibdev->node_desc, device_modify->node_desc, IB_DEVICE_NODE_DESC_MAX);
+ return 0;
+}
+
/* Port */
int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
struct ib_port_attr *port_attr)
@@ -939,6 +998,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
else if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD)
atomic_dec(&rdev->stats.res.ud_qp_count);
+ bnxt_re_debug_rem_qpinfo(rdev, qp);
+
ib_umem_release(qp->rumem);
ib_umem_release(qp->sumem);
@@ -1622,6 +1683,7 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr,
if (active_qps > rdev->stats.res.ud_qp_watermark)
rdev->stats.res.ud_qp_watermark = active_qps;
}
+ bnxt_re_debug_add_qpinfo(rdev, qp);
return 0;
qp_destroy:
@@ -1814,8 +1876,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges);
srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit;
srq->srq_limit = srq_init_attr->attr.srq_limit;
- srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id;
- nq = &rdev->nq[0];
+ srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id;
+ nq = &rdev->nqr->nq[0];
if (udata) {
rc = bnxt_re_init_user_srq(rdev, pd, srq, udata);
@@ -2041,12 +2103,10 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS;
qp->qplib_qp.access =
- __from_ib_access_flags(qp_attr->qp_access_flags);
+ __qp_access_flags_from_ib(qp->qplib_qp.cctx,
+ qp_attr->qp_access_flags);
/* LOCAL_WRITE access must be set to allow RC receive */
- qp->qplib_qp.access |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
- /* Temp: Set all params on QP as of now */
- qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE;
- qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ;
+ qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE;
}
if (qp_attr_mask & IB_QP_PKEY_INDEX) {
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
@@ -2080,7 +2140,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
qp->qplib_qp.ah.sgid_index = ctx->idx;
qp->qplib_qp.ah.host_sgid_index = grh->sgid_index;
qp->qplib_qp.ah.hop_limit = grh->hop_limit;
- qp->qplib_qp.ah.traffic_class = grh->traffic_class;
+ qp->qplib_qp.ah.traffic_class = grh->traffic_class >> 2;
qp->qplib_qp.ah.sl = rdma_ah_get_sl(&qp_attr->ah_attr);
ether_addr_copy(qp->qplib_qp.ah.dmac,
qp_attr->ah_attr.roce.dmac);
@@ -2251,7 +2311,8 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state);
qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state);
qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0;
- qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access);
+ qp_attr->qp_access_flags = __qp_access_flags_to_ib(qp->qplib_qp.cctx,
+ qplib_qp->access);
qp_attr->pkey_index = qplib_qp->pkey_index;
qp_attr->qkey = qplib_qp->qkey;
qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
@@ -2972,6 +3033,28 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
return rc;
}
+static struct bnxt_qplib_nq *bnxt_re_get_nq(struct bnxt_re_dev *rdev)
+{
+ int min, indx;
+
+ mutex_lock(&rdev->nqr->load_lock);
+ for (indx = 0, min = 0; indx < (rdev->nqr->num_msix - 1); indx++) {
+ if (rdev->nqr->nq[min].load > rdev->nqr->nq[indx].load)
+ min = indx;
+ }
+ rdev->nqr->nq[min].load++;
+ mutex_unlock(&rdev->nqr->load_lock);
+
+ return &rdev->nqr->nq[min];
+}
+
+static void bnxt_re_put_nq(struct bnxt_re_dev *rdev, struct bnxt_qplib_nq *nq)
+{
+ mutex_lock(&rdev->nqr->load_lock);
+ nq->load--;
+ mutex_unlock(&rdev->nqr->load_lock);
+}
+
/* Completion Queues */
int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
@@ -2990,6 +3073,8 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
hash_del(&cq->hash_entry);
}
bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
+
+ bnxt_re_put_nq(rdev, nq);
ib_umem_release(cq->umem);
atomic_dec(&rdev->stats.res.cq_count);
@@ -3008,8 +3093,6 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
struct bnxt_qplib_chip_ctx *cctx;
- struct bnxt_qplib_nq *nq = NULL;
- unsigned int nq_alloc_cnt;
int cqe = attr->cqe;
int rc, entries;
u32 active_cqs;
@@ -3060,15 +3143,10 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->qplib_cq.dpi = &rdev->dpi_privileged;
}
- /*
- * Allocating the NQ in a round robin fashion. nq_alloc_cnt is a
- * used for getting the NQ index.
- */
- nq_alloc_cnt = atomic_inc_return(&rdev->nq_alloc_cnt);
- nq = &rdev->nq[nq_alloc_cnt % (rdev->num_msix - 1)];
cq->qplib_cq.max_wqe = entries;
- cq->qplib_cq.cnq_hw_ring_id = nq->ring_id;
- cq->qplib_cq.nq = nq;
+ cq->qplib_cq.coalescing = &rdev->cq_coalescing;
+ cq->qplib_cq.nq = bnxt_re_get_nq(rdev);
+ cq->qplib_cq.cnq_hw_ring_id = cq->qplib_cq.nq->ring_id;
rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq);
if (rc) {
@@ -3078,7 +3156,6 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->ib_cq.cqe = entries;
cq->cq_period = cq->qplib_cq.period;
- nq->budget++;
active_cqs = atomic_inc_return(&rdev->stats.res.cq_count);
if (active_cqs > rdev->stats.res.cq_watermark)
@@ -3633,7 +3710,7 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *gsi_sqp,
wc->byte_len = orig_cqe->length;
wc->qp = &gsi_qp->ib_qp;
- wc->ex.imm_data = cpu_to_be32(le32_to_cpu(orig_cqe->immdata));
+ wc->ex.imm_data = cpu_to_be32(orig_cqe->immdata);
wc->src_qp = orig_cqe->src_qp;
memcpy(wc->smac, orig_cqe->smac, ETH_ALEN);
if (bnxt_re_is_vlan_pkt(orig_cqe, &vlan_id, &sl)) {
@@ -3778,7 +3855,10 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
(unsigned long)(cqe->qp_handle),
struct bnxt_re_qp, qplib_qp);
wc->qp = &qp->ib_qp;
- wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->immdata));
+ if (cqe->flags & CQ_RES_RC_FLAGS_IMM)
+ wc->ex.imm_data = cpu_to_be32(cqe->immdata);
+ else
+ wc->ex.invalidate_rkey = cqe->invrkey;
wc->src_qp = cqe->src_qp;
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->port_num = 1;
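
One subtle change above is "qp->qplib_qp.ah.traffic_class = grh->traffic_class >> 2" in bnxt_re_modify_qp(): the GRH traffic class carries the DSCP codepoint in its upper six bits and ECN in the lower two, so the shift appears intended to hand firmware a bare DSCP value. A one-line sketch, not from the patch:

/* Sketch: traffic_class = (DSCP << 2) | ECN, so ">> 2" recovers DSCP. */
static inline unsigned char grh_tclass_to_dscp(unsigned char tclass)
{
	return tclass >> 2;	/* e.g. 0xb8 -> 46 (EF) */
}
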
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index b789e47ec97a..ac59f1d73b15 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -95,6 +95,7 @@ struct bnxt_re_qp {
struct ib_ud_header qp1_hdr;
struct bnxt_re_cq *scq;
struct bnxt_re_cq *rcq;
+ struct dentry *dentry;
};
struct bnxt_re_cq {
@@ -196,6 +197,9 @@ static inline bool bnxt_re_is_var_size_supported(struct bnxt_re_dev *rdev,
int bnxt_re_query_device(struct ib_device *ibdev,
struct ib_device_attr *ib_attr,
struct ib_udata *udata);
+int bnxt_re_modify_device(struct ib_device *ibdev,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify);
int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
struct ib_port_attr *port_attr);
int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 9eb290ec71a8..b7af0d5ff3b6 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -67,6 +67,7 @@
#include <rdma/bnxt_re-abi.h>
#include "bnxt.h"
#include "hw_counters.h"
+#include "debugfs.h"
static char version[] =
BNXT_RE_DESC "\n";
@@ -183,6 +184,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
rdev->rcfw.res = &rdev->qplib_res;
rdev->qplib_res.dattr = &rdev->dev_attr;
rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev);
+ rdev->qplib_res.en_dev = en_dev;
bnxt_re_set_drv_mode(rdev);
@@ -287,12 +289,17 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev)
{
+ /*
+ * Use the total VF count since the actual VF count may not be
+ * available at this point.
+ */
rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev);
- if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
- bnxt_re_set_resource_limits(rdev);
- bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
- &rdev->qplib_ctx);
- }
+ if (!rdev->num_vfs)
+ return;
+
+ bnxt_re_set_resource_limits(rdev);
+ bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
+ &rdev->qplib_ctx);
}
static void bnxt_re_shutdown(struct auxiliary_device *adev)
@@ -316,8 +323,8 @@ static void bnxt_re_stop_irq(void *handle)
rdev = en_info->rdev;
rcfw = &rdev->rcfw;
- for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) {
- nq = &rdev->nq[indx - 1];
+ for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) {
+ nq = &rdev->nqr->nq[indx - 1];
bnxt_qplib_nq_stop_irq(nq, false);
}
@@ -334,7 +341,7 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
int indx, rc;
rdev = en_info->rdev;
- msix_ent = rdev->en_dev->msix_entries;
+ msix_ent = rdev->nqr->msix_entries;
rcfw = &rdev->rcfw;
if (!ent) {
/* Not setting the f/w timeout bit in rcfw.
@@ -349,8 +356,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
/* Vectors may change after restart, so update with new vectors
* in device structure.
*/
- for (indx = 0; indx < rdev->num_msix; indx++)
- rdev->en_dev->msix_entries[indx].vector = ent[indx].vector;
+ for (indx = 0; indx < rdev->nqr->num_msix; indx++)
+ rdev->nqr->msix_entries[indx].vector = ent[indx].vector;
rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
false);
@@ -358,8 +365,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n");
return;
}
- for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) {
- nq = &rdev->nq[indx - 1];
+ for (indx = BNXT_RE_NQ_IDX ; indx < rdev->nqr->num_msix; indx++) {
+ nq = &rdev->nqr->nq[indx - 1];
rc = bnxt_qplib_nq_start_irq(nq, indx - 1,
msix_ent[indx].vector, false);
if (rc) {
@@ -873,6 +880,253 @@ static const struct attribute_group bnxt_re_dev_attr_group = {
.attrs = bnxt_re_attributes,
};
+static int bnxt_re_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr)
+{
+ struct bnxt_qplib_hwq *mr_hwq;
+ struct nlattr *table_attr;
+ struct bnxt_re_mr *mr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+ mr_hwq = &mr->qplib_mr.hwq;
+
+ if (rdma_nl_put_driver_u32(msg, "page_size",
+ mr_hwq->qe_ppg * mr_hwq->element_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "max_elements", mr_hwq->max_elements))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "element_size", mr_hwq->element_size))
+ goto err;
+ if (rdma_nl_put_driver_u64_hex(msg, "hwq", (unsigned long)mr_hwq))
+ goto err;
+ if (rdma_nl_put_driver_u64_hex(msg, "va", mr->qplib_mr.va))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int bnxt_re_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr)
+{
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_mr *mr;
+ int err, len;
+ void *data;
+
+ mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+ rdev = mr->rdev;
+
+ err = bnxt_re_read_context_allowed(rdev);
+ if (err)
+ return err;
+
+ len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 :
+ BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P5;
+ data = kzalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_MRW,
+ mr->qplib_mr.lkey, len, data);
+ if (!err)
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+
+ kfree(data);
+ return err;
+}
+
+static int bnxt_re_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq)
+{
+ struct bnxt_qplib_hwq *cq_hwq;
+ struct nlattr *table_attr;
+ struct bnxt_re_cq *cq;
+
+ cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ cq_hwq = &cq->qplib_cq.hwq;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32(msg, "cq_depth", cq_hwq->depth))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "max_elements", cq_hwq->max_elements))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "element_size", cq_hwq->element_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "max_wqe", cq->qplib_cq.max_wqe))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int bnxt_re_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq)
+{
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_cq *cq;
+ int err, len;
+ void *data;
+
+ cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ rdev = cq->rdev;
+
+ err = bnxt_re_read_context_allowed(rdev);
+ if (err)
+ return err;
+
+ len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P7 :
+ BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P5;
+ data = kzalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = bnxt_qplib_read_context(&rdev->rcfw,
+ CMDQ_READ_CONTEXT_TYPE_CQ,
+ cq->qplib_cq.id, len, data);
+ if (!err)
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+
+ kfree(data);
+ return err;
+}
+
+static int bnxt_re_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp)
+{
+ struct bnxt_qplib_qp *qplib_qp;
+ struct nlattr *table_attr;
+ struct bnxt_re_qp *qp;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ qplib_qp = &qp->qplib_qp;
+
+ if (rdma_nl_put_driver_u32(msg, "sq_max_wqe", qplib_qp->sq.max_wqe))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sq_max_sge", qplib_qp->sq.max_sge))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sq_wqe_size", qplib_qp->sq.wqe_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sq_swq_start", qplib_qp->sq.swq_start))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sq_swq_last", qplib_qp->sq.swq_last))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_max_wqe", qplib_qp->rq.max_wqe))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_max_sge", qplib_qp->rq.max_sge))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_wqe_size", qplib_qp->rq.wqe_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_swq_start", qplib_qp->rq.swq_start))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_swq_last", qplib_qp->rq.swq_last))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "timeout", qplib_qp->timeout))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int bnxt_re_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibqp->device, ibdev);
+ int err, len;
+ void *data;
+
+ err = bnxt_re_read_context_allowed(rdev);
+ if (err)
+ return err;
+
+ len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 :
+ BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5;
+ data = kzalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_QPC,
+ ibqp->qp_num, len, data);
+ if (!err)
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+
+ kfree(data);
+ return err;
+}
+
+static int bnxt_re_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq)
+{
+ struct nlattr *table_attr;
+ struct bnxt_re_srq *srq;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
+
+ if (rdma_nl_put_driver_u32_hex(msg, "wqe_size", srq->qplib_srq.wqe_size))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "max_wqe", srq->qplib_srq.max_wqe))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "max_sge", srq->qplib_srq.max_sge))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int bnxt_re_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq)
+{
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_srq *srq;
+ int err, len;
+ void *data;
+
+ srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
+ rdev = srq->rdev;
+
+ err = bnxt_re_read_context_allowed(rdev);
+ if (err)
+ return err;
+
+ len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 :
+ BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P5;
+
+ data = kzalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_SRQ,
+ srq->qplib_srq.id, len, data);
+ if (!err)
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+
+ kfree(data);
+ return err;
+}
+
static const struct ib_device_ops bnxt_re_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_BNXT_RE,
@@ -914,6 +1168,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.post_srq_recv = bnxt_re_post_srq_recv,
.query_ah = bnxt_re_query_ah,
.query_device = bnxt_re_query_device,
+ .modify_device = bnxt_re_modify_device,
.query_pkey = bnxt_re_query_pkey,
.query_port = bnxt_re_query_port,
.query_qp = bnxt_re_query_qp,
@@ -930,6 +1185,17 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx),
};
+static const struct ib_device_ops restrack_ops = {
+ .fill_res_cq_entry = bnxt_re_fill_res_cq_entry,
+ .fill_res_cq_entry_raw = bnxt_re_fill_res_cq_entry_raw,
+ .fill_res_qp_entry = bnxt_re_fill_res_qp_entry,
+ .fill_res_qp_entry_raw = bnxt_re_fill_res_qp_entry_raw,
+ .fill_res_mr_entry = bnxt_re_fill_res_mr_entry,
+ .fill_res_mr_entry_raw = bnxt_re_fill_res_mr_entry_raw,
+ .fill_res_srq_entry = bnxt_re_fill_res_srq_entry,
+ .fill_res_srq_entry_raw = bnxt_re_fill_res_srq_entry_raw,
+};
+
static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
{
struct ib_device *ibdev = &rdev->ibdev;
@@ -943,7 +1209,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr);
- ibdev->num_comp_vectors = rdev->num_msix - 1;
+ ibdev->num_comp_vectors = rdev->nqr->num_msix - 1;
ibdev->dev.parent = &rdev->en_dev->pdev->dev;
ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
@@ -951,6 +1217,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->driver_def = bnxt_re_uapi_defs;
ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
+ ib_set_device_ops(ibdev, &restrack_ops);
ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1);
if (ret)
return ret;
@@ -990,6 +1257,15 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev,
atomic_set(&rdev->stats.res.pd_count, 0);
rdev->cosq[0] = 0xFFFF;
rdev->cosq[1] = 0xFFFF;
+ rdev->cq_coalescing.buf_maxtime = BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME;
+ if (bnxt_re_chip_gen_p7(en_dev->chip_num)) {
+ rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7;
+ rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7;
+ } else {
+ rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5;
+ rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5;
+ }
+ rdev->cq_coalescing.en_ring_idle_mode = BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE;
return rdev;
}
@@ -1276,8 +1552,8 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
{
int i;
- for (i = 1; i < rdev->num_msix; i++)
- bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
+ for (i = 1; i < rdev->nqr->num_msix; i++)
+ bnxt_qplib_disable_nq(&rdev->nqr->nq[i - 1]);
if (rdev->qplib_res.rcfw)
bnxt_qplib_cleanup_res(&rdev->qplib_res);
@@ -1291,10 +1567,12 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
bnxt_qplib_init_res(&rdev->qplib_res);
- for (i = 1; i < rdev->num_msix ; i++) {
- db_offt = rdev->en_dev->msix_entries[i].db_offset;
- rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1],
- i - 1, rdev->en_dev->msix_entries[i].vector,
+ mutex_init(&rdev->nqr->load_lock);
+
+ for (i = 1; i < rdev->nqr->num_msix ; i++) {
+ db_offt = rdev->nqr->msix_entries[i].db_offset;
+ rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nqr->nq[i - 1],
+ i - 1, rdev->nqr->msix_entries[i].vector,
db_offt, &bnxt_re_cqn_handler,
&bnxt_re_srqn_handler);
if (rc) {
@@ -1307,20 +1585,22 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
return 0;
fail:
for (i = num_vec_enabled; i >= 0; i--)
- bnxt_qplib_disable_nq(&rdev->nq[i]);
+ bnxt_qplib_disable_nq(&rdev->nqr->nq[i]);
return rc;
}
static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev)
{
+ struct bnxt_qplib_nq *nq;
u8 type;
int i;
- for (i = 0; i < rdev->num_msix - 1; i++) {
+ for (i = 0; i < rdev->nqr->num_msix - 1; i++) {
type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
- bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
- bnxt_qplib_free_nq(&rdev->nq[i]);
- rdev->nq[i].res = NULL;
+ nq = &rdev->nqr->nq[i];
+ bnxt_re_net_ring_free(rdev, nq->ring_id, type);
+ bnxt_qplib_free_nq(nq);
+ nq->res = NULL;
}
}
@@ -1362,12 +1642,12 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
if (rc)
goto dealloc_res;
- for (i = 0; i < rdev->num_msix - 1; i++) {
+ for (i = 0; i < rdev->nqr->num_msix - 1; i++) {
struct bnxt_qplib_nq *nq;
- nq = &rdev->nq[i];
+ nq = &rdev->nqr->nq[i];
nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
- rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, &rdev->nq[i]);
+ rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, nq);
if (rc) {
ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x",
i, rc);
@@ -1375,17 +1655,17 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
}
type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr;
- rattr.pages = nq->hwq.pbl[rdev->nq[i].hwq.level].pg_count;
+ rattr.pages = nq->hwq.pbl[rdev->nqr->nq[i].hwq.level].pg_count;
rattr.type = type;
rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1;
- rattr.lrid = rdev->en_dev->msix_entries[i + 1].ring_idx;
+ rattr.lrid = rdev->nqr->msix_entries[i + 1].ring_idx;
rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id);
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to allocate NQ fw id with rc = 0x%x",
rc);
- bnxt_qplib_free_nq(&rdev->nq[i]);
+ bnxt_qplib_free_nq(nq);
goto free_nq;
}
num_vec_created++;
@@ -1394,8 +1674,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
free_nq:
for (i = num_vec_created - 1; i >= 0; i--) {
type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
- bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
- bnxt_qplib_free_nq(&rdev->nq[i]);
+ bnxt_re_net_ring_free(rdev, rdev->nqr->nq[i].ring_id, type);
+ bnxt_qplib_free_nq(&rdev->nqr->nq[i]);
}
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
&rdev->dpi_privileged);
@@ -1590,11 +1870,28 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
return rc;
}
+static int bnxt_re_alloc_nqr_mem(struct bnxt_re_dev *rdev)
+{
+ rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL);
+ if (!rdev->nqr)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev)
+{
+ kfree(rdev->nqr);
+ rdev->nqr = NULL;
+}
+
static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
{
u8 type;
int rc;
+ bnxt_re_debugfs_rem_pdev(rdev);
+
if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
cancel_delayed_work_sync(&rdev->worker);
@@ -1617,11 +1914,12 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
}
- rdev->num_msix = 0;
+ rdev->nqr->num_msix = 0;
if (rdev->pacing.dbr_pacing)
bnxt_re_deinitialize_dbr_pacing(rdev);
+ bnxt_re_free_nqr_mem(rdev);
bnxt_re_destroy_chip_ctx(rdev);
if (op_type == BNXT_RE_COMPLETE_REMOVE) {
if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags))
@@ -1659,6 +1957,17 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
}
set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ if (rdev->en_dev->ulp_tbl->msix_requested < BNXT_RE_MIN_MSIX) {
+ ibdev_err(&rdev->ibdev,
+ "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n",
+ rdev->en_dev->ulp_tbl->msix_requested);
+ bnxt_unregister_dev(rdev->en_dev);
+ clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ return -EINVAL;
+ }
+ ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n",
+ rdev->en_dev->ulp_tbl->msix_requested);
+
rc = bnxt_re_setup_chip_ctx(rdev);
if (rc) {
bnxt_unregister_dev(rdev->en_dev);
@@ -1667,19 +1976,20 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
return -EINVAL;
}
+ rc = bnxt_re_alloc_nqr_mem(rdev);
+ if (rc) {
+ bnxt_re_destroy_chip_ctx(rdev);
+ bnxt_unregister_dev(rdev->en_dev);
+ clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ return rc;
+ }
+ rdev->nqr->num_msix = rdev->en_dev->ulp_tbl->msix_requested;
+ memcpy(rdev->nqr->msix_entries, rdev->en_dev->msix_entries,
+ sizeof(struct bnxt_msix_entry) * rdev->nqr->num_msix);
+
/* Check whether VF or PF */
bnxt_re_get_sriov_func_type(rdev);
- if (!rdev->en_dev->ulp_tbl->msix_requested) {
- ibdev_err(&rdev->ibdev,
- "Failed to get MSI-X vectors: %#x\n", rc);
- rc = -EINVAL;
- goto fail;
- }
- ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n",
- rdev->en_dev->ulp_tbl->msix_requested);
- rdev->num_msix = rdev->en_dev->ulp_tbl->msix_requested;
-
bnxt_re_query_hwrm_intf_version(rdev);
/* Establish RCFW Communication Channel to initialize the context
@@ -1701,14 +2011,14 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
rattr.type = type;
rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1;
- rattr.lrid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
+ rattr.lrid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc);
goto free_rcfw;
}
- db_offt = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].db_offset;
- vid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].vector;
+ db_offt = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].db_offset;
+ vid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].vector;
rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw,
vid, db_offt,
&bnxt_re_aeq_handler);
@@ -1785,16 +2095,16 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
- /*
- * Use the total VF count since the actual VF count may not be
- * available at this point.
- */
- bnxt_re_vf_res_config(rdev);
+
+ if (!(rdev->qplib_res.en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT))
+ bnxt_re_vf_res_config(rdev);
}
hash_init(rdev->cq_hash);
if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT)
hash_init(rdev->srq_hash);
+ bnxt_re_debugfs_add_pdev(rdev);
+
return 0;
free_sctx:
bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
@@ -1896,11 +2206,10 @@ static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
if (enable) {
cc_param.enable = 1;
- cc_param.cc_mode = CMDQ_MODIFY_ROCE_CC_CC_MODE_PROBABILISTIC_CC_MODE;
+ cc_param.tos_ecn = 1;
}
- cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE |
- CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
+ cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param))
@@ -2033,12 +2342,6 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
rdev = en_info->rdev;
en_dev = en_info->en_dev;
mutex_lock(&bnxt_re_mutex);
- /* L2 driver may invoke this callback during device error/crash or device
- * reset. Current RoCE driver doesn't recover the device in case of
- * error. Handle the error by dispatching fatal events to all qps
- * ie. by calling bnxt_re_dev_stop and release the MSIx vectors as
- * L2 driver want to modify the MSIx table.
- */
ibdev_info(&rdev->ibdev, "Handle device suspend call");
/* Check the current device state from bnxt_en_dev and move the
@@ -2046,17 +2349,12 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
* This prevents more commands to HW during clean-up,
* in case the device is already in error.
*/
- if (test_bit(BNXT_STATE_FW_FATAL_COND, &rdev->en_dev->en_state))
+ if (test_bit(BNXT_STATE_FW_FATAL_COND, &rdev->en_dev->en_state)) {
set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
-
- bnxt_re_dev_stop(rdev);
- bnxt_re_stop_irq(adev);
- /* Move the device states to detached and avoid sending any more
- * commands to HW
- */
- set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
- set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
- wake_up_all(&rdev->rcfw.cmdq.waitq);
+ set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
+ wake_up_all(&rdev->rcfw.cmdq.waitq);
+ bnxt_re_dev_stop(rdev);
+ }
if (rdev->pacing.dbr_pacing)
bnxt_re_set_pacing_dev_state(rdev);
@@ -2075,13 +2373,6 @@ static int bnxt_re_resume(struct auxiliary_device *adev)
struct bnxt_re_dev *rdev;
mutex_lock(&bnxt_re_mutex);
- /* L2 driver may invoke this callback during device recovery, resume.
- * reset. Current RoCE driver doesn't recover the device in case of
- * error. Handle the error by dispatching fatal events to all qps
- * ie. by calling bnxt_re_dev_stop and release the MSIx vectors as
- * L2 driver want to modify the MSIx table.
- */
-
bnxt_re_add_device(adev, BNXT_RE_POST_RECOVERY_INIT);
rdev = en_info->rdev;
ibdev_info(&rdev->ibdev, "Device resume completed");
@@ -2112,18 +2403,24 @@ static int __init bnxt_re_mod_init(void)
int rc;
pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
+ bnxt_re_register_debugfs();
+
rc = auxiliary_driver_register(&bnxt_re_driver);
if (rc) {
pr_err("%s: Failed to register auxiliary driver\n",
ROCE_DRV_MODULE_NAME);
- return rc;
+ goto err_debug;
}
return 0;
+err_debug:
+ bnxt_re_unregister_debugfs();
+ return rc;
}
static void __exit bnxt_re_mod_exit(void)
{
auxiliary_driver_unregister(&bnxt_re_driver);
+ bnxt_re_unregister_debugfs();
}
module_init(bnxt_re_mod_init);
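
The restrack fill_res_*_entry{,_raw} hooks registered above are exported through the rdma netlink interface, so a sufficiently recent iproute2 should be able to display the driver attributes and the raw firmware context, roughly along these lines (option spellings vary between iproute2 versions):

	rdma res show qp -dd	# driver attributes (sq_max_wqe, timeout, ...)
	rdma res show qp -r	# raw QPC dump via bnxt_re_fill_res_qp_entry_raw()

The per-QP debugfs files added earlier in this series are independent of this netlink path.
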
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 7ad83566ab0f..e42abf5be6c0 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -556,6 +556,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
nq->pdev = pdev;
nq->cqn_handler = cqn_handler;
nq->srqn_handler = srqn_handler;
+ nq->load = 0;
/* Have a task to schedule CQ notifiers in post send case */
nq->cqn_wq = create_singlethread_workqueue("bnxt_qplib_nq");
@@ -1282,12 +1283,47 @@ static void __filter_modify_flags(struct bnxt_qplib_qp *qp)
}
}
+static void bnxt_set_mandatory_attributes(struct bnxt_qplib_qp *qp,
+ struct cmdq_modify_qp *req)
+{
+ u32 mandatory_flags = 0;
+
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC)
+ mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS;
+
+ if (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) {
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC && qp->srq)
+ req->flags = cpu_to_le16(CMDQ_MODIFY_QP_FLAGS_SRQ_USED);
+ mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
+ }
+
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_UD ||
+ qp->type == CMDQ_MODIFY_QP_QP_TYPE_GSI)
+ mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY;
+
+ qp->modify_flags |= mandatory_flags;
+ req->qp_type = qp->type;
+}
+
+static bool is_optimized_state_transition(struct bnxt_qplib_qp *qp)
+{
+ if ((qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) ||
+ (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_RTR &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTS))
+ return true;
+
+ return false;
+}
+
int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct creq_modify_qp_resp resp = {};
struct bnxt_qplib_cmdqmsg msg = {};
struct cmdq_modify_qp req = {};
+ u16 vlan_pcp_vlan_dei_vlan_id;
u32 temp32[4];
u32 bmask;
int rc;
@@ -1298,6 +1334,12 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
/* Filter out the qp_attr_mask based on the state->new transition */
__filter_modify_flags(qp);
+ if (qp->modify_flags & CMDQ_MODIFY_QP_MODIFY_MASK_STATE) {
+ /* Set mandatory attributes for INIT -> RTR and RTR -> RTS transition */
+ if (_is_optimize_modify_qp_supported(res->dattr->dev_cap_flags2) &&
+ is_optimized_state_transition(qp))
+ bnxt_set_mandatory_attributes(qp, &req);
+ }
bmask = qp->modify_flags;
req.modify_mask = cpu_to_le32(qp->modify_flags);
req.qp_cid = cpu_to_le32(qp->id);
@@ -1378,7 +1420,16 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID)
req.dest_qp_id = cpu_to_le32(qp->dest_qpn);
- req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(qp->vlan_id);
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID) {
+ vlan_pcp_vlan_dei_vlan_id =
+ ((res->sgid_tbl.tbl[qp->ah.sgid_index].vlan_id <<
+ CMDQ_MODIFY_QP_VLAN_ID_SFT) &
+ CMDQ_MODIFY_QP_VLAN_ID_MASK);
+ vlan_pcp_vlan_dei_vlan_id |=
+ ((qp->ah.sl << CMDQ_MODIFY_QP_VLAN_PCP_SFT) &
+ CMDQ_MODIFY_QP_VLAN_PCP_MASK);
+ req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(vlan_pcp_vlan_dei_vlan_id);
+ }
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
@@ -2151,6 +2202,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
struct bnxt_qplib_cmdqmsg msg = {};
struct cmdq_create_cq req = {};
struct bnxt_qplib_pbl *pbl;
+ u32 coalescing = 0;
u32 pg_sz_lvl;
int rc;
@@ -2177,6 +2229,25 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
req.dpi = cpu_to_le32(cq->dpi->dpi);
req.cq_handle = cpu_to_le64(cq->cq_handle);
req.cq_size = cpu_to_le32(cq->max_wqe);
+
+ if (_is_cq_coalescing_supported(res->dattr->dev_cap_flags2)) {
+ req.flags |= cpu_to_le16(CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID);
+ coalescing |= ((cq->coalescing->buf_maxtime <<
+ CMDQ_CREATE_CQ_BUF_MAXTIME_SFT) &
+ CMDQ_CREATE_CQ_BUF_MAXTIME_MASK);
+ coalescing |= ((cq->coalescing->normal_maxbuf <<
+ CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT) &
+ CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK);
+ coalescing |= ((cq->coalescing->during_maxbuf <<
+ CMDQ_CREATE_CQ_DURING_MAXBUF_SFT) &
+ CMDQ_CREATE_CQ_DURING_MAXBUF_MASK);
+ if (cq->coalescing->en_ring_idle_mode)
+ coalescing |= CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE;
+ else
+ coalescing &= ~CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE;
+ req.coalescing = cpu_to_le32(coalescing);
+ }
+
pbl = &cq->hwq.pbl[PBL_LVL_0];
pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) <<
CMDQ_CREATE_CQ_PG_SIZE_SFT);
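
For reference, the coalescing word built in bnxt_qplib_create_cq() above packs into the bit layout defined in roce_hsi.h further down. A worked example with the P7 defaults from qplib_fp.h (sketch only, not part of the patch):

/*
 * buf_maxtime = 0x1, normal_maxbuf = 0x8, during_maxbuf = 0x8, idle = 1:
 *
 *   (0x1 << 0)  = 0x00001   bits  8:0   BUF_MAXTIME
 * | (0x8 << 9)  = 0x01000   bits 13:9   NORMAL_MAXBUF
 * | (0x8 << 14) = 0x20000   bits 18:14  DURING_MAXBUF
 * | (0x1 << 19) = 0x80000   bit  19     ENABLE_RING_IDLE_MODE
 *   ----------------------
 *   coalescing  = 0xa1001
 */
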
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 820611a23943..ef3424c81345 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -383,6 +383,25 @@ static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que,
return avail <= slots;
}
+/* CQ coalescing parameters */
+struct bnxt_qplib_cq_coal_param {
+ u16 buf_maxtime;
+ u8 normal_maxbuf;
+ u8 during_maxbuf;
+ u8 en_ring_idle_mode;
+};
+
+#define BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME 0x1
+#define BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7 0x8
+#define BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7 0x8
+#define BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5 0x1
+#define BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5 0x1
+#define BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE 0x1
+#define BNXT_QPLIB_CQ_COAL_MAX_BUF_MAXTIME 0x1bf
+#define BNXT_QPLIB_CQ_COAL_MAX_NORMAL_MAXBUF 0x1f
+#define BNXT_QPLIB_CQ_COAL_MAX_DURING_MAXBUF 0x1f
+#define BNXT_QPLIB_CQ_COAL_MAX_EN_RING_IDLE_MODE 0x1
+
struct bnxt_qplib_cqe {
u8 status;
u8 type;
@@ -391,7 +410,7 @@ struct bnxt_qplib_cqe {
u16 cfa_meta;
u64 wr_id;
union {
- __le32 immdata;
+ u32 immdata;
u32 invrkey;
};
u64 qp_handle;
@@ -445,6 +464,7 @@ struct bnxt_qplib_cq {
*/
spinlock_t flush_lock; /* QP flush management */
u16 cnq_events;
+ struct bnxt_qplib_cq_coal_param *coalescing;
};
#define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq)
@@ -499,6 +519,7 @@ struct bnxt_qplib_nq {
struct tasklet_struct nq_tasklet;
bool requested;
int budget;
+ u32 load;
cqn_handler_t cqn_handler;
srqn_handler_t srqn_handler;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index e82bd37158ad..5e90ea232de8 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -831,6 +831,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
struct creq_initialize_fw_resp resp = {};
struct cmdq_initialize_fw req = {};
struct bnxt_qplib_cmdqmsg msg = {};
+ u16 flags = 0;
u8 pgsz, lvl;
int rc;
@@ -849,10 +850,8 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
* shall setup this area for VF. Skipping the
* HW programming
*/
- if (is_virtfn)
+ if (is_virtfn || bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx))
goto skip_ctx_setup;
- if (bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx))
- goto config_vf_res;
lvl = ctx->qpc_tbl.level;
pgsz = bnxt_qplib_base_pg_size(&ctx->qpc_tbl);
@@ -896,16 +895,14 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements);
req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements);
-config_vf_res:
- req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf);
- req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf);
- req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf);
- req.max_cq_per_vf = cpu_to_le32(ctx->vf_res.max_cq_per_vf);
- req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf);
-
skip_ctx_setup:
if (BNXT_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags))
- req.flags |= cpu_to_le16(CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED);
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED;
+ if (_is_optimize_modify_qp_supported(rcfw->res->dattr->dev_cap_flags2))
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED;
+ if (rcfw->res->en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT)
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT;
+ req.flags |= cpu_to_le16(flags);
req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 07779aeb7575..88814cb3aa74 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -131,6 +131,8 @@ static inline u32 bnxt_qplib_set_cmd_slots(struct cmdq_base *req)
#define RCFW_CMD_IS_BLOCKING 0x8000
#define HWRM_VERSION_DEV_ATTR_MAX_DPI 0x1000A0000000DULL
+/* HWRM version 1.10.3.18 */
+#define HWRM_VERSION_READ_CTX 0x1000A00030012
/* Crsq buf is 1024-Byte */
struct bnxt_qplib_crsbe {
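
The packed HWRM version constants above appear to use 16 bits per component (major.minor.update.patch), which is consistent with both values defined in this header. A sketch, not from the patch:

#define HWRM_VER(maj, min, upd, patch) \
	(((u64)(maj) << 48) | ((u64)(min) << 32) | ((u64)(upd) << 16) | (patch))

/* HWRM_VER(1, 10, 3, 18) == 0x1000A00030012 == HWRM_VERSION_READ_CTX
 * HWRM_VER(1, 10, 0, 13) == 0x1000A0000000D == HWRM_VERSION_DEV_ATTR_MAX_DPI */
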
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index c2f710364e0f..21fb148713a6 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -39,6 +39,8 @@
#ifndef __BNXT_QPLIB_RES_H__
#define __BNXT_QPLIB_RES_H__
+#include "bnxt_ulp.h"
+
extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
#define CHIP_NUM_57508 0x1750
@@ -302,6 +304,7 @@ struct bnxt_qplib_res {
struct bnxt_qplib_chip_ctx *cctx;
struct bnxt_qplib_dev_attr *dattr;
struct net_device *netdev;
+ struct bnxt_en_dev *en_dev;
struct bnxt_qplib_rcfw *rcfw;
struct bnxt_qplib_pd_tbl pd_tbl;
/* To protect the pd table bit map */
@@ -576,4 +579,14 @@ static inline bool _is_relaxed_ordering_supported(u16 dev_cap_ext_flags2)
return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_MEMORY_REGION_RO_SUPPORTED;
}
+static inline bool _is_optimize_modify_qp_supported(u16 dev_cap_ext_flags2)
+{
+ return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED;
+}
+
+static inline bool _is_cq_coalescing_supported(u16 dev_cap_ext_flags2)
+{
+ return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED;
+}
+
#endif /* __BNXT_QPLIB_RES_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index e29fbbdab9fd..7e20ae3d2c4f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -981,3 +981,38 @@ int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res,
rc = bnxt_qplib_rcfw_send_message(res->rcfw, &msg);
return rc;
}
+
+int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 res_type,
+ u32 xid, u32 resp_size, void *resp_va)
+{
+ struct creq_read_context resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_read_context req = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ int rc;
+
+ sbuf.size = resp_size;
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_READ_CONTEXT, sizeof(req));
+ req.resp_addr = cpu_to_le64(sbuf.dma_addr);
+ req.resp_size = resp_size / BNXT_QPLIB_CMDQE_UNITS;
+
+ req.xid = cpu_to_le32(xid);
+ req.type = res_type;
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto free_mem;
+
+ memcpy(resp_va, sbuf.sb, resp_size);
+free_mem:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
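
bnxt_qplib_read_context() above expresses the response size in command-queue element units before issuing CMDQ_BASE_OPCODE_READ_CONTEXT. Assuming the usual 16-byte BNXT_QPLIB_CMDQE_UNITS, a worked example (sketch, not from the patch):

/* A P5 QPC read of 1088 bytes (BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5) becomes
 *   req.resp_size = 1088 / 16 = 68 units
 * and the context is copied back out of the DMA-coherent side buffer. */
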
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index ecf3f45fea74..e6beeb514b7d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -353,6 +353,8 @@ int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
struct bnxt_qplib_ext_stat *estat);
int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res,
struct bnxt_qplib_cc_param *cc_param);
+int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 type, u32 xid,
+ u32 resp_size, void *resp_va);
#define BNXT_VAR_MAX_WQE 4352
#define BNXT_VAR_MAX_SLOT_ALIGN 256
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 3ec895284e49..a98fc9c2313e 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -216,6 +216,8 @@ struct cmdq_initialize_fw {
__le16 flags;
#define CMDQ_INITIALIZE_FW_FLAGS_MRAV_RESERVATION_SPLIT 0x1UL
#define CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED 0x2UL
+ #define CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED 0x8UL
+ #define CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT 0x10UL
__le16 cookie;
u8 resp_size;
u8 reserved8;
@@ -559,6 +561,7 @@ struct cmdq_modify_qp {
#define CMDQ_MODIFY_QP_OPCODE_LAST CMDQ_MODIFY_QP_OPCODE_MODIFY_QP
u8 cmd_size;
__le16 flags;
+ #define CMDQ_MODIFY_QP_FLAGS_SRQ_USED 0x1UL
__le16 cookie;
u8 resp_size;
u8 qp_type;
@@ -1137,6 +1140,7 @@ struct cmdq_create_cq {
#define CMDQ_CREATE_CQ_FLAGS_DISABLE_CQ_OVERFLOW_DETECTION 0x1UL
#define CMDQ_CREATE_CQ_FLAGS_STEERING_TAG_VALID 0x2UL
#define CMDQ_CREATE_CQ_FLAGS_INFINITE_CQ_MODE 0x4UL
+ #define CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID 0x8UL
__le16 cookie;
u8 resp_size;
u8 reserved8;
@@ -1169,7 +1173,18 @@ struct cmdq_create_cq {
__le32 cq_size;
__le64 pbl;
__le16 steering_tag;
- u8 reserved48[6];
+ u8 reserved48[2];
+ __le32 coalescing;
+ #define CMDQ_CREATE_CQ_BUF_MAXTIME_MASK 0x1ffUL
+ #define CMDQ_CREATE_CQ_BUF_MAXTIME_SFT 0
+ #define CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK 0x3e00UL
+ #define CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT 9
+ #define CMDQ_CREATE_CQ_DURING_MAXBUF_MASK 0x7c000UL
+ #define CMDQ_CREATE_CQ_DURING_MAXBUF_SFT 14
+ #define CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE 0x80000UL
+ #define CMDQ_CREATE_CQ_UNUSED12_MASK 0xfff00000UL
+ #define CMDQ_CREATE_CQ_UNUSED12_SFT 20
+ __le64 reserved64;
};
/* creq_create_cq_resp (size:128b/16B) */
@@ -2251,6 +2266,46 @@ struct creq_set_func_resources_resp {
u8 reserved48[6];
};
+/* cmdq_read_context (size:192b/24B) */
+struct cmdq_read_context {
+ u8 opcode;
+ #define CMDQ_READ_CONTEXT_OPCODE_READ_CONTEXT 0x85UL
+ #define CMDQ_READ_CONTEXT_OPCODE_LAST CMDQ_READ_CONTEXT_OPCODE_READ_CONTEXT
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 xid;
+ u8 type;
+ #define CMDQ_READ_CONTEXT_TYPE_QPC 0x0UL
+ #define CMDQ_READ_CONTEXT_TYPE_CQ 0x1UL
+ #define CMDQ_READ_CONTEXT_TYPE_MRW 0x2UL
+ #define CMDQ_READ_CONTEXT_TYPE_SRQ 0x3UL
+ #define CMDQ_READ_CONTEXT_TYPE_LAST CMDQ_READ_CONTEXT_TYPE_SRQ
+ u8 unused_0[3];
+};
+
+/* creq_read_context (size:128b/16B) */
+struct creq_read_context {
+ u8 type;
+ #define CREQ_READ_CONTEXT_TYPE_MASK 0x3fUL
+ #define CREQ_READ_CONTEXT_TYPE_SFT 0
+ #define CREQ_READ_CONTEXT_TYPE_QP_EVENT 0x38UL
+ #define CREQ_READ_CONTEXT_TYPE_LAST CREQ_READ_CONTEXT_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_READ_CONTEXT_V 0x1UL
+ u8 event;
+ #define CREQ_READ_CONTEXT_EVENT_READ_CONTEXT 0x85UL
+ #define CREQ_READ_CONTEXT_EVENT_LAST CREQ_READ_CONTEXT_EVENT_READ_CONTEXT
+ __le16 reserved16;
+ __le32 reserved_32;
+};
+
/* cmdq_map_tc_to_cos (size:192b/24B) */
struct cmdq_map_tc_to_cos {
u8 opcode;
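
As a worked example of the new packed coalescing word in cmdq_create_cq, a hedged sketch of how a sender might assemble it from the *_MASK/*_SFT pairs above; req stands for a struct cmdq_create_cq being prepared, and buf_maxtime, normal_maxbuf, during_maxbuf and ring_idle_mode are placeholder values, not driver code:

	u32 coal = 0;

	coal |= (buf_maxtime << CMDQ_CREATE_CQ_BUF_MAXTIME_SFT) &
		CMDQ_CREATE_CQ_BUF_MAXTIME_MASK;
	coal |= (normal_maxbuf << CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT) &
		CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK;
	coal |= (during_maxbuf << CMDQ_CREATE_CQ_DURING_MAXBUF_SFT) &
		CMDQ_CREATE_CQ_DURING_MAXBUF_MASK;
	if (ring_idle_mode)
		coal |= CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE;

	req.coalescing = cpu_to_le32(coal);
	/* the firmware only honours the word when the valid flag is set */
	req.flags |= cpu_to_le16(CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID);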
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index cd03a5429beb..fe0b6aec7839 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -30,7 +30,8 @@ enum efa_admin_aq_opcode {
EFA_ADMIN_DEALLOC_UAR = 17,
EFA_ADMIN_CREATE_EQ = 18,
EFA_ADMIN_DESTROY_EQ = 19,
- EFA_ADMIN_MAX_OPCODE = 19,
+ EFA_ADMIN_ALLOC_MR = 20,
+ EFA_ADMIN_MAX_OPCODE = 20,
};
enum efa_admin_aq_feature_id {
@@ -150,8 +151,11 @@ struct efa_admin_create_qp_cmd {
/* UAR number */
u16 uar;
+ /* Requested service level for the QP, 0 is the default SL */
+ u8 sl;
+
/* MBZ */
- u16 reserved;
+ u8 reserved;
/* MBZ */
u32 reserved2;
@@ -459,6 +463,41 @@ struct efa_admin_dereg_mr_resp {
struct efa_admin_acq_common_desc acq_common_desc;
};
+/*
+ * Allocation of MemoryRegion, required for QP working with Virtual
+ * Addresses in kernel verbs semantics, ready for fast registration use.
+ */
+struct efa_admin_alloc_mr_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* Protection Domain */
+ u16 pd;
+
+ /* MBZ */
+ u16 reserved1;
+
+ /* Maximum number of pages this MR supports. */
+ u32 max_pages;
+};
+
+struct efa_admin_alloc_mr_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /*
+ * L_Key, to be used in conjunction with local buffer references in
+ * SQ and RQ WQE, or with virtual RQ/CQ rings
+ */
+ u32 l_key;
+
+ /*
+ * R_Key, to be used in RDMA messages to refer to remotely accessed
+ * memory region
+ */
+ u32 r_key;
+};
+
struct efa_admin_create_cq_cmd {
struct efa_admin_aq_common_desc aq_common_desc;
@@ -483,8 +522,8 @@ struct efa_admin_create_cq_cmd {
*/
u8 cq_caps_2;
- /* completion queue depth in # of entries. must be power of 2 */
- u16 cq_depth;
+ /* Sub completion queue depth in # of entries. must be power of 2 */
+ u16 sub_cq_depth;
/* EQ number assigned to this cq */
u16 eqn;
@@ -519,8 +558,8 @@ struct efa_admin_create_cq_resp {
u16 cq_idx;
- /* actual cq depth in number of entries */
- u16 cq_actual_depth;
+ /* actual sub cq depth in number of entries */
+ u16 sub_cq_actual_depth;
/* CQ doorbell address, as offset to PCIe DB BAR */
u32 db_offset;
@@ -578,6 +617,8 @@ struct efa_admin_basic_stats {
u64 rx_pkts;
u64 rx_drops;
+
+ u64 qkey_viol;
};
struct efa_admin_messages_stats {
@@ -677,6 +718,15 @@ struct efa_admin_feature_device_attr_desc {
/* Unique global ID for an EFA device */
u64 guid;
+
+ /* The device maximum link speed in Gbit/sec */
+ u16 max_link_speed_gbps;
+
+ /* MBZ */
+ u16 reserved0;
+
+ /* MBZ */
+ u32 reserved1;
};
struct efa_admin_feature_queue_attr_desc {
@@ -1057,7 +1107,6 @@ struct efa_admin_host_info {
/* create_eq_cmd */
#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
-#define EFA_ADMIN_CREATE_EQ_CMD_VIRT_MASK BIT(6)
#define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK BIT(0)
/* host_info */
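
A hedged sketch of issuing the new EFA_ADMIN_ALLOC_MR command, assuming the driver's usual efa_com_cmd_exec() admin-queue helper; example_alloc_mr(), aq, pdn and max_pages are placeholders and the real plumbing added in efa_com_cmd.c may differ:

static int example_alloc_mr(struct efa_com_admin_queue *aq, u16 pdn,
			    u32 max_pages, u32 *lkey, u32 *rkey)
{
	struct efa_admin_alloc_mr_cmd cmd = {};
	struct efa_admin_alloc_mr_resp resp = {};
	int err;

	cmd.aq_common_desc.opcode = EFA_ADMIN_ALLOC_MR;
	cmd.pd = pdn;			/* target protection domain */
	cmd.max_pages = max_pages;	/* page budget for later fast-reg */

	err = efa_com_cmd_exec(aq,
			       (struct efa_admin_aq_entry *)&cmd, sizeof(cmd),
			       (struct efa_admin_acq_entry *)&resp,
			       sizeof(resp));
	if (err)
		return err;

	*lkey = resp.l_key;	/* local key for SQ/RQ buffer references */
	*rkey = resp.r_key;	/* remote key for RDMA peers */
	return 0;
}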
diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h
index 83f20c38a840..35700c93e639 100644
--- a/drivers/infiniband/hw/efa/efa_admin_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_defs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_ADMIN_H_
@@ -96,7 +96,7 @@ struct efa_admin_acq_entry {
struct efa_admin_aenq_common_desc {
u16 group;
- u16 syndrom;
+ u16 syndrome;
/*
* 0 : phase
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index 5a774925cdea..c6b89c45fdc9 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -31,6 +31,7 @@ int efa_com_create_qp(struct efa_com_dev *edev,
create_qp_cmd.qp_alloc_size.recv_queue_depth =
params->rq_depth;
create_qp_cmd.uar = params->uarn;
+ create_qp_cmd.sl = params->sl;
if (params->unsolicited_write_recv)
EFA_SET(&create_qp_cmd.flags, EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV, 1);
@@ -163,7 +164,7 @@ int efa_com_create_cq(struct efa_com_dev *edev,
EFA_SET(&create_cmd.cq_caps_2,
EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS,
params->entry_size_in_bytes / 4);
- create_cmd.cq_depth = params->cq_depth;
+ create_cmd.sub_cq_depth = params->sub_cq_depth;
create_cmd.num_sub_cqs = params->num_sub_cqs;
create_cmd.uar = params->uarn;
if (params->interrupt_mode_enabled) {
@@ -191,7 +192,7 @@ int efa_com_create_cq(struct efa_com_dev *edev,
}
result->cq_idx = cmd_completion.cq_idx;
- result->actual_depth = params->cq_depth;
+ result->actual_depth = params->sub_cq_depth;
result->db_off = cmd_completion.db_offset;
result->db_valid = EFA_GET(&cmd_completion.flags,
EFA_ADMIN_CREATE_CQ_RESP_DB_VALID);
@@ -466,6 +467,7 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
result->max_rdma_size = resp.u.device_attr.max_rdma_size;
result->device_caps = resp.u.device_attr.device_caps;
result->guid = resp.u.device_attr.guid;
+ result->max_link_speed_gbps = resp.u.device_attr.max_link_speed_gbps;
if (result->admin_api_version < 1) {
ibdev_err_ratelimited(
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
index 668d033f7477..5511355b700d 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.h
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -27,6 +27,7 @@ struct efa_com_create_qp_params {
u16 pd;
u16 uarn;
u8 qp_type;
+ u8 sl;
u8 unsolicited_write_recv : 1;
};
@@ -71,7 +72,7 @@ struct efa_com_create_cq_params {
/* cq physical base address in OS memory */
dma_addr_t dma_addr;
/* completion queue depth in # of entries */
- u16 cq_depth;
+ u16 sub_cq_depth;
u16 num_sub_cqs;
u16 uarn;
u16 eqn;
@@ -141,6 +142,7 @@ struct efa_com_get_device_attr_result {
u16 max_wr_rdma_sge;
u16 max_tx_batch;
u16 min_sq_depth;
+ u16 max_link_speed_gbps;
u8 db_bar;
};
diff --git a/drivers/infiniband/hw/efa/efa_io_defs.h b/drivers/infiniband/hw/efa/efa_io_defs.h
index 2d8eb96eaa81..a4c9fd33da38 100644
--- a/drivers/infiniband/hw/efa/efa_io_defs.h
+++ b/drivers/infiniband/hw/efa/efa_io_defs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_IO_H_
@@ -10,6 +10,7 @@
#define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1
#define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32
#define EFA_IO_TX_DESC_IMM_DATA_SIZE 4
+#define EFA_IO_TX_DESC_INLINE_PBL_SIZE 1
enum efa_io_queue_type {
/* send queue (of a QP) */
@@ -25,6 +26,10 @@ enum efa_io_send_op_type {
EFA_IO_RDMA_READ = 1,
/* RDMA write */
EFA_IO_RDMA_WRITE = 2,
+ /* Fast MR registration */
+ EFA_IO_FAST_REG = 3,
+ /* Fast MR invalidation */
+ EFA_IO_FAST_INV = 4,
};
enum efa_io_comp_status {
@@ -34,15 +39,15 @@ enum efa_io_comp_status {
EFA_IO_COMP_STATUS_FLUSHED = 1,
/* Internal QP error */
EFA_IO_COMP_STATUS_LOCAL_ERROR_QP_INTERNAL_ERROR = 2,
- /* Bad operation type */
- EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_OP_TYPE = 3,
+ /* Unsupported operation */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_UNSUPPORTED_OP = 3,
/* Bad AH */
EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_AH = 4,
/* LKEY not registered or does not match IOVA */
EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY = 5,
/* Message too long */
EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH = 6,
- /* Destination ENI is down or does not run EFA */
+ /* RKEY not registered or does not match remote IOVA */
EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS = 7,
/* Connection was reset by remote side */
EFA_IO_COMP_STATUS_REMOTE_ERROR_ABORT = 8,
@@ -54,8 +59,17 @@ enum efa_io_comp_status {
EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH = 11,
/* Unexpected status returned by responder */
EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS = 12,
- /* Unresponsive remote - detected locally */
+ /* Unresponsive remote - was previously responsive */
EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE = 13,
+ /* No valid AH at remote side (required for RDMA operations) */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_UNKNOWN_PEER = 14,
+ /* Unreachable remote - never received a response */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_UNREACH_REMOTE = 15,
+};
+
+enum efa_io_frwr_pbl_mode {
+ EFA_IO_FRWR_INLINE_PBL = 0,
+ EFA_IO_FRWR_DIRECT_PBL = 1,
};
struct efa_io_tx_meta_desc {
@@ -95,13 +109,13 @@ struct efa_io_tx_meta_desc {
/*
* If inline_msg bit is set, length of inline message in bytes,
- * otherwise length of SGL (number of buffers).
+ * otherwise length of SGL (number of buffers).
*/
u16 length;
/*
- * immediate data: if has_imm is set, then this field is included
- * within Tx message and reported in remote Rx completion.
+ * immediate data: if has_imm is set, then this field is included within
+ * Tx message and reported in remote Rx completion.
*/
u32 immediate_data;
@@ -158,6 +172,63 @@ struct efa_io_rdma_req {
struct efa_io_tx_buf_desc local_mem[1];
};
+struct efa_io_fast_mr_reg_req {
+ /* Updated local key of the MR after lkey/rkey increment */
+ u32 lkey;
+
+ /*
+ * permissions
+ * 0 : local_write_enable - Local write permissions:
+ * must be set for RQ buffers and buffers posted for
+ * RDMA Read requests
+ * 1 : remote_write_enable - Remote write
+ * permissions: must be set to enable RDMA write to
+ * the region
+ * 2 : remote_read_enable - Remote read permissions:
+ * must be set to enable RDMA read from the region
+ * 7:3 : reserved2 - MBZ
+ */
+ u8 permissions;
+
+ /*
+ * control flags
+ * 4:0 : phys_page_size_shift - page size is (1 <<
+ * phys_page_size_shift)
+ * 6:5 : pbl_mode - enum efa_io_frwr_pbl_mode
+ * 7 : reserved - MBZ
+ */
+ u8 flags;
+
+ /* MBZ */
+ u8 reserved[2];
+
+ /* IO Virtual Address associated with this MR */
+ u64 iova;
+
+ /* Memory region length, in bytes */
+ u64 mr_length;
+
+ /* Physical Buffer List, each element is page-aligned. */
+ union {
+ /*
+ * Inline array of physical page addresses (optimization
+ * for short region activation).
+ */
+ u64 inline_array[1];
+
+ /* points to PBL (Currently only direct) */
+ u64 dma_addr;
+ } pbl;
+};
+
+struct efa_io_fast_mr_inv_req {
+ /* Local key of the MR to invalidate */
+ u32 lkey;
+
+ /* MBZ */
+ u8 reserved[28];
+};
+
/*
* Tx WQE, composed of tx meta descriptors followed by either tx buffer
* descriptors or inline data
@@ -174,6 +245,12 @@ struct efa_io_tx_wqe {
/* RDMA local and remote memory addresses */
struct efa_io_rdma_req rdma_req;
+
+ /* Fast registration */
+ struct efa_io_fast_mr_reg_req reg_mr_req;
+
+ /* Fast invalidation */
+ struct efa_io_fast_mr_inv_req inv_mr_req;
} data;
};
@@ -208,7 +285,7 @@ struct efa_io_rx_desc {
struct efa_io_cdesc_common {
/*
* verbs-generated request ID, as provided in the completed tx or rx
- * descriptor.
+ * descriptor.
*/
u16 req_id;
@@ -221,7 +298,8 @@ struct efa_io_cdesc_common {
* 3 : has_imm - indicates that immediate data is
* present - for RX completions only
* 6:4 : op_type - enum efa_io_send_op_type
- * 7 : reserved31 - MBZ
+ * 7 : unsolicited - indicates that there is no
+ * matching request - for RDMA with imm. RX only
*/
u8 flags;
@@ -291,6 +369,13 @@ struct efa_io_rx_cdesc_ex {
/* tx_buf_desc */
#define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0)
+/* fast_mr_reg_req */
+#define EFA_IO_FAST_MR_REG_REQ_LOCAL_WRITE_ENABLE_MASK BIT(0)
+#define EFA_IO_FAST_MR_REG_REQ_REMOTE_WRITE_ENABLE_MASK BIT(1)
+#define EFA_IO_FAST_MR_REG_REQ_REMOTE_READ_ENABLE_MASK BIT(2)
+#define EFA_IO_FAST_MR_REG_REQ_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0)
+#define EFA_IO_FAST_MR_REG_REQ_PBL_MODE_MASK GENMASK(6, 5)
+
/* rx_desc */
#define EFA_IO_RX_DESC_LKEY_MASK GENMASK(23, 0)
#define EFA_IO_RX_DESC_FIRST_MASK BIT(30)
@@ -301,5 +386,6 @@ struct efa_io_rx_cdesc_ex {
#define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1)
#define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3)
#define EFA_IO_CDESC_COMMON_OP_TYPE_MASK GENMASK(6, 4)
+#define EFA_IO_CDESC_COMMON_UNSOLICITED_MASK BIT(7)
#endif /* _EFA_IO_H_ */
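
A hedged sketch of filling the new fast-registration WQE payload for a single-page, inline-PBL region; EFA_SET() is the driver's existing bitfield helper, and tx_wqe, new_lkey, iova, length and page_dma_addr are placeholder names introduced for illustration:

	struct efa_io_fast_mr_reg_req *req = &tx_wqe->data.reg_mr_req;

	req->lkey = new_lkey;		/* key after lkey/rkey increment */
	EFA_SET(&req->permissions,
		EFA_IO_FAST_MR_REG_REQ_LOCAL_WRITE_ENABLE, 1);
	EFA_SET(&req->permissions,
		EFA_IO_FAST_MR_REG_REQ_REMOTE_WRITE_ENABLE, 1);
	EFA_SET(&req->flags,
		EFA_IO_FAST_MR_REG_REQ_PHYS_PAGE_SIZE_SHIFT, PAGE_SHIFT);
	EFA_SET(&req->flags,
		EFA_IO_FAST_MR_REG_REQ_PBL_MODE, EFA_IO_FRWR_INLINE_PBL);
	req->iova = iova;
	req->mr_length = length;
	req->pbl.inline_array[0] = page_dma_addr;
	/* the accompanying meta descriptor's op_type carries EFA_IO_FAST_REG */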
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index cc13415ff7e7..a8645a40730f 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -85,6 +85,8 @@ static const struct rdma_stat_desc efa_port_stats_descs[] = {
EFA_DEFINE_PORT_STATS(EFA_STATS_STR)
};
+#define EFA_DEFAULT_LINK_SPEED_GBPS 100
+
#define EFA_CHUNK_PAYLOAD_SHIFT 12
#define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT)
#define EFA_CHUNK_PAYLOAD_PTR_SIZE 8
@@ -277,10 +279,47 @@ int efa_query_device(struct ib_device *ibdev,
return 0;
}
+static void efa_link_gbps_to_speed_and_width(u16 gbps,
+ enum ib_port_speed *speed,
+ enum ib_port_width *width)
+{
+ if (gbps >= 400) {
+ *width = IB_WIDTH_8X;
+ *speed = IB_SPEED_HDR;
+ } else if (gbps >= 200) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_HDR;
+ } else if (gbps >= 120) {
+ *width = IB_WIDTH_12X;
+ *speed = IB_SPEED_FDR10;
+ } else if (gbps >= 100) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_EDR;
+ } else if (gbps >= 60) {
+ *width = IB_WIDTH_12X;
+ *speed = IB_SPEED_DDR;
+ } else if (gbps >= 50) {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_HDR;
+ } else if (gbps >= 40) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_FDR10;
+ } else if (gbps >= 30) {
+ *width = IB_WIDTH_12X;
+ *speed = IB_SPEED_SDR;
+ } else {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_EDR;
+ }
+}
+
int efa_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
struct efa_dev *dev = to_edev(ibdev);
+ enum ib_port_speed link_speed;
+ enum ib_port_width link_width;
+ u16 link_gbps;
props->lmc = 1;
@@ -288,8 +327,10 @@ int efa_query_port(struct ib_device *ibdev, u32 port,
props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
props->gid_tbl_len = 1;
props->pkey_tbl_len = 1;
- props->active_speed = IB_SPEED_EDR;
- props->active_width = IB_WIDTH_4X;
+ link_gbps = dev->dev_attr.max_link_speed_gbps ?: EFA_DEFAULT_LINK_SPEED_GBPS;
+ efa_link_gbps_to_speed_and_width(link_gbps, &link_speed, &link_width);
+ props->active_speed = link_speed;
+ props->active_width = link_width;
props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
props->max_msg_sz = dev->dev_attr.mtu;
@@ -676,7 +717,7 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
goto err_out;
}
- if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_90)) {
+ if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_98)) {
ibdev_dbg(&dev->ibdev,
"Incompatible ABI params, unknown fields in udata\n");
err = -EINVAL;
@@ -732,6 +773,8 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
create_qp_params.rq_base_addr = qp->rq_dma_addr;
}
+ create_qp_params.sl = cmd.sl;
+
if (cmd.flags & EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV)
create_qp_params.unsolicited_write_recv = true;
@@ -1167,7 +1210,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
}
params.uarn = cq->ucontext->uarn;
- params.cq_depth = entries;
+ params.sub_cq_depth = entries;
params.dma_addr = cq->dma_addr;
params.entry_size_in_bytes = cmd.cq_entry_size;
params.num_sub_cqs = cmd.num_sub_cqs;
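
For reference, the new mapping follows the usual IB rate arithmetic of lane count times per-lane speed: a device reporting 200 Gbps is exposed as 4X/HDR (4 x 50 Gb/s), 400 Gbps as 8X/HDR, and the EFA_DEFAULT_LINK_SPEED_GBPS fallback of 100 reproduces the previously hard-coded 4X/EDR (4 x 25 Gb/s) for firmware that does not report max_link_speed_gbps.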
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index c52e6b2c9914..a442eca498b8 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -13235,7 +13235,7 @@ int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
/*
* Clear all interrupt sources on the chip.
*/
-void clear_all_interrupts(struct hfi1_devdata *dd)
+static void clear_all_interrupts(struct hfi1_devdata *dd)
{
int i;
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index d861aa8fc640..8841db16bde7 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1404,7 +1404,6 @@ irqreturn_t receive_context_interrupt_napi(int irq, void *data);
int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
void init_qsfp_int(struct hfi1_devdata *dd);
-void clear_all_interrupts(struct hfi1_devdata *dd);
void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
void reset_interrupts(struct hfi1_devdata *dd);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 4ec66611a143..4106423a1b39 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -179,8 +179,8 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC,
hr_cq->cqn);
if (ret)
- dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret,
- hr_cq->cqn);
+ dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n",
+ ret, hr_cq->cqn);
xa_erase_irq(&cq_table->array, hr_cq->cqn);
diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
index e8febb40f645..b869cdc54118 100644
--- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c
+++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
@@ -5,6 +5,7 @@
#include <linux/debugfs.h>
#include <linux/device.h>
+#include <linux/pci.h>
#include "hns_roce_device.h"
@@ -86,7 +87,7 @@ void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev)
{
struct hns_roce_dev_debugfs *dbgfs = &hr_dev->dbgfs;
- dbgfs->root = debugfs_create_dir(dev_name(&hr_dev->ib_dev.dev),
+ dbgfs->root = debugfs_create_dir(pci_name(hr_dev->pci_dev),
hns_roce_dbgfs_root);
create_sw_stat_debugfs(hr_dev, dbgfs->root);
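
In practice the per-device debugfs directory is now keyed by the PCI function rather than the IB device name, e.g. something like /sys/kernel/debug/hns_roce/0000:bd:00.2/ (path shown for illustration only).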
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 0b1e21cb6d2d..560a1d9de408 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -489,12 +489,6 @@ struct hns_roce_bank {
u32 next; /* Next ID to allocate. */
};
-struct hns_roce_idx_table {
- u32 *spare_idx;
- u32 head;
- u32 tail;
-};
-
struct hns_roce_qp_table {
struct hns_roce_hem_table qp_table;
struct hns_roce_hem_table irrl_table;
@@ -503,7 +497,7 @@ struct hns_roce_qp_table {
struct mutex scc_mutex;
struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM];
struct mutex bank_mutex;
- struct hns_roce_idx_table idx_table;
+ struct xarray dip_xa;
};
struct hns_roce_cq_table {
@@ -593,6 +587,7 @@ struct hns_roce_dev;
enum {
HNS_ROCE_FLUSH_FLAG = 0,
+ HNS_ROCE_STOP_FLUSH_FLAG = 1,
};
struct hns_roce_work {
@@ -656,6 +651,8 @@ struct hns_roce_qp {
enum hns_roce_cong_type cong_type;
u8 tc_mode;
u8 priority;
+ spinlock_t flush_lock;
+ struct hns_roce_dip *dip;
};
struct hns_roce_ib_iboe {
@@ -982,8 +979,6 @@ struct hns_roce_dev {
enum hns_roce_device_state state;
struct list_head qp_list; /* list of all qps on this dev */
spinlock_t qp_list_lock; /* protect qp_list */
- struct list_head dip_list; /* list of all dest ips on this dev */
- spinlock_t dip_list_lock; /* protect dip_list */
struct list_head pgdir_list;
struct mutex pgdir_mutex;
@@ -1289,6 +1284,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
+void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn);
void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index c7c167e2a045..f84521be3bea 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -300,7 +300,7 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_mhop *mhop,
struct hns_roce_hem_index *index)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct device *dev = hr_dev->dev;
unsigned long mhop_obj = obj;
u32 l0_idx, l1_idx, l2_idx;
u32 chunk_ba_num;
@@ -331,14 +331,14 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev,
index->buf = l0_idx;
break;
default:
- ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n",
- table->type, mhop->hop_num);
+ dev_err(dev, "table %u not support mhop.hop_num = %u!\n",
+ table->type, mhop->hop_num);
return -EINVAL;
}
if (unlikely(index->buf >= table->num_hem)) {
- ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n",
- table->type, index->buf, table->num_hem);
+ dev_err(dev, "table %u exceed hem limt idx %llu, max %lu!\n",
+ table->type, index->buf, table->num_hem);
return -EINVAL;
}
@@ -448,14 +448,14 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_mhop *mhop,
struct hns_roce_hem_index *index)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct device *dev = hr_dev->dev;
u32 step_idx;
int ret = 0;
if (index->inited & HEM_INDEX_L0) {
ret = hr_dev->hw->set_hem(hr_dev, table, obj, 0);
if (ret) {
- ibdev_err(ibdev, "set HEM step 0 failed!\n");
+ dev_err(dev, "set HEM step 0 failed!\n");
goto out;
}
}
@@ -463,7 +463,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev,
if (index->inited & HEM_INDEX_L1) {
ret = hr_dev->hw->set_hem(hr_dev, table, obj, 1);
if (ret) {
- ibdev_err(ibdev, "set HEM step 1 failed!\n");
+ dev_err(dev, "set HEM step 1 failed!\n");
goto out;
}
}
@@ -475,7 +475,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev,
step_idx = mhop->hop_num;
ret = hr_dev->hw->set_hem(hr_dev, table, obj, step_idx);
if (ret)
- ibdev_err(ibdev, "set HEM step last failed!\n");
+ dev_err(dev, "set HEM step last failed!\n");
}
out:
return ret;
@@ -485,14 +485,14 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table,
unsigned long obj)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_hem_index index = {};
struct hns_roce_hem_mhop mhop = {};
+ struct device *dev = hr_dev->dev;
int ret;
ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
if (ret) {
- ibdev_err(ibdev, "calc hem config failed!\n");
+ dev_err(dev, "calc hem config failed!\n");
return ret;
}
@@ -504,7 +504,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
ret = alloc_mhop_hem(hr_dev, table, &mhop, &index);
if (ret) {
- ibdev_err(ibdev, "alloc mhop hem failed!\n");
+ dev_err(dev, "alloc mhop hem failed!\n");
goto out;
}
@@ -512,7 +512,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
if (table->type < HEM_TYPE_MTT) {
ret = set_mhop_hem(hr_dev, table, obj, &mhop, &index);
if (ret) {
- ibdev_err(ibdev, "set HEM address to HW failed!\n");
+ dev_err(dev, "set HEM address to HW failed!\n");
goto err_alloc;
}
}
@@ -575,7 +575,7 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_mhop *mhop,
struct hns_roce_hem_index *index)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct device *dev = hr_dev->dev;
u32 hop_num = mhop->hop_num;
u32 chunk_ba_num;
u32 step_idx;
@@ -605,21 +605,21 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx);
if (ret)
- ibdev_warn(ibdev, "failed to clear hop%u HEM, ret = %d.\n",
- hop_num, ret);
+ dev_warn(dev, "failed to clear hop%u HEM, ret = %d.\n",
+ hop_num, ret);
if (index->inited & HEM_INDEX_L1) {
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 1);
if (ret)
- ibdev_warn(ibdev, "failed to clear HEM step 1, ret = %d.\n",
- ret);
+ dev_warn(dev, "failed to clear HEM step 1, ret = %d.\n",
+ ret);
}
if (index->inited & HEM_INDEX_L0) {
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0);
if (ret)
- ibdev_warn(ibdev, "failed to clear HEM step 0, ret = %d.\n",
- ret);
+ dev_warn(dev, "failed to clear HEM step 0, ret = %d.\n",
+ ret);
}
}
}
@@ -629,14 +629,14 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
unsigned long obj,
int check_refcount)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_hem_index index = {};
struct hns_roce_hem_mhop mhop = {};
+ struct device *dev = hr_dev->dev;
int ret;
ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
if (ret) {
- ibdev_err(ibdev, "calc hem config failed!\n");
+ dev_err(dev, "calc hem config failed!\n");
return;
}
@@ -672,8 +672,8 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT);
if (ret)
- dev_warn(dev, "failed to clear HEM base address, ret = %d.\n",
- ret);
+ dev_warn_ratelimited(dev, "failed to clear HEM base address, ret = %d.\n",
+ ret);
hns_roce_free_hem(hr_dev, table->hem[i]);
table->hem[i] = NULL;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 24e906b9d3ae..697b17cca02e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -373,19 +373,12 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
static int check_send_valid(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
-
if (unlikely(hr_qp->state == IB_QPS_RESET ||
hr_qp->state == IB_QPS_INIT ||
- hr_qp->state == IB_QPS_RTR)) {
- ibdev_err(ibdev, "failed to post WQE, QP state %u!\n",
- hr_qp->state);
+ hr_qp->state == IB_QPS_RTR))
return -EINVAL;
- } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
- ibdev_err(ibdev, "failed to post WQE, dev state %d!\n",
- hr_dev->state);
+ else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
return -EIO;
- }
return 0;
}
@@ -582,7 +575,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
if (WARN_ON(ret))
return ret;
- hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE,
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SO,
(wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE,
@@ -2560,20 +2553,19 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev)
free_link_table_buf(hr_dev, &priv->ext_llm);
}
-static void free_dip_list(struct hns_roce_dev *hr_dev)
+static void free_dip_entry(struct hns_roce_dev *hr_dev)
{
struct hns_roce_dip *hr_dip;
- struct hns_roce_dip *tmp;
- unsigned long flags;
+ unsigned long idx;
- spin_lock_irqsave(&hr_dev->dip_list_lock, flags);
+ xa_lock(&hr_dev->qp_table.dip_xa);
- list_for_each_entry_safe(hr_dip, tmp, &hr_dev->dip_list, node) {
- list_del(&hr_dip->node);
+ xa_for_each(&hr_dev->qp_table.dip_xa, idx, hr_dip) {
+ __xa_erase(&hr_dev->qp_table.dip_xa, hr_dip->dip_idx);
kfree(hr_dip);
}
- spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
+ xa_unlock(&hr_dev->qp_table.dip_xa);
}
static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev)
@@ -2775,8 +2767,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
IB_QPS_INIT, NULL);
if (ret) {
- ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
- ret);
+ ibdev_err_ratelimited(ibdev, "failed to modify qp to init, ret = %d.\n",
+ ret);
return ret;
}
@@ -2981,7 +2973,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
hns_roce_free_link_table(hr_dev);
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09)
- free_dip_list(hr_dev);
+ free_dip_entry(hr_dev);
}
static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev,
@@ -3421,8 +3413,8 @@ static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp)
ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr);
if (ret) {
- ibdev_err(ibdev, "failed to post wqe for free mr, ret = %d.\n",
- ret);
+ ibdev_err_ratelimited(ibdev, "failed to post wqe for free mr, ret = %d.\n",
+ ret);
return ret;
}
@@ -3461,9 +3453,9 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
ret = free_mr_post_send_lp_wqe(hr_qp);
if (ret) {
- ibdev_err(ibdev,
- "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n",
- hr_qp->qpn, ret);
+ ibdev_err_ratelimited(ibdev,
+ "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n",
+ hr_qp->qpn, ret);
break;
}
@@ -3474,16 +3466,16 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
while (cqe_cnt) {
npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc);
if (npolled < 0) {
- ibdev_err(ibdev,
- "failed to poll cqe for free mr, remain %d cqe.\n",
- cqe_cnt);
+ ibdev_err_ratelimited(ibdev,
+ "failed to poll cqe for free mr, remain %d cqe.\n",
+ cqe_cnt);
goto out;
}
if (time_after(jiffies, end)) {
- ibdev_err(ibdev,
- "failed to poll cqe for free mr and timeout, remain %d cqe.\n",
- cqe_cnt);
+ ibdev_err_ratelimited(ibdev,
+ "failed to poll cqe for free mr and timeout, remain %d cqe.\n",
+ cqe_cnt);
goto out;
}
cqe_cnt -= npolled;
@@ -4701,26 +4693,49 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, int attr_mask,
return 0;
}
+static int alloc_dip_entry(struct xarray *dip_xa, u32 qpn)
+{
+ struct hns_roce_dip *hr_dip;
+ int ret;
+
+ hr_dip = xa_load(dip_xa, qpn);
+ if (hr_dip)
+ return 0;
+
+ hr_dip = kzalloc(sizeof(*hr_dip), GFP_KERNEL);
+ if (!hr_dip)
+ return -ENOMEM;
+
+ ret = xa_err(xa_store(dip_xa, qpn, hr_dip, GFP_KERNEL));
+ if (ret)
+ kfree(hr_dip);
+
+ return ret;
+}
+
static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
u32 *dip_idx)
{
const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- u32 *spare_idx = hr_dev->qp_table.idx_table.spare_idx;
- u32 *head = &hr_dev->qp_table.idx_table.head;
- u32 *tail = &hr_dev->qp_table.idx_table.tail;
+ struct xarray *dip_xa = &hr_dev->qp_table.dip_xa;
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct hns_roce_dip *hr_dip;
- unsigned long flags;
+ unsigned long idx;
int ret = 0;
- spin_lock_irqsave(&hr_dev->dip_list_lock, flags);
+ ret = alloc_dip_entry(dip_xa, ibqp->qp_num);
+ if (ret)
+ return ret;
- spare_idx[*tail] = ibqp->qp_num;
- *tail = (*tail == hr_dev->caps.num_qps - 1) ? 0 : (*tail + 1);
+ xa_lock(dip_xa);
- list_for_each_entry(hr_dip, &hr_dev->dip_list, node) {
- if (!memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) {
+ xa_for_each(dip_xa, idx, hr_dip) {
+ if (hr_dip->qp_cnt &&
+ !memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) {
*dip_idx = hr_dip->dip_idx;
+ hr_dip->qp_cnt++;
+ hr_qp->dip = hr_dip;
goto out;
}
}
@@ -4728,19 +4743,24 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
/* If no dgid is found, a new dip and a mapping between dgid and
* dip_idx will be created.
*/
- hr_dip = kzalloc(sizeof(*hr_dip), GFP_ATOMIC);
- if (!hr_dip) {
- ret = -ENOMEM;
- goto out;
+ xa_for_each(dip_xa, idx, hr_dip) {
+ if (hr_dip->qp_cnt)
+ continue;
+
+ *dip_idx = idx;
+ memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
+ hr_dip->dip_idx = idx;
+ hr_dip->qp_cnt++;
+ hr_qp->dip = hr_dip;
+ break;
}
- memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
- hr_dip->dip_idx = *dip_idx = spare_idx[*head];
- *head = (*head == hr_dev->caps.num_qps - 1) ? 0 : (*head + 1);
- list_add_tail(&hr_dip->node, &hr_dev->dip_list);
+ /* This should never happen. */
+ if (WARN_ON_ONCE(!hr_qp->dip))
+ ret = -ENOSPC;
out:
- spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
+ xa_unlock(dip_xa);
return ret;
}
@@ -5061,10 +5081,8 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
int ret = 0;
- if (!check_qp_state(cur_state, new_state)) {
- ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n");
+ if (!check_qp_state(cur_state, new_state))
return -EINVAL;
- }
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
@@ -5325,7 +5343,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
/* SW pass context to HW */
ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp);
if (ret) {
- ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret);
+ ibdev_err_ratelimited(ibdev, "failed to modify QP, ret = %d.\n", ret);
goto out;
}
@@ -5463,7 +5481,9 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context);
if (ret) {
- ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret);
+ ibdev_err_ratelimited(ibdev,
+ "failed to query QPC, ret = %d.\n",
+ ret);
ret = -EINVAL;
goto out;
}
@@ -5471,7 +5491,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
state = hr_reg_read(&context, QPC_QP_ST);
tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
if (tmp_qp_state == -1) {
- ibdev_err(ibdev, "Illegal ib_qp_state\n");
+ ibdev_err_ratelimited(ibdev, "Illegal ib_qp_state\n");
ret = -EINVAL;
goto out;
}
@@ -5564,9 +5584,9 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
hr_qp->state, IB_QPS_RESET, udata);
if (ret)
- ibdev_err(ibdev,
- "failed to modify QP to RST, ret = %d.\n",
- ret);
+ ibdev_err_ratelimited(ibdev,
+ "failed to modify QP to RST, ret = %d.\n",
+ ret);
}
send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
@@ -5594,17 +5614,41 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
return ret;
}
+static void put_dip_ctx_idx(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_dip *hr_dip = hr_qp->dip;
+
+ xa_lock(&hr_dev->qp_table.dip_xa);
+
+ hr_dip->qp_cnt--;
+ if (!hr_dip->qp_cnt)
+ memset(hr_dip->dgid, 0, GID_LEN_V2);
+
+ xa_unlock(&hr_dev->qp_table.dip_xa);
+}
+
int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ unsigned long flags;
int ret;
+ /* Make sure flush_cqe() is completed */
+ spin_lock_irqsave(&hr_qp->flush_lock, flags);
+ set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag);
+ spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
+ flush_work(&hr_qp->flush_work.work);
+
+ if (hr_qp->cong_type == CONG_TYPE_DIP)
+ put_dip_ctx_idx(hr_dev, hr_qp);
+
ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
if (ret)
- ibdev_err(&hr_dev->ib_dev,
- "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
- hr_qp->qpn, ret);
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
+ hr_qp->qpn, ret);
hns_roce_qp_destroy(hr_dev, hr_qp, udata);
@@ -5898,9 +5942,9 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
HNS_ROCE_CMD_MODIFY_CQC, hr_cq->cqn);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret)
- ibdev_err(&hr_dev->ib_dev,
- "failed to process cmd when modifying CQ, ret = %d.\n",
- ret);
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to process cmd when modifying CQ, ret = %d.\n",
+ ret);
err_out:
if (ret)
@@ -5924,9 +5968,9 @@ static int hns_roce_v2_query_cqc(struct hns_roce_dev *hr_dev, u32 cqn,
ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma,
HNS_ROCE_CMD_QUERY_CQC, cqn);
if (ret) {
- ibdev_err(&hr_dev->ib_dev,
- "failed to process cmd when querying CQ, ret = %d.\n",
- ret);
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to process cmd when querying CQ, ret = %d.\n",
+ ret);
goto err_mailbox;
}
@@ -5967,11 +6011,10 @@ err_mailbox:
return ret;
}
-static void hns_roce_irq_work_handle(struct work_struct *work)
+static void dump_aeqe_log(struct hns_roce_work *irq_work)
{
- struct hns_roce_work *irq_work =
- container_of(work, struct hns_roce_work, work);
- struct ib_device *ibdev = &irq_work->hr_dev->ib_dev;
+ struct hns_roce_dev *hr_dev = irq_work->hr_dev;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
switch (irq_work->event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -6015,6 +6058,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
ibdev_warn(ibdev, "DB overflow.\n");
break;
+ case HNS_ROCE_EVENT_TYPE_MB:
+ break;
case HNS_ROCE_EVENT_TYPE_FLR:
ibdev_warn(ibdev, "function level reset.\n");
break;
@@ -6025,8 +6070,46 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
ibdev_err(ibdev, "invalid xrceth error.\n");
break;
default:
+ ibdev_info(ibdev, "Undefined event %d.\n",
+ irq_work->event_type);
break;
}
+}
+
+static void hns_roce_irq_work_handle(struct work_struct *work)
+{
+ struct hns_roce_work *irq_work =
+ container_of(work, struct hns_roce_work, work);
+ struct hns_roce_dev *hr_dev = irq_work->hr_dev;
+ int event_type = irq_work->event_type;
+ u32 queue_num = irq_work->queue_num;
+
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+ case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+ hns_roce_qp_event(hr_dev, queue_num, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ hns_roce_srq_event(hr_dev, queue_num, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+ hns_roce_cq_event(hr_dev, queue_num, event_type);
+ break;
+ default:
+ break;
+ }
+
+ dump_aeqe_log(irq_work);
kfree(irq_work);
}
@@ -6087,14 +6170,14 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
struct hns_roce_eq *eq)
{
- struct device *dev = hr_dev->dev;
struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
irqreturn_t aeqe_found = IRQ_NONE;
+ int num_aeqes = 0;
int event_type;
u32 queue_num;
int sub_type;
- while (aeqe) {
+ while (aeqe && num_aeqes < HNS_AEQ_POLLING_BUDGET) {
/* Make sure we read AEQ entry after we have checked the
* ownership bit
*/
@@ -6105,25 +6188,12 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM);
switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_PATH_MIG:
- case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
- case HNS_ROCE_EVENT_TYPE_COMM_EST:
- case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
- hns_roce_qp_event(hr_dev, queue_num, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
- case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
- hns_roce_srq_event(hr_dev, queue_num, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- hns_roce_cq_event(hr_dev, queue_num, event_type);
+ hns_roce_flush_cqe(hr_dev, queue_num);
break;
case HNS_ROCE_EVENT_TYPE_MB:
hns_roce_cmd_event(hr_dev,
@@ -6131,12 +6201,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
aeqe->event.cmd.status,
le64_to_cpu(aeqe->event.cmd.out_param));
break;
- case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
- case HNS_ROCE_EVENT_TYPE_FLR:
- break;
default:
- dev_err(dev, "unhandled event %d on EQ %d at idx %u.\n",
- event_type, eq->eqn, eq->cons_index);
break;
}
@@ -6150,6 +6215,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
aeqe = next_aeqe_sw_v2(eq);
+ ++num_aeqes;
}
update_eq_db(eq);
@@ -6699,6 +6765,9 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
int ret;
int i;
+ if (hr_dev->caps.aeqe_depth < HNS_AEQ_POLLING_BUDGET)
+ return -EINVAL;
+
other_num = hr_dev->caps.num_other_vectors;
comp_num = hr_dev->caps.num_comp_vectors;
aeq_num = hr_dev->caps.num_aeq_vectors;
@@ -7017,6 +7086,7 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
}
+
static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
{
struct hns_roce_dev *hr_dev;
@@ -7035,6 +7105,9 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
hr_dev->active = false;
hr_dev->dis_db = true;
+
+ rdma_user_mmap_disassociate(&hr_dev->ib_dev);
+
hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
return 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index c65f68a14a26..cbdbc9edbce6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -85,6 +85,11 @@
#define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18)
+/* The budget must be smaller than aeqe_depth to guarantee that the CI is
+ * updated before all the entries in the EQ have been polled.
+ */
+#define HNS_AEQ_POLLING_BUDGET 64
+
enum {
HNS_ROCE_CMD_FLAG_IN = BIT(0),
HNS_ROCE_CMD_FLAG_OUT = BIT(1),
@@ -919,6 +924,7 @@ struct hns_roce_v2_rc_send_wqe {
#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7)
#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8)
#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9)
+#define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10)
#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11)
#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12)
#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15)
@@ -1342,7 +1348,7 @@ struct hns_roce_v2_priv {
struct hns_roce_dip {
u8 dgid[GID_LEN_V2];
u32 dip_idx;
- struct list_head node; /* all dips are on a list */
+ u32 qp_cnt;
};
struct fmea_ram_ecc {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 4cb0af733587..ae24c81c9812 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -466,6 +466,11 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
pgprot_t prot;
int ret;
+ if (hr_dev->dis_db) {
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
+ return -EPERM;
+ }
+
rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff);
if (!rdma_entry) {
atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
@@ -1130,8 +1135,6 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
INIT_LIST_HEAD(&hr_dev->qp_list);
spin_lock_init(&hr_dev->qp_list_lock);
- INIT_LIST_HEAD(&hr_dev->dip_list);
- spin_lock_init(&hr_dev->dip_list_lock);
ret = hns_roce_register_device(hr_dev);
if (ret)
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 846da8c78b8b..bf30b3a65a9b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -138,8 +138,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr
key_to_hw_index(mr->key) &
(hr_dev->caps.num_mtpts - 1));
if (ret)
- ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n",
- ret);
+ ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n",
+ ret);
}
free_mr_pbl(hr_dev, mr);
@@ -435,15 +435,16 @@ static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
}
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
- unsigned int *sg_offset)
+ unsigned int *sg_offset_p)
{
+ unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_mr *mr = to_hr_mr(ibmr);
struct hns_roce_mtr *mtr = &mr->pbl_mtr;
int ret, sg_num = 0;
- if (!IS_ALIGNED(*sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) ||
+ if (!IS_ALIGNED(sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) ||
ibmr->page_size < HNS_HW_PAGE_SIZE ||
ibmr->page_size > HNS_HW_MAX_PAGE_SIZE)
return sg_num;
@@ -454,7 +455,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
if (!mr->page_list)
return sg_num;
- sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
+ sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset_p, hns_roce_set_page);
if (sg_num < 1) {
ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, sg_num);
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 6b03ba671ff8..9e2e76c59406 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -39,6 +39,25 @@
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
+static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev,
+ u32 qpn)
+{
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_qp *qp;
+ unsigned long flags;
+
+ xa_lock_irqsave(&hr_dev->qp_table_xa, flags);
+ qp = __hns_roce_qp_lookup(hr_dev, qpn);
+ if (qp)
+ refcount_inc(&qp->refcount);
+ xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags);
+
+ if (!qp)
+ dev_warn(dev, "async event for bogus QP %08x\n", qpn);
+
+ return qp;
+}
+
static void flush_work_handle(struct work_struct *work)
{
struct hns_roce_work *flush_work = container_of(work,
@@ -71,11 +90,18 @@ static void flush_work_handle(struct work_struct *work)
void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
struct hns_roce_work *flush_work = &hr_qp->flush_work;
+ unsigned long flags;
+
+ spin_lock_irqsave(&hr_qp->flush_lock, flags);
+ /* Exit directly after destroy_qp() */
+ if (test_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag)) {
+ spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
+ return;
+ }
- flush_work->hr_dev = hr_dev;
- INIT_WORK(&flush_work->work, flush_work_handle);
refcount_inc(&hr_qp->refcount);
queue_work(hr_dev->irq_workq, &flush_work->work);
+ spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
}
void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
@@ -95,31 +121,28 @@ void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
{
- struct device *dev = hr_dev->dev;
struct hns_roce_qp *qp;
- xa_lock(&hr_dev->qp_table_xa);
- qp = __hns_roce_qp_lookup(hr_dev, qpn);
- if (qp)
- refcount_inc(&qp->refcount);
- xa_unlock(&hr_dev->qp_table_xa);
-
- if (!qp) {
- dev_warn(dev, "async event for bogus QP %08x\n", qpn);
+ qp = hns_roce_qp_lookup(hr_dev, qpn);
+ if (!qp)
return;
- }
- if (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR ||
- event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR ||
- event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR ||
- event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION ||
- event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH) {
- qp->state = IB_QPS_ERR;
+ qp->event(qp, (enum hns_roce_event)event_type);
- flush_cqe(hr_dev, qp);
- }
+ if (refcount_dec_and_test(&qp->refcount))
+ complete(&qp->free);
+}
- qp->event(qp, (enum hns_roce_event)event_type);
+void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn)
+{
+ struct hns_roce_qp *qp;
+
+ qp = hns_roce_qp_lookup(hr_dev, qpn);
+ if (!qp)
+ return;
+
+ qp->state = IB_QPS_ERR;
+ flush_cqe(hr_dev, qp);
if (refcount_dec_and_test(&qp->refcount))
complete(&qp->free);
@@ -1124,6 +1147,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
struct ib_udata *udata,
struct hns_roce_qp *hr_qp)
{
+ struct hns_roce_work *flush_work = &hr_qp->flush_work;
struct hns_roce_ib_create_qp_resp resp = {};
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_ib_create_qp ucmd = {};
@@ -1132,9 +1156,12 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
mutex_init(&hr_qp->mutex);
spin_lock_init(&hr_qp->sq.lock);
spin_lock_init(&hr_qp->rq.lock);
+ spin_lock_init(&hr_qp->flush_lock);
hr_qp->state = IB_QPS_RESET;
hr_qp->flush_flag = 0;
+ flush_work->hr_dev = hr_dev;
+ INIT_WORK(&flush_work->work, flush_work_handle);
if (init_attr->create_flags)
return -EOPNOTSUPP;
@@ -1546,14 +1573,10 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
unsigned int reserved_from_bot;
unsigned int i;
- qp_table->idx_table.spare_idx = kcalloc(hr_dev->caps.num_qps,
- sizeof(u32), GFP_KERNEL);
- if (!qp_table->idx_table.spare_idx)
- return -ENOMEM;
-
mutex_init(&qp_table->scc_mutex);
mutex_init(&qp_table->bank_mutex);
xa_init(&hr_dev->qp_table_xa);
+ xa_init(&qp_table->dip_xa);
reserved_from_bot = hr_dev->caps.reserved_qps;
@@ -1578,7 +1601,7 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
ida_destroy(&hr_dev->qp_table.bank[i].ida);
+ xa_destroy(&hr_dev->qp_table.dip_xa);
mutex_destroy(&hr_dev->qp_table.bank_mutex);
mutex_destroy(&hr_dev->qp_table.scc_mutex);
- kfree(hr_dev->qp_table.idx_table.spare_idx);
}
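
Taken together with the destroy path in hns_roce_hw_v2.c above, the flush-work lifetime is now governed by a small protocol: init_flush_work() takes a QP reference and queues the (pre-initialized) work only while holding flush_lock and observing HNS_ROCE_STOP_FLUSH_FLAG clear, while hns_roce_v2_destroy_qp() sets that flag under the same lock and then flush_work()s anything already queued, so no flush work can run against a QP that is being torn down.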
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index c9b8233f4b05..70c06ef65603 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -151,8 +151,8 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ,
srq->srqn);
if (ret)
- dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
- ret, srq->srqn);
+ dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
+ ret, srq->srqn);
xa_erase_irq(&srq_table->xa, srq->srqn);
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 69999d8d24f3..4186884c66e1 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -27,6 +27,19 @@ enum devx_obj_flags {
DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
DEVX_OBJ_FLAGS_DCT = 1 << 1,
DEVX_OBJ_FLAGS_CQ = 1 << 2,
+ DEVX_OBJ_FLAGS_HW_FREED = 1 << 3,
+};
+
+#define MAX_ASYNC_CMDS 8
+
+struct mlx5_async_cmd {
+ struct ib_uobject *uobject;
+ void *in;
+ int in_size;
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ int err;
+ struct mlx5_async_work cb_work;
+ struct completion comp;
};
struct devx_async_data {
@@ -1405,7 +1418,9 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
*/
mlx5r_deref_wait_odp_mkey(&obj->mkey);
- if (obj->flags & DEVX_OBJ_FLAGS_DCT)
+ if (obj->flags & DEVX_OBJ_FLAGS_HW_FREED)
+ ret = 0;
+ else if (obj->flags & DEVX_OBJ_FLAGS_DCT)
ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
@@ -2595,6 +2610,82 @@ void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
}
}
+static void devx_async_destroy_cb(int status, struct mlx5_async_work *context)
+{
+ struct mlx5_async_cmd *devx_out = container_of(context,
+ struct mlx5_async_cmd, cb_work);
+ struct devx_obj *obj = devx_out->uobject->object;
+
+ if (!status)
+ obj->flags |= DEVX_OBJ_FLAGS_HW_FREED;
+
+ complete(&devx_out->comp);
+}
+
+static void devx_async_destroy(struct mlx5_ib_dev *dev,
+ struct mlx5_async_cmd *cmd)
+{
+ init_completion(&cmd->comp);
+ cmd->err = mlx5_cmd_exec_cb(&dev->async_ctx, cmd->in, cmd->in_size,
+ &cmd->out, sizeof(cmd->out),
+ devx_async_destroy_cb, &cmd->cb_work);
+}
+
+static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd)
+{
+ if (!cmd->err)
+ wait_for_completion(&cmd->comp);
+ atomic_set(&cmd->uobject->usecnt, 0);
+}
+
+void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
+{
+ struct mlx5_async_cmd async_cmd[MAX_ASYNC_CMDS];
+ struct ib_ucontext *ucontext = ufile->ucontext;
+ struct ib_device *device = ucontext->device;
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct ib_uobject *uobject;
+ struct devx_obj *obj;
+ int head = 0;
+ int tail = 0;
+
+ list_for_each_entry(uobject, &ufile->uobjects, list) {
+ WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE));
+
+ /*
+ * Currently we only support QP destruction; if other objects are
+ * to be destroyed, type synchronization must be added to the
+ * cleanup algorithm and pre/post FW cleanup handled for the new
+ * types as needed.
+ */
+ if (uobj_get_object_id(uobject) != MLX5_IB_OBJECT_DEVX_OBJ ||
+ (get_dec_obj_type(uobject->object, MLX5_EVENT_TYPE_MAX) !=
+ MLX5_OBJ_TYPE_QP)) {
+ atomic_set(&uobject->usecnt, 0);
+ continue;
+ }
+
+ obj = uobject->object;
+
+ async_cmd[tail % MAX_ASYNC_CMDS].in = obj->dinbox;
+ async_cmd[tail % MAX_ASYNC_CMDS].in_size = obj->dinlen;
+ async_cmd[tail % MAX_ASYNC_CMDS].uobject = uobject;
+
+ devx_async_destroy(dev, &async_cmd[tail % MAX_ASYNC_CMDS]);
+ tail++;
+
+ if (tail - head == MAX_ASYNC_CMDS) {
+ devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]);
+ head++;
+ }
+ }
+
+ while (head != tail) {
+ devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]);
+ head++;
+ }
+}
+
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h
index ee2213275fd6..1344bf4c9d21 100644
--- a/drivers/infiniband/hw/mlx5/devx.h
+++ b/drivers/infiniband/hw/mlx5/devx.h
@@ -28,6 +28,7 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
int mlx5_ib_devx_init(struct mlx5_ib_dev *dev);
void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev);
+void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile);
#else
static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
{
@@ -41,5 +42,8 @@ static inline int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
static inline void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
{
}
+static inline void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
+{
+}
#endif
#endif /* _MLX5_IB_DEVX_H */
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 1b6c5e37d169..2453ae4384a7 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -278,7 +278,13 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
goto done;
}
- err = query_ib_ppcnt(mdev, mdev_port_num, 0, out_cnt, sz, 0);
+ if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI)
+ err = query_ib_ppcnt(mdev, mdev_port_num, port_num,
+ out_cnt, sz, 0);
+ else
+ err = query_ib_ppcnt(mdev, mdev_port_num, 0,
+ out_cnt, sz, 0);
+
if (!err)
pma_cnt_assign(pma_cnt, out_cnt);
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4999239c8f41..bc7930d0c564 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1182,6 +1182,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
+
+ if (MLX5_CAP_GEN_2(mdev, dp_ordering_force) &&
+ (MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_xrc) ||
+ MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_dc) ||
+ MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_rc) ||
+ MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_ud) ||
+ MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_uc)))
+ resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_OOO_DP;
}
if (offsetofend(typeof(resp), sw_parsing_caps) <= uhw_outlen) {
@@ -2997,7 +3005,6 @@ unlock:
static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
{
struct mlx5_ib_resources *devr = &dev->devr;
- int port;
int ret;
if (!MLX5_CAP_GEN(dev->mdev, xrc))
@@ -3013,10 +3020,6 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
return ret;
}
- for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
- INIT_WORK(&devr->ports[port].pkey_change_work,
- pkey_change_handler);
-
mutex_init(&devr->cq_lock);
mutex_init(&devr->srq_lock);
@@ -3026,16 +3029,6 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_ib_resources *devr = &dev->devr;
- int port;
-
- /*
- * Make sure no change P_Key work items are still executing.
- *
- * At this stage, the mlx5_ib_event should be unregistered
- * and it ensures that no new works are added.
- */
- for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
- cancel_work_sync(&devr->ports[port].pkey_change_work);
/* After s0/s1 init, they are not unset during the device lifetime. */
if (devr->s1) {
@@ -3211,12 +3204,14 @@ static int lag_event(struct notifier_block *nb, unsigned long event, void *data)
struct mlx5_ib_dev *dev = container_of(nb, struct mlx5_ib_dev,
lag_events);
struct mlx5_core_dev *mdev = dev->mdev;
+ struct ib_device *ibdev = &dev->ib_dev;
+ struct net_device *old_ndev = NULL;
struct mlx5_ib_port *port;
struct net_device *ndev;
- int i, err;
- int portnum;
+ u32 portnum = 0;
+ int ret = 0;
+ int i;
- portnum = 0;
switch (event) {
case MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE:
ndev = data;
@@ -3232,19 +3227,24 @@ static int lag_event(struct notifier_block *nb, unsigned long event, void *data)
}
}
}
- err = ib_device_set_netdev(&dev->ib_dev, ndev,
- portnum + 1);
- dev_put(ndev);
- if (err)
- return err;
- /* Rescan gids after new netdev assignment */
- rdma_roce_rescan_device(&dev->ib_dev);
+ old_ndev = ib_device_get_netdev(ibdev, portnum + 1);
+ ret = ib_device_set_netdev(ibdev, ndev, portnum + 1);
+ if (ret)
+ goto out;
+
+ if (old_ndev)
+ roce_del_all_netdev_gids(ibdev, portnum + 1,
+ old_ndev);
+ rdma_roce_rescan_port(ibdev, portnum + 1);
}
break;
default:
return NOTIFY_DONE;
}
- return NOTIFY_OK;
+
+out:
+ dev_put(old_ndev);
+ return notifier_from_errno(ret);
}
static void mlx5e_lag_event_register(struct mlx5_ib_dev *dev)
@@ -4134,6 +4134,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.req_notify_cq = mlx5_ib_arm_cq,
.rereg_user_mr = mlx5_ib_rereg_user_mr,
.resize_cq = mlx5_ib_resize_cq,
+ .ufile_hw_cleanup = mlx5_ib_ufile_hw_cleanup,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
@@ -4464,6 +4465,13 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
{
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int port;
+
+ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
+ INIT_WORK(&devr->ports[port].pkey_change_work,
+ pkey_change_handler);
+
dev->mdev_events.notifier_call = mlx5_ib_event;
mlx5_notifier_register(dev->mdev, &dev->mdev_events);
@@ -4474,8 +4482,14 @@ static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
{
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int port;
+
mlx5r_macsec_event_unregister(dev);
mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
+
+ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
+ cancel_work_sync(&devr->ports[port].pkey_change_work);
}
void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev,
@@ -4565,9 +4579,6 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_dev_res_init,
mlx5_ib_dev_res_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
- mlx5_ib_stage_dev_notifier_init,
- mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_ODP,
mlx5_ib_odp_init_one,
mlx5_ib_odp_cleanup_one),
@@ -4592,6 +4603,9 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
@@ -4628,9 +4642,6 @@ const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_dev_res_init,
mlx5_ib_dev_res_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
- mlx5_ib_stage_dev_notifier_init,
- mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
mlx5_ib_counters_init,
mlx5_ib_counters_cleanup),
@@ -4652,6 +4663,9 @@ const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
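Editor's note (sketch, not part of the patch): in the query_device hunk above, MLX5_IB_QUERY_DEV_RESP_FLAGS_OOO_DP is reported only when dp_ordering_force is supported together with at least one per-transport out-of-order capability. A hypothetical predicate (mlx5_any_dp_ooo_cap is illustrative only) with the same meaning:

static bool mlx5_any_dp_ooo_cap(struct mlx5_core_dev *mdev)
{
	/* force bit plus at least one transport that may complete out of order */
	return MLX5_CAP_GEN_2(mdev, dp_ordering_force) &&
	       (MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_xrc) ||
		MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_dc) ||
		MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_rc) ||
		MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_ud) ||
		MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_uc));
}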
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 23fd72f7f63d..a01b592aa716 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -521,6 +521,7 @@ struct mlx5_ib_qp {
struct mlx5_bf bf;
u8 has_rq:1;
u8 is_rss:1;
+ u8 is_ooo_rq:1;
/* only for user space QPs. For kernel
* we have it from the bf object
@@ -972,7 +973,6 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_QP,
MLX5_IB_STAGE_SRQ,
MLX5_IB_STAGE_DEVICE_RESOURCES,
- MLX5_IB_STAGE_DEVICE_NOTIFIER,
MLX5_IB_STAGE_ODP,
MLX5_IB_STAGE_COUNTERS,
MLX5_IB_STAGE_CONG_DEBUGFS,
@@ -981,6 +981,7 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_PRE_IB_REG_UMR,
MLX5_IB_STAGE_WHITELIST_UID,
MLX5_IB_STAGE_IB_REG,
+ MLX5_IB_STAGE_DEVICE_NOTIFIER,
MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
MLX5_IB_STAGE_RESTRACK,
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 10ce3b44f645..a43eba9d3572 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1960,7 +1960,7 @@ static int atomic_size_to_mode(int size_mask)
}
static int get_atomic_mode(struct mlx5_ib_dev *dev,
- enum ib_qp_type qp_type)
+ struct mlx5_ib_qp *qp)
{
u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic);
@@ -1970,7 +1970,7 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev,
if (!atomic)
return -EOPNOTSUPP;
- if (qp_type == MLX5_IB_QPT_DCT)
+ if (qp->type == MLX5_IB_QPT_DCT)
atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
else
atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
@@ -1984,6 +1984,10 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev,
atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD))
atomic_mode = MLX5_ATOMIC_MODE_IB_COMP;
+	/* OOO DP QPs do not support atomic operations larger than 8 bytes */
+ if (atomic_mode > MLX5_ATOMIC_MODE_8B && qp->is_ooo_rq)
+ atomic_mode = MLX5_ATOMIC_MODE_8B;
+
return atomic_mode;
}
@@ -2839,6 +2843,29 @@ static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return 0;
}
+static bool get_dp_ooo_cap(struct mlx5_core_dev *mdev, enum ib_qp_type qp_type)
+{
+ if (!MLX5_CAP_GEN_2(mdev, dp_ordering_force))
+ return false;
+
+ switch (qp_type) {
+ case IB_QPT_RC:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_rc);
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_xrc);
+ case IB_QPT_UC:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_uc);
+ case IB_QPT_UD:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_ud);
+ case MLX5_IB_QPT_DCI:
+ case MLX5_IB_QPT_DCT:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_dc);
+ default:
+ return false;
+ }
+}
+
static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
bool cond, struct mlx5_ib_qp *qp)
{
@@ -3365,7 +3392,7 @@ static int set_qpc_atomic_flags(struct mlx5_ib_qp *qp,
if (access_flags & IB_ACCESS_REMOTE_ATOMIC) {
int atomic_mode;
- atomic_mode = get_atomic_mode(dev, qp->type);
+ atomic_mode = get_atomic_mode(dev, qp);
if (atomic_mode < 0)
return -EOPNOTSUPP;
@@ -4316,6 +4343,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
MLX5_SET(qpc, qpc, deth_sqpn, 1);
+ if (qp->is_ooo_rq && cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ MLX5_SET(qpc, qpc, dp_ordering_1, 1);
+ MLX5_SET(qpc, qpc, dp_ordering_force, 1);
+ }
+
mlx5_cur = to_mlx5_state(cur_state);
mlx5_new = to_mlx5_state(new_state);
@@ -4531,7 +4563,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
int atomic_mode;
- atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT);
+ atomic_mode = get_atomic_mode(dev, qp);
if (atomic_mode < 0)
return -EOPNOTSUPP;
@@ -4573,6 +4605,10 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit);
if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7);
+ if (qp->is_ooo_rq) {
+ MLX5_SET(dctc, dctc, dp_ordering_1, 1);
+ MLX5_SET(dctc, dctc, dp_ordering_force, 1);
+ }
err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in,
MLX5_ST_SZ_BYTES(create_dct_in), out,
@@ -4676,11 +4712,16 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
min(udata->inlen, sizeof(ucmd))))
return -EFAULT;
- if (ucmd.comp_mask ||
+ if (ucmd.comp_mask & ~MLX5_IB_MODIFY_QP_OOO_DP ||
memchr_inv(&ucmd.burst_info.reserved, 0,
sizeof(ucmd.burst_info.reserved)))
return -EOPNOTSUPP;
+ if (ucmd.comp_mask & MLX5_IB_MODIFY_QP_OOO_DP) {
+ if (!get_dp_ooo_cap(dev->mdev, qp->type))
+ return -EOPNOTSUPP;
+ qp->is_ooo_rq = 1;
+ }
}
if (qp->type == IB_QPT_GSI)