summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-01-31 21:05:10 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2018-01-31 21:05:10 +0100
commit7b1cd95d65eb3b1e13f8a90eb757e0ea232c7899 (patch)
treecbc3ec5d45b04666c24f7c0b1df04a85d29c7d0f
parentMerge tag 'dmaengine-4.16-rc1' of git://git.infradead.org/users/vkoul/slave-dma (diff)
parentMerge tag v4.15 of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/lin... (diff)
downloadlinux-7b1cd95d65eb3b1e13f8a90eb757e0ea232c7899.tar.xz
linux-7b1cd95d65eb3b1e13f8a90eb757e0ea232c7899.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull RDMA subsystem updates from Jason Gunthorpe: "Overall this cycle did not have any major excitement, and did not require any shared branch with netdev. Lots of driver updates, particularly of the scale-up and performance variety. The largest body of core work was Parav's patches fixing and restructing some of the core code to make way for future RDMA containerization. Summary: - misc small driver fixups to bnxt_re/hfi1/qib/hns/ocrdma/rdmavt/vmw_pvrdma/nes - several major feature adds to bnxt_re driver: SRIOV VF RoCE support, HugePages support, extended hardware stats support, and SRQ support - a notable number of fixes to the i40iw driver from debugging scale up testing - more work to enable the new hip08 chip in the hns driver - misc small ULP fixups to srp/srpt//ipoib - preparation for srp initiator and target to support the RDMA-CM protocol for connections - add RDMA-CM support to srp initiator, srp target is still a WIP - fixes for a couple of places where ipoib could spam the dmesg log - fix encode/decode of FDR/EDR data rates in the core - many patches from Parav with ongoing work to clean up inconsistencies and bugs in RoCE support around the rdma_cm - mlx5 driver support for the userspace features 'thread domain', 'wallclock timestamps' and 'DV Direct Connected transport'. Support for the firmware dual port rocee capability - core support for more than 32 rdma devices in the char dev allocation - kernel doc updates from Randy Dunlap - new netlink uAPI for inspecting RDMA objects similar in spirit to 'ss' - one minor change to the kobject code acked by Greg KH" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (259 commits) RDMA/nldev: Provide detailed QP information RDMA/nldev: Provide global resource utilization RDMA/core: Add resource tracking for create and destroy PDs RDMA/core: Add resource tracking for create and destroy CQs RDMA/core: Add resource tracking for create and destroy QPs RDMA/restrack: Add general infrastructure to track RDMA resources RDMA/core: Save kernel caller name when creating PD and CQ objects RDMA/core: Use the MODNAME instead of the function name for pd callers RDMA: Move enum ib_cq_creation_flags to uapi headers IB/rxe: Change RDMA_RXE kconfig to use select IB/qib: remove qib_keys.c IB/mthca: remove mthca_user.h RDMA/cm: Fix access to uninitialized variable RDMA/cma: Use existing netif_is_bond_master function IB/core: Avoid SGID attributes query while converting GID from OPA to IB RDMA/mlx5: Avoid memory leak in case of XRCD dealloc failure IB/umad: Fix use of unprotected device pointer IB/iser: Combine substrings for three messages IB/iser: Delete an unnecessary variable initialisation in iser_send_data_out() IB/iser: Delete an error message for a failed memory allocation in iser_send_data_out() ...
-rw-r--r--MAINTAINERS7
-rw-r--r--drivers/infiniband/core/Makefile2
-rw-r--r--drivers/infiniband/core/addr.c65
-rw-r--r--drivers/infiniband/core/cache.c23
-rw-r--r--drivers/infiniband/core/cm.c227
-rw-r--r--drivers/infiniband/core/cma.c252
-rw-r--r--drivers/infiniband/core/cma_configfs.c2
-rw-r--r--drivers/infiniband/core/core_priv.h52
-rw-r--r--drivers/infiniband/core/cq.c39
-rw-r--r--drivers/infiniband/core/device.c42
-rw-r--r--drivers/infiniband/core/fmr_pool.c12
-rw-r--r--drivers/infiniband/core/iwpm_util.c1
-rw-r--r--drivers/infiniband/core/mad.c1
-rw-r--r--drivers/infiniband/core/netlink.c10
-rw-r--r--drivers/infiniband/core/nldev.c394
-rw-r--r--drivers/infiniband/core/restrack.c164
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c13
-rw-r--r--drivers/infiniband/core/sa_query.c18
-rw-r--r--drivers/infiniband/core/security.c10
-rw-r--r--drivers/infiniband/core/sysfs.c1
-rw-r--r--drivers/infiniband/core/ucm.c73
-rw-r--r--drivers/infiniband/core/ucma.c19
-rw-r--r--drivers/infiniband/core/umem.c2
-rw-r--r--drivers/infiniband/core/user_mad.c123
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c14
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c19
-rw-r--r--drivers/infiniband/core/uverbs_main.c95
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c3
-rw-r--r--drivers/infiniband/core/verbs.c312
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h43
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.c145
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.h39
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c404
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h20
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c251
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c463
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.h78
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c5
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.h7
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c9
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c141
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.h91
-rw-r--r--drivers/infiniband/hw/bnxt_re/roce_hsi.h127
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c27
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c36
-rw-r--r--drivers/infiniband/hw/cxgb4/ev.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h4
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c6
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h4
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c87
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h2
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c16
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c64
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h25
-rw-r--r--drivers/infiniband/hw/hfi1/init.c2
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c6
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c10
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c8
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c6
-rw-r--r--drivers/infiniband/hw/hns/Makefile2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.c1
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.h10
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_common.h11
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c19
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h103
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_eq.c759
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_eq.h134
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c758
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.h44
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c1837
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h283
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c16
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c72
-rw-r--r--drivers/infiniband/hw/i40iw/Kconfig1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw.h3
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c68
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.h8
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_ctrl.c25
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_d.h1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c3
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c13
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.c5
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.h1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_uk.c18
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_user.h3
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_utils.c50
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c5
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c4
-rw-r--r--drivers/infiniband/hw/mlx4/main.c19
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c20
-rw-r--r--drivers/infiniband/hw/mlx5/cong.c83
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c2
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c23
-rw-r--r--drivers/infiniband/hw/mlx5/main.c1353
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h111
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c3
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c9
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c432
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c7
-rw-r--r--drivers/infiniband/hw/mthca/mthca_user.h112
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h3
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c19
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c8
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c10
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c15
-rw-r--r--drivers/infiniband/hw/qib/qib.h8
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c16
-rw-r--r--drivers/infiniband/hw/qib/qib_eeprom.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c68
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_keys.c235
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.c1
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c1
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma.h4
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c13
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c21
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c15
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c11
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c15
-rw-r--r--drivers/infiniband/sw/rdmavt/mcast.c4
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c27
-rw-r--r--drivers/infiniband/sw/rdmavt/srq.c16
-rw-r--r--drivers/infiniband/sw/rdmavt/trace.h4
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_qp.h42
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c10
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.h6
-rw-r--r--drivers/infiniband/sw/rxe/Kconfig4
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c6
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c18
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.h1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c12
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c9
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h3
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c18
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c5
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c98
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c6
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c16
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c7
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h1
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c2
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c795
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h43
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c962
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h100
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/qp.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c55
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c125
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c91
-rw-r--r--include/linux/mlx5/device.h16
-rw-r--r--include/linux/mlx5/driver.h43
-rw-r--r--include/linux/mlx5/mlx5_ifc.h32
-rw-r--r--include/linux/mlx5/qp.h12
-rw-r--r--include/linux/mlx5/vport.h4
-rw-r--r--include/rdma/ib_addr.h38
-rw-r--r--include/rdma/ib_sa.h10
-rw-r--r--include/rdma/ib_verbs.h64
-rw-r--r--include/rdma/opa_addr.h16
-rw-r--r--include/rdma/rdma_cm.h19
-rw-r--r--include/rdma/rdma_cm_ib.h8
-rw-r--r--include/rdma/rdma_vt.h31
-rw-r--r--include/rdma/restrack.h157
-rw-r--r--include/scsi/srp.h17
-rw-r--r--include/uapi/rdma/bnxt_re-abi.h9
-rw-r--r--include/uapi/rdma/ib_user_verbs.h11
-rw-r--r--include/uapi/rdma/mlx4-abi.h7
-rw-r--r--include/uapi/rdma/mlx5-abi.h53
-rw-r--r--include/uapi/rdma/rdma_netlink.h49
-rw-r--r--include/uapi/rdma/vmw_pvrdma-abi.h12
-rw-r--r--lib/kobject.c2
-rw-r--r--net/rds/ib.c6
187 files changed, 10211 insertions, 3999 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 7b5ef9a58c38..88cdd2925cef 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6892,7 +6892,7 @@ M: Jason Gunthorpe <jgg@mellanox.com>
L: linux-rdma@vger.kernel.org
W: http://www.openfabrics.org/
Q: http://patchwork.kernel.org/project/linux-rdma/list/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git
S: Supported
F: Documentation/devicetree/bindings/infiniband/
F: Documentation/infiniband/
@@ -11218,7 +11218,8 @@ S: Maintained
F: drivers/firmware/qemu_fw_cfg.c
QIB DRIVER
-M: Mike Marciniszyn <infinipath@intel.com>
+M: Dennis Dalessandro <dennis.dalessandro@intel.com>
+M: Mike Marciniszyn <mike.marciniszyn@intel.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/qib/
@@ -11245,7 +11246,6 @@ F: include/linux/qed/
F: drivers/net/ethernet/qlogic/qede/
QLOGIC QL4xxx RDMA DRIVER
-M: Ram Amrani <Ram.Amrani@cavium.com>
M: Michal Kalderon <Michal.Kalderon@cavium.com>
M: Ariel Elior <Ariel.Elior@cavium.com>
L: linux-rdma@vger.kernel.org
@@ -11507,6 +11507,7 @@ F: drivers/net/ethernet/rdc/r6040.c
RDMAVT - RDMA verbs software
M: Dennis Dalessandro <dennis.dalessandro@intel.com>
+M: Mike Marciniszyn <mike.marciniszyn@intel.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/sw/rdmavt
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 504b926552c6..f69833db0a32 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
- security.o nldev.o
+ security.o nldev.o restrack.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index f4e8185bccd3..a5b4cf030c11 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -243,8 +243,7 @@ void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
EXPORT_SYMBOL(rdma_copy_addr);
int rdma_translate_ip(const struct sockaddr *addr,
- struct rdma_dev_addr *dev_addr,
- u16 *vlan_id)
+ struct rdma_dev_addr *dev_addr)
{
struct net_device *dev;
@@ -266,9 +265,6 @@ int rdma_translate_ip(const struct sockaddr *addr,
return -EADDRNOTAVAIL;
rdma_copy_addr(dev_addr, dev, NULL);
- dev_addr->bound_dev_if = dev->ifindex;
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;
#if IS_ENABLED(CONFIG_IPV6)
@@ -279,9 +275,6 @@ int rdma_translate_ip(const struct sockaddr *addr,
&((const struct sockaddr_in6 *)addr)->sin6_addr,
dev, 1)) {
rdma_copy_addr(dev_addr, dev, NULL);
- dev_addr->bound_dev_if = dev->ifindex;
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(dev);
break;
}
}
@@ -481,7 +474,7 @@ static int addr_resolve_neigh(struct dst_entry *dst,
if (dst->dev->flags & IFF_LOOPBACK) {
int ret;
- ret = rdma_translate_ip(dst_in, addr, NULL);
+ ret = rdma_translate_ip(dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr,
MAX_ADDR_LEN);
@@ -558,7 +551,7 @@ static int addr_resolve(struct sockaddr *src_in,
}
if (ndev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip(dst_in, addr, NULL);
+ ret = rdma_translate_ip(dst_in, addr);
/*
* Put the loopback device and get the translated
* device instead.
@@ -744,7 +737,6 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
EXPORT_SYMBOL(rdma_addr_cancel);
struct resolve_cb_context {
- struct rdma_dev_addr *addr;
struct completion comp;
int status;
};
@@ -752,39 +744,31 @@ struct resolve_cb_context {
static void resolve_cb(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context)
{
- if (!status)
- memcpy(((struct resolve_cb_context *)context)->addr,
- addr, sizeof(struct rdma_dev_addr));
((struct resolve_cb_context *)context)->status = status;
complete(&((struct resolve_cb_context *)context)->comp);
}
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
- u8 *dmac, u16 *vlan_id, int *if_index,
+ u8 *dmac, const struct net_device *ndev,
int *hoplimit)
{
- int ret = 0;
struct rdma_dev_addr dev_addr;
struct resolve_cb_context ctx;
- struct net_device *dev;
-
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
-
+ int ret;
rdma_gid2ip(&sgid_addr._sockaddr, sgid);
rdma_gid2ip(&dgid_addr._sockaddr, dgid);
memset(&dev_addr, 0, sizeof(dev_addr));
- if (if_index)
- dev_addr.bound_dev_if = *if_index;
+ dev_addr.bound_dev_if = ndev->ifindex;
dev_addr.net = &init_net;
- ctx.addr = &dev_addr;
init_completion(&ctx.comp);
ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
&dev_addr, 1000, resolve_cb, &ctx);
@@ -798,42 +782,9 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
return ret;
memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
- dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
- if (!dev)
- return -ENODEV;
- if (if_index)
- *if_index = dev_addr.bound_dev_if;
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(dev);
- if (hoplimit)
- *hoplimit = dev_addr.hoplimit;
- dev_put(dev);
- return ret;
-}
-EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
-
-int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
-{
- int ret = 0;
- struct rdma_dev_addr dev_addr;
- union {
- struct sockaddr _sockaddr;
- struct sockaddr_in _sockaddr_in;
- struct sockaddr_in6 _sockaddr_in6;
- } gid_addr;
-
- rdma_gid2ip(&gid_addr._sockaddr, sgid);
-
- memset(&dev_addr, 0, sizeof(dev_addr));
- dev_addr.net = &init_net;
- ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
- if (ret)
- return ret;
-
- memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
- return ret;
+ *hoplimit = dev_addr.hoplimit;
+ return 0;
}
-EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 77515638c55c..e9a409d7f4e2 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -573,27 +573,24 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
struct ib_gid_attr attr;
if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
- goto next;
+ continue;
if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
- goto next;
+ continue;
memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
- if (filter(gid, &attr, context))
+ if (filter(gid, &attr, context)) {
found = true;
-
-next:
- if (found)
+ if (index)
+ *index = i;
break;
+ }
}
read_unlock_irqrestore(&table->rwlock, flags);
if (!found)
return -ENOENT;
-
- if (index)
- *index = i;
return 0;
}
@@ -824,12 +821,7 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
if (err)
return err;
- err = roce_rescan_device(ib_dev);
-
- if (err) {
- gid_table_cleanup_one(ib_dev);
- gid_table_release_one(ib_dev);
- }
+ rdma_roce_rescan_device(ib_dev);
return err;
}
@@ -883,7 +875,6 @@ int ib_find_gid_by_filter(struct ib_device *device,
port_num, filter,
context, index);
}
-EXPORT_SYMBOL(ib_find_gid_by_filter);
int ib_get_cached_pkey(struct ib_device *device,
u8 port_num,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index f6b159d79977..e6749157fd86 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -452,13 +452,14 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
cm_id_priv->private_data_len = private_data_len;
}
-static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
- struct ib_grh *grh, struct cm_av *av)
+static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
+ struct ib_grh *grh, struct cm_av *av)
{
av->port = port;
av->pkey_index = wc->pkey_index;
- ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
- grh, &av->ah_attr);
+ return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
+ port->port_num, wc,
+ grh, &av->ah_attr);
}
static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
@@ -494,8 +495,11 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
return ret;
av->port = port;
- ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
- &av->ah_attr);
+ ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
+ &av->ah_attr);
+ if (ret)
+ return ret;
+
av->timeout = path->packet_life_time + 1;
spin_lock_irqsave(&cm.lock, flags);
@@ -1560,6 +1564,35 @@ static u16 cm_get_bth_pkey(struct cm_work *work)
return pkey;
}
+/**
+ * Convert OPA SGID to IB SGID
+ * ULPs (such as IPoIB) do not understand OPA GIDs and will
+ * reject them as the local_gid will not match the sgid. Therefore,
+ * change the pathrec's SGID to an IB SGID.
+ *
+ * @work: Work completion
+ * @path: Path record
+ */
+static void cm_opa_to_ib_sgid(struct cm_work *work,
+ struct sa_path_rec *path)
+{
+ struct ib_device *dev = work->port->cm_dev->ib_device;
+ u8 port_num = work->port->port_num;
+
+ if (rdma_cap_opa_ah(dev, port_num) &&
+ (ib_is_opa_gid(&path->sgid))) {
+ union ib_gid sgid;
+
+ if (ib_get_cached_gid(dev, port_num, 0, &sgid, NULL)) {
+ dev_warn(&dev->dev,
+ "Error updating sgid in CM request\n");
+ return;
+ }
+
+ path->sgid = sgid;
+ }
+}
+
static void cm_format_req_event(struct cm_work *work,
struct cm_id_private *cm_id_priv,
struct ib_cm_id *listen_id)
@@ -1573,10 +1606,13 @@ static void cm_format_req_event(struct cm_work *work,
param->bth_pkey = cm_get_bth_pkey(work);
param->port = cm_id_priv->av.port->port_num;
param->primary_path = &work->path[0];
- if (cm_req_has_alt_path(req_msg))
+ cm_opa_to_ib_sgid(work, param->primary_path);
+ if (cm_req_has_alt_path(req_msg)) {
param->alternate_path = &work->path[1];
- else
+ cm_opa_to_ib_sgid(work, param->alternate_path);
+ } else {
param->alternate_path = NULL;
+ }
param->remote_ca_guid = req_msg->local_ca_guid;
param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
@@ -1826,9 +1862,11 @@ static int cm_req_handler(struct cm_work *work)
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
cm_id_priv->id.remote_id = req_msg->local_comm_id;
- cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
+ ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ if (ret)
+ goto destroy;
cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
id.local_id);
if (IS_ERR(cm_id_priv->timewait_info)) {
@@ -1841,9 +1879,10 @@ static int cm_req_handler(struct cm_work *work)
listen_cm_id_priv = cm_match_req(work, cm_id_priv);
if (!listen_cm_id_priv) {
+ pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
+ be32_to_cpu(cm_id->local_id));
ret = -EINVAL;
- kfree(cm_id_priv->timewait_info);
- goto destroy;
+ goto free_timeinfo;
}
cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@@ -1861,56 +1900,50 @@ static int cm_req_handler(struct cm_work *work)
work->port->port_num,
grh->sgid_index,
&gid, &gid_attr);
- if (!ret) {
- if (gid_attr.ndev) {
- work->path[0].rec_type =
- sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
- sa_path_set_ifindex(&work->path[0],
- gid_attr.ndev->ifindex);
- sa_path_set_ndev(&work->path[0],
- dev_net(gid_attr.ndev));
- dev_put(gid_attr.ndev);
- } else {
- cm_path_set_rec_type(work->port->cm_dev->ib_device,
- work->port->port_num,
- &work->path[0],
- &req_msg->primary_local_gid);
- }
- if (cm_req_has_alt_path(req_msg))
- work->path[1].rec_type = work->path[0].rec_type;
- cm_format_paths_from_req(req_msg, &work->path[0],
- &work->path[1]);
- if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
- sa_path_set_dmac(&work->path[0],
- cm_id_priv->av.ah_attr.roce.dmac);
- work->path[0].hop_limit = grh->hop_limit;
- ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
- cm_id_priv);
+ if (ret) {
+ ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0);
+ goto rejected;
+ }
+
+ if (gid_attr.ndev) {
+ work->path[0].rec_type =
+ sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
+ sa_path_set_ifindex(&work->path[0],
+ gid_attr.ndev->ifindex);
+ sa_path_set_ndev(&work->path[0],
+ dev_net(gid_attr.ndev));
+ dev_put(gid_attr.ndev);
+ } else {
+ cm_path_set_rec_type(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ &work->path[0],
+ &req_msg->primary_local_gid);
}
+ if (cm_req_has_alt_path(req_msg))
+ work->path[1].rec_type = work->path[0].rec_type;
+ cm_format_paths_from_req(req_msg, &work->path[0],
+ &work->path[1]);
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
+ sa_path_set_dmac(&work->path[0],
+ cm_id_priv->av.ah_attr.roce.dmac);
+ work->path[0].hop_limit = grh->hop_limit;
+ ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+ cm_id_priv);
if (ret) {
- int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num, 0,
- &work->path[0].sgid,
- &gid_attr);
- if (!err && gid_attr.ndev) {
- work->path[0].rec_type =
- sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
- sa_path_set_ifindex(&work->path[0],
- gid_attr.ndev->ifindex);
- sa_path_set_ndev(&work->path[0],
- dev_net(gid_attr.ndev));
- dev_put(gid_attr.ndev);
- } else {
- cm_path_set_rec_type(work->port->cm_dev->ib_device,
- work->port->port_num,
- &work->path[0],
- &req_msg->primary_local_gid);
- }
- if (cm_req_has_alt_path(req_msg))
- work->path[1].rec_type = work->path[0].rec_type;
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
- &work->path[0].sgid, sizeof work->path[0].sgid,
- NULL, 0);
+ int err;
+
+ err = ib_get_cached_gid(work->port->cm_dev->ib_device,
+ work->port->port_num, 0,
+ &work->path[0].sgid,
+ NULL);
+ if (err)
+ ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+ NULL, 0, NULL, 0);
+ else
+ ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+ &work->path[0].sgid,
+ sizeof(work->path[0].sgid),
+ NULL, 0);
goto rejected;
}
if (cm_req_has_alt_path(req_msg)) {
@@ -1919,7 +1952,7 @@ static int cm_req_handler(struct cm_work *work)
if (ret) {
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid,
- sizeof work->path[0].sgid, NULL, 0);
+ sizeof(work->path[0].sgid), NULL, 0);
goto rejected;
}
}
@@ -1945,6 +1978,8 @@ static int cm_req_handler(struct cm_work *work)
rejected:
atomic_dec(&cm_id_priv->refcount);
cm_deref_id(listen_cm_id_priv);
+free_timeinfo:
+ kfree(cm_id_priv->timewait_info);
destroy:
ib_destroy_cm_id(cm_id);
return ret;
@@ -1997,6 +2032,8 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
+ pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
+ be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2063,6 +2100,8 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
+ pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
+ be32_to_cpu(cm_id->local_id), cm_id->state);
ret = -EINVAL;
goto error;
}
@@ -2170,6 +2209,8 @@ static int cm_rep_handler(struct cm_work *work)
cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
+ pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
+ be32_to_cpu(rep_msg->remote_comm_id));
return -EINVAL;
}
@@ -2183,6 +2224,10 @@ static int cm_rep_handler(struct cm_work *work)
default:
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
+ pr_debug("%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
+ __func__, cm_id_priv->id.state,
+ be32_to_cpu(rep_msg->local_comm_id),
+ be32_to_cpu(rep_msg->remote_comm_id));
goto error;
}
@@ -2196,6 +2241,8 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
+ pr_debug("%s: Failed to insert remote id %d\n", __func__,
+ be32_to_cpu(rep_msg->remote_comm_id));
goto error;
}
/* Check for a stale connection. */
@@ -2213,6 +2260,10 @@ static int cm_rep_handler(struct cm_work *work)
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
+ pr_debug("%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
+ __func__, be32_to_cpu(rep_msg->local_comm_id),
+ be32_to_cpu(rep_msg->remote_comm_id));
+
if (cur_cm_id_priv) {
cm_id = &cur_cm_id_priv->id;
ib_send_cm_dreq(cm_id, NULL, 0);
@@ -2359,6 +2410,8 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_ESTABLISHED) {
+ pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
+ be32_to_cpu(cm_id->local_id), cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2428,6 +2481,8 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id,
if (cm_id->state != IB_CM_DREQ_RCVD) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
kfree(data);
+ pr_debug("%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
+ __func__, be32_to_cpu(cm_id->local_id), cm_id->state);
return -EINVAL;
}
@@ -2493,6 +2548,9 @@ static int cm_dreq_handler(struct cm_work *work)
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
counter[CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
+ pr_debug("%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
+ __func__, be32_to_cpu(dreq_msg->local_comm_id),
+ be32_to_cpu(dreq_msg->remote_comm_id));
return -EINVAL;
}
@@ -2535,6 +2593,9 @@ static int cm_dreq_handler(struct cm_work *work)
counter[CM_DREQ_COUNTER]);
goto unlock;
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
@@ -2638,6 +2699,8 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
cm_enter_timewait(cm_id_priv);
break;
default:
+ pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
+ be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2748,6 +2811,9 @@ static int cm_rej_handler(struct cm_work *work)
/* fall through */
default:
spin_unlock_irq(&cm_id_priv->lock);
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
goto out;
}
@@ -2811,6 +2877,9 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
}
/* fall through */
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
goto error1;
}
@@ -2912,6 +2981,9 @@ static int cm_mra_handler(struct cm_work *work)
counter[CM_MRA_COUNTER]);
/* fall through */
default:
+ pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
goto out;
}
@@ -3085,6 +3157,12 @@ static int cm_lap_handler(struct cm_work *work)
if (!cm_id_priv)
return -EINVAL;
+ ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ if (ret)
+ goto deref;
+
param = &work->cm_event.param.lap_rcvd;
memset(&work->path[0], 0, sizeof(work->path[1]));
cm_path_set_rec_type(work->port->cm_dev->ib_device,
@@ -3131,9 +3209,6 @@ static int cm_lap_handler(struct cm_work *work)
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
- cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
cm_id_priv);
ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -3386,6 +3461,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
struct cm_sidr_req_msg *sidr_req_msg;
struct ib_wc *wc;
+ int ret;
cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
if (IS_ERR(cm_id))
@@ -3398,9 +3474,12 @@ static int cm_sidr_req_handler(struct cm_work *work)
wc = work->mad_recv_wc->wc;
cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
cm_id_priv->av.dgid.global.interface_id = 0;
- cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
+ ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ if (ret)
+ goto out;
+
cm_id_priv->id.remote_id = sidr_req_msg->request_id;
cm_id_priv->tid = sidr_req_msg->hdr.tid;
atomic_inc(&cm_id_priv->work_count);
@@ -3692,6 +3771,7 @@ static void cm_work_handler(struct work_struct *_work)
ret = cm_timewait_handler(work);
break;
default:
+ pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
ret = -EINVAL;
break;
}
@@ -3727,6 +3807,8 @@ static int cm_establish(struct ib_cm_id *cm_id)
ret = -EISCONN;
break;
default:
+ pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
+ be32_to_cpu(cm_id->local_id), cm_id->state);
ret = -EINVAL;
break;
}
@@ -3924,6 +4006,9 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@@ -3971,6 +4056,9 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@@ -4030,6 +4118,9 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
+ __func__, be32_to_cpu(cm_id_priv->id.local_id),
+ cm_id_priv->id.state);
ret = -EINVAL;
break;
}
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6294a7001d33..e66963ca58bd 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -601,7 +601,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
int ret;
if (addr->sa_family != AF_IB) {
- ret = rdma_translate_ip(addr, dev_addr, NULL);
+ ret = rdma_translate_ip(addr, dev_addr);
} else {
cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
ret = 0;
@@ -612,11 +612,14 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
static inline int cma_validate_port(struct ib_device *device, u8 port,
enum ib_gid_type gid_type,
- union ib_gid *gid, int dev_type,
- int bound_if_index)
+ union ib_gid *gid,
+ struct rdma_id_private *id_priv)
{
- int ret = -ENODEV;
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ int bound_if_index = dev_addr->bound_dev_if;
+ int dev_type = dev_addr->dev_type;
struct net_device *ndev = NULL;
+ int ret = -ENODEV;
if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
return ret;
@@ -624,11 +627,13 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
return ret;
- if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port))
- ndev = dev_get_by_index(&init_net, bound_if_index);
- else
+ if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
+ ndev = dev_get_by_index(dev_addr->net, bound_if_index);
+ if (!ndev)
+ return ret;
+ } else {
gid_type = IB_GID_TYPE_IB;
-
+ }
ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
ndev, NULL);
@@ -669,8 +674,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
rdma_protocol_ib(cma_dev->device, port) ?
IB_GID_TYPE_IB :
listen_id_priv->gid_type, gidp,
- dev_addr->dev_type,
- dev_addr->bound_dev_if);
+ id_priv);
if (!ret) {
id_priv->id.port_num = port;
goto out;
@@ -691,8 +695,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
rdma_protocol_ib(cma_dev->device, port) ?
IB_GID_TYPE_IB :
cma_dev->default_gid_type[port - 1],
- gidp, dev_addr->dev_type,
- dev_addr->bound_dev_if);
+ gidp, id_priv);
if (!ret) {
id_priv->id.port_num = port;
goto out;
@@ -2036,6 +2039,33 @@ __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
}
EXPORT_SYMBOL(rdma_get_service_id);
+void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
+ union ib_gid *dgid)
+{
+ struct rdma_addr *addr = &cm_id->route.addr;
+
+ if (!cm_id->device) {
+ if (sgid)
+ memset(sgid, 0, sizeof(*sgid));
+ if (dgid)
+ memset(dgid, 0, sizeof(*dgid));
+ return;
+ }
+
+ if (rdma_protocol_roce(cm_id->device, cm_id->port_num)) {
+ if (sgid)
+ rdma_ip2gid((struct sockaddr *)&addr->src_addr, sgid);
+ if (dgid)
+ rdma_ip2gid((struct sockaddr *)&addr->dst_addr, dgid);
+ } else {
+ if (sgid)
+ rdma_addr_get_sgid(&addr->dev_addr, sgid);
+ if (dgid)
+ rdma_addr_get_dgid(&addr->dev_addr, dgid);
+ }
+}
+EXPORT_SYMBOL(rdma_read_gids);
+
static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
struct rdma_id_private *id_priv = iw_id->context;
@@ -2132,7 +2162,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
- ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
+ ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@@ -2414,6 +2444,26 @@ out:
kfree(work);
}
+static void cma_init_resolve_route_work(struct cma_work *work,
+ struct rdma_id_private *id_priv)
+{
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ROUTE_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+}
+
+static void cma_init_resolve_addr_work(struct cma_work *work,
+ struct rdma_id_private *id_priv)
+{
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = RDMA_CM_ADDR_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+}
+
static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
{
struct rdma_route *route = &id_priv->id.route;
@@ -2424,11 +2474,7 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
if (!work)
return -ENOMEM;
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ROUTE_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+ cma_init_resolve_route_work(work, id_priv);
route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
if (!route->path_rec) {
@@ -2449,10 +2495,63 @@ err1:
return ret;
}
-int rdma_set_ib_paths(struct rdma_cm_id *id,
- struct sa_path_rec *path_rec, int num_paths)
+static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
+ unsigned long supported_gids,
+ enum ib_gid_type default_gid)
+{
+ if ((network_type == RDMA_NETWORK_IPV4 ||
+ network_type == RDMA_NETWORK_IPV6) &&
+ test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
+ return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+ return default_gid;
+}
+
+/*
+ * cma_iboe_set_path_rec_l2_fields() is helper function which sets
+ * path record type based on GID type.
+ * It also sets up other L2 fields which includes destination mac address
+ * netdev ifindex, of the path record.
+ * It returns the netdev of the bound interface for this path record entry.
+ */
+static struct net_device *
+cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
+{
+ struct rdma_route *route = &id_priv->id.route;
+ enum ib_gid_type gid_type = IB_GID_TYPE_ROCE;
+ struct rdma_addr *addr = &route->addr;
+ unsigned long supported_gids;
+ struct net_device *ndev;
+
+ if (!addr->dev_addr.bound_dev_if)
+ return NULL;
+
+ ndev = dev_get_by_index(addr->dev_addr.net,
+ addr->dev_addr.bound_dev_if);
+ if (!ndev)
+ return NULL;
+
+ supported_gids = roce_gid_type_mask_support(id_priv->id.device,
+ id_priv->id.port_num);
+ gid_type = cma_route_gid_type(addr->dev_addr.network,
+ supported_gids,
+ id_priv->gid_type);
+ /* Use the hint from IP Stack to select GID Type */
+ if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
+ gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
+
+ sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
+ sa_path_set_ifindex(route->path_rec, ndev->ifindex);
+ sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
+ return ndev;
+}
+
+int rdma_set_ib_path(struct rdma_cm_id *id,
+ struct sa_path_rec *path_rec)
{
struct rdma_id_private *id_priv;
+ struct net_device *ndev;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
@@ -2460,20 +2559,33 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
RDMA_CM_ROUTE_RESOLVED))
return -EINVAL;
- id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
+ id->route.path_rec = kmemdup(path_rec, sizeof(*path_rec),
GFP_KERNEL);
if (!id->route.path_rec) {
ret = -ENOMEM;
goto err;
}
- id->route.num_paths = num_paths;
+ if (rdma_protocol_roce(id->device, id->port_num)) {
+ ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
+ if (!ndev) {
+ ret = -ENODEV;
+ goto err_free;
+ }
+ dev_put(ndev);
+ }
+
+ id->route.num_paths = 1;
return 0;
+
+err_free:
+ kfree(id->route.path_rec);
+ id->route.path_rec = NULL;
err:
cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
return ret;
}
-EXPORT_SYMBOL(rdma_set_ib_paths);
+EXPORT_SYMBOL(rdma_set_ib_path);
static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
{
@@ -2483,11 +2595,7 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
if (!work)
return -ENOMEM;
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ROUTE_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+ cma_init_resolve_route_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
}
@@ -2510,26 +2618,14 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
return 0;
}
-static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
- unsigned long supported_gids,
- enum ib_gid_type default_gid)
-{
- if ((network_type == RDMA_NETWORK_IPV4 ||
- network_type == RDMA_NETWORK_IPV6) &&
- test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
- return IB_GID_TYPE_ROCE_UDP_ENCAP;
-
- return default_gid;
-}
-
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
{
struct rdma_route *route = &id_priv->id.route;
struct rdma_addr *addr = &route->addr;
struct cma_work *work;
int ret;
- struct net_device *ndev = NULL;
- enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ struct net_device *ndev;
+
u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
@@ -2539,9 +2635,6 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
if (!work)
return -ENOMEM;
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
-
route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
if (!route->path_rec) {
ret = -ENOMEM;
@@ -2550,42 +2643,17 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->num_paths = 1;
- if (addr->dev_addr.bound_dev_if) {
- unsigned long supported_gids;
-
- ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
- if (!ndev) {
- ret = -ENODEV;
- goto err2;
- }
-
- supported_gids = roce_gid_type_mask_support(id_priv->id.device,
- id_priv->id.port_num);
- gid_type = cma_route_gid_type(addr->dev_addr.network,
- supported_gids,
- id_priv->gid_type);
- route->path_rec->rec_type =
- sa_conv_gid_to_pathrec_type(gid_type);
- sa_path_set_ndev(route->path_rec, &init_net);
- sa_path_set_ifindex(route->path_rec, ndev->ifindex);
- }
+ ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
if (!ndev) {
ret = -ENODEV;
goto err2;
}
- sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
-
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&route->path_rec->sgid);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
&route->path_rec->dgid);
- /* Use the hint from IP Stack to select GID Type */
- if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
- gid_type = ib_network_to_gid_type(addr->dev_addr.network);
- route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
-
if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
/* TODO: get the hoplimit from the inet/inet6 device */
route->path_rec->hop_limit = addr->dev_addr.hoplimit;
@@ -2607,11 +2675,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ROUTE_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
- work->event.status = 0;
-
+ cma_init_resolve_route_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
@@ -2791,11 +2855,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
- work->old_state = RDMA_CM_ADDR_QUERY;
- work->new_state = RDMA_CM_ADDR_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+ cma_init_resolve_addr_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
err:
@@ -2821,11 +2881,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
- work->old_state = RDMA_CM_ADDR_QUERY;
- work->new_state = RDMA_CM_ADDR_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+ cma_init_resolve_addr_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
err:
@@ -3404,9 +3460,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
event.status = ret;
break;
}
- ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
- id_priv->id.route.path_rec,
- &event.param.ud.ah_attr);
+ ib_init_ah_attr_from_path(id_priv->id.device,
+ id_priv->id.port_num,
+ id_priv->id.route.path_rec,
+ &event.param.ud.ah_attr);
event.param.ud.qp_num = rep->qpn;
event.param.ud.qkey = rep->qkey;
event.event = RDMA_CM_EVENT_ESTABLISHED;
@@ -3873,7 +3930,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
struct rdma_dev_addr *dev_addr =
&id_priv->id.route.addr.dev_addr;
struct net_device *ndev =
- dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
enum ib_gid_type gid_type =
id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
@@ -4010,8 +4067,10 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
} else if (addr->sa_family == AF_INET6) {
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else {
- mgid->raw[0] = (gid_type == IB_GID_TYPE_IB) ? 0xff : 0;
- mgid->raw[1] = (gid_type == IB_GID_TYPE_IB) ? 0x0e : 0;
+ mgid->raw[0] =
+ (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff;
+ mgid->raw[1] =
+ (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e;
mgid->raw[2] = 0;
mgid->raw[3] = 0;
mgid->raw[4] = 0;
@@ -4061,7 +4120,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
if (!ndev) {
err = -ENODEV;
goto out2;
@@ -4179,7 +4238,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
struct net_device *ndev = NULL;
if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(&init_net,
+ ndev = dev_get_by_index(dev_addr->net,
dev_addr->bound_dev_if);
if (ndev) {
cma_igmp_send(ndev,
@@ -4235,7 +4294,7 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
if (event != NETDEV_BONDING_FAILOVER)
return NOTIFY_DONE;
- if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
+ if (!netif_is_bond_master(ndev))
return NOTIFY_DONE;
mutex_lock(&lock);
@@ -4432,7 +4491,7 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
goto out;
if (ibnl_put_attr(skb, nlh,
- rdma_addr_size(cma_src_addr(id_priv)),
+ rdma_addr_size(cma_dst_addr(id_priv)),
cma_dst_addr(id_priv),
RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
goto out;
@@ -4444,6 +4503,7 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
id_stats->qp_type = id->qp_type;
i_id++;
+ nlmsg_end(skb, nlh);
}
cb->args[1] = 0;
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 31dfee0c8295..eee38b40be99 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -295,7 +295,7 @@ static struct config_group *make_cma_dev(struct config_group *group,
goto fail;
}
- strncpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+ strlcpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
config_group_init_type_name(&cma_dev_group->ports_group, "ports",
&cma_ports_group_type);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 66f0268f37a6..c4560d84dfae 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -40,8 +40,12 @@
#include <rdma/ib_verbs.h>
#include <rdma/opa_addr.h>
#include <rdma/ib_mad.h>
+#include <rdma/restrack.h>
#include "mad_priv.h"
+/* Total number of ports combined across all struct ib_devices's */
+#define RDMA_MAX_PORTS 1024
+
struct pkey_index_qp_list {
struct list_head pkey_index_list;
u16 pkey_index;
@@ -137,7 +141,6 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
int roce_gid_mgmt_init(void);
void roce_gid_mgmt_cleanup(void);
-int roce_rescan_device(struct ib_device *ib_dev);
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
int ib_cache_setup_one(struct ib_device *device);
@@ -191,13 +194,6 @@ void ib_sa_cleanup(void);
int rdma_nl_init(void);
void rdma_nl_exit(void);
-/**
- * Check if there are any listeners to the netlink group
- * @group: the netlink group ID
- * Returns 0 on success or a negative for no listeners.
- */
-int ibnl_chk_listeners(unsigned int group);
-
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
@@ -213,11 +209,6 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
u64 *sn_pfx);
#ifdef CONFIG_SECURITY_INFINIBAND
-int ib_security_pkey_access(struct ib_device *dev,
- u8 port_num,
- u16 pkey_index,
- void *sec);
-
void ib_security_destroy_port_pkey_list(struct ib_device *device);
void ib_security_cache_change(struct ib_device *device,
@@ -240,14 +231,6 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent);
int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index);
#else
-static inline int ib_security_pkey_access(struct ib_device *dev,
- u8 port_num,
- u16 pkey_index,
- void *sec)
-{
- return 0;
-}
-
static inline void ib_security_destroy_port_pkey_list(struct ib_device *device)
{
}
@@ -318,4 +301,31 @@ struct ib_device *ib_device_get_by_index(u32 ifindex);
/* RDMA device netlink */
void nldev_init(void);
void nldev_exit(void);
+
+static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
+ struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct ib_qp *qp;
+
+ qp = dev->create_qp(pd, attr, udata);
+ if (IS_ERR(qp))
+ return qp;
+
+ qp->device = dev;
+ qp->pd = pd;
+ /*
+ * We don't track XRC QPs for now, because they don't have PD
+ * and more importantly they are created internaly by driver,
+ * see mlx5 create_dev_resources() as an example.
+ */
+ if (attr->qp_type < IB_QPT_XRC_INI) {
+ qp->res.type = RDMA_RESTRACK_QP;
+ rdma_restrack_add(&qp->res);
+ } else
+ qp->res.valid = false;
+
+ return qp;
+}
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index f2ae75fa3128..bc79ca8215d7 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -25,9 +25,10 @@
#define IB_POLL_FLAGS \
(IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
-static int __ib_process_cq(struct ib_cq *cq, int budget)
+static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *poll_wc)
{
int i, n, completed = 0;
+ struct ib_wc *wcs = poll_wc ? : cq->wc;
/*
* budget might be (-1) if the caller does not
@@ -35,9 +36,9 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
* minimum here.
*/
while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH,
- budget - completed), cq->wc)) > 0) {
+ budget - completed), wcs)) > 0) {
for (i = 0; i < n; i++) {
- struct ib_wc *wc = &cq->wc[i];
+ struct ib_wc *wc = &wcs[i];
if (wc->wr_cqe)
wc->wr_cqe->done(cq, wc);
@@ -60,18 +61,20 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
* @cq: CQ to process
* @budget: number of CQEs to poll for
*
- * This function is used to process all outstanding CQ entries on a
- * %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
- * context and does not ask for completion interrupts from the HCA.
+ * This function is used to process all outstanding CQ entries.
+ * It does not offload CQ processing to a different context and does
+ * not ask for completion interrupts from the HCA.
+ * Using direct processing on CQ with non IB_POLL_DIRECT type may trigger
+ * concurrent processing.
*
* Note: do not pass -1 as %budget unless it is guaranteed that the number
* of completions that will be processed is small.
*/
int ib_process_cq_direct(struct ib_cq *cq, int budget)
{
- WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
+ struct ib_wc wcs[IB_POLL_BATCH];
- return __ib_process_cq(cq, budget);
+ return __ib_process_cq(cq, budget, wcs);
}
EXPORT_SYMBOL(ib_process_cq_direct);
@@ -85,7 +88,7 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
int completed;
- completed = __ib_process_cq(cq, budget);
+ completed = __ib_process_cq(cq, budget, NULL);
if (completed < budget) {
irq_poll_complete(&cq->iop);
if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
@@ -105,7 +108,7 @@ static void ib_cq_poll_work(struct work_struct *work)
struct ib_cq *cq = container_of(work, struct ib_cq, work);
int completed;
- completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE);
+ completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, NULL);
if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
queue_work(ib_comp_wq, &cq->work);
@@ -117,20 +120,22 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
}
/**
- * ib_alloc_cq - allocate a completion queue
+ * __ib_alloc_cq - allocate a completion queue
* @dev: device to allocate the CQ for
* @private: driver private data, accessible from cq->cq_context
* @nr_cqe: number of CQEs to allocate
* @comp_vector: HCA completion vectors for this CQ
* @poll_ctx: context to poll the CQ from.
+ * @caller: module owner name.
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
* specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
-struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector,
+ enum ib_poll_context poll_ctx, const char *caller)
{
struct ib_cq_init_attr cq_attr = {
.cqe = nr_cqe,
@@ -154,6 +159,10 @@ struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
if (!cq->wc)
goto out_destroy_cq;
+ cq->res.type = RDMA_RESTRACK_CQ;
+ cq->res.kern_name = caller;
+ rdma_restrack_add(&cq->res);
+
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
cq->comp_handler = ib_cq_completion_direct;
@@ -178,11 +187,12 @@ struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
out_free_wc:
kfree(cq->wc);
+ rdma_restrack_del(&cq->res);
out_destroy_cq:
cq->device->destroy_cq(cq);
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(ib_alloc_cq);
+EXPORT_SYMBOL(__ib_alloc_cq);
/**
* ib_free_cq - free a completion queue
@@ -209,6 +219,7 @@ void ib_free_cq(struct ib_cq *cq)
}
kfree(cq->wc);
+ rdma_restrack_del(&cq->res);
ret = cq->device->destroy_cq(cq);
WARN_ON_ONCE(ret);
}
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 465520627e4b..e8010e73a1cf 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -263,6 +263,8 @@ struct ib_device *ib_alloc_device(size_t size)
if (!device)
return NULL;
+ rdma_restrack_init(&device->res);
+
device->dev.class = &ib_class;
device_initialize(&device->dev);
@@ -288,7 +290,7 @@ void ib_dealloc_device(struct ib_device *device)
{
WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
device->reg_state != IB_DEV_UNINITIALIZED);
- kobject_put(&device->dev.kobj);
+ put_device(&device->dev);
}
EXPORT_SYMBOL(ib_dealloc_device);
@@ -462,7 +464,6 @@ int ib_register_device(struct ib_device *device,
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
struct device *parent = device->dev.parent;
- WARN_ON_ONCE(!parent);
WARN_ON_ONCE(device->dma_device);
if (device->dev.dma_ops) {
/*
@@ -471,16 +472,25 @@ int ib_register_device(struct ib_device *device,
* into device->dev.
*/
device->dma_device = &device->dev;
- if (!device->dev.dma_mask)
- device->dev.dma_mask = parent->dma_mask;
- if (!device->dev.coherent_dma_mask)
- device->dev.coherent_dma_mask =
- parent->coherent_dma_mask;
+ if (!device->dev.dma_mask) {
+ if (parent)
+ device->dev.dma_mask = parent->dma_mask;
+ else
+ WARN_ON_ONCE(true);
+ }
+ if (!device->dev.coherent_dma_mask) {
+ if (parent)
+ device->dev.coherent_dma_mask =
+ parent->coherent_dma_mask;
+ else
+ WARN_ON_ONCE(true);
+ }
} else {
/*
* The caller did not provide custom DMA operations. Use the
* DMA mapping operations of the parent device.
*/
+ WARN_ON_ONCE(!parent);
device->dma_device = parent;
}
@@ -588,6 +598,8 @@ void ib_unregister_device(struct ib_device *device)
}
up_read(&lists_rwsem);
+ rdma_restrack_clean(&device->res);
+
ib_device_unregister_rdmacg(device);
ib_device_unregister_sysfs(device);
@@ -1033,32 +1045,22 @@ EXPORT_SYMBOL(ib_modify_port);
/**
* ib_find_gid - Returns the port number and GID table index where
- * a specified GID value occurs.
+ * a specified GID value occurs. Its searches only for IB link layer.
* @device: The device to query.
* @gid: The GID value to search for.
- * @gid_type: Type of GID.
* @ndev: The ndev related to the GID to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- enum ib_gid_type gid_type, struct net_device *ndev,
- u8 *port_num, u16 *index)
+ struct net_device *ndev, u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
- if (rdma_cap_roce_gid_table(device, port)) {
- if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
- ndev, index)) {
- *port_num = port;
- return 0;
- }
- }
-
- if (gid_type != IB_GID_TYPE_IB)
+ if (rdma_cap_roce_gid_table(device, port))
continue;
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 84d2615b5d4b..a0a9ed719031 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -388,13 +388,11 @@ int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
EXPORT_SYMBOL(ib_flush_fmr_pool);
/**
- * ib_fmr_pool_map_phys -
- * @pool:FMR pool to allocate FMR from
- * @page_list:List of pages to map
- * @list_len:Number of pages in @page_list
- * @io_virtual_address:I/O virtual address for new FMR
- *
- * Map an FMR from an FMR pool.
+ * ib_fmr_pool_map_phys - Map an FMR from an FMR pool.
+ * @pool_handle: FMR pool to allocate FMR from
+ * @page_list: List of pages to map
+ * @list_len: Number of pages in @page_list
+ * @io_virtual_address: I/O virtual address for new FMR
*/
struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
u64 *page_list,
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 3c4faadb8cdd..81528f64061a 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -654,6 +654,7 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
}
skb_num++;
spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+ ret = -EINVAL;
for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) {
hlist_for_each_entry(map_info, &iwpm_hash_bucket[i],
hlist_node) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index cb91245e9163..c50596f7f98a 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -49,7 +49,6 @@
#include "smi.h"
#include "opa_smi.h"
#include "agent.h"
-#include "core_priv.h"
static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 1fb72c356e36..3ccaae18ad75 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -41,8 +41,6 @@
#include <linux/module.h>
#include "core_priv.h"
-#include "core_priv.h"
-
static DEFINE_MUTEX(rdma_nl_mutex);
static struct sock *nls;
static struct {
@@ -83,15 +81,13 @@ static bool is_nl_valid(unsigned int type, unsigned int op)
if (!is_nl_msg_valid(type, op))
return false;
- cb_table = rdma_nl_types[type].cb_table;
-#ifdef CONFIG_MODULES
- if (!cb_table) {
+ if (!rdma_nl_types[type].cb_table) {
mutex_unlock(&rdma_nl_mutex);
request_module("rdma-netlink-subsys-%d", type);
mutex_lock(&rdma_nl_mutex);
- cb_table = rdma_nl_types[type].cb_table;
}
-#endif
+
+ cb_table = rdma_nl_types[type].cb_table;
if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
return false;
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 0dcd1aa6f683..fa8655e3b3ed 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -31,6 +31,8 @@
*/
#include <linux/module.h>
+#include <linux/pid.h>
+#include <linux/pid_namespace.h>
#include <net/netlink.h>
#include <rdma/rdma_netlink.h>
@@ -52,16 +54,42 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
+ .len = 16 },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
+ .len = TASK_COMM_LEN },
};
-static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
+static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
- char fw[IB_FW_VERSION_NAME_MAX];
-
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
return -EMSGSIZE;
if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
return -EMSGSIZE;
+
+ return 0;
+}
+
+static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
+{
+ char fw[IB_FW_VERSION_NAME_MAX];
+
+ if (fill_nldev_handle(msg, device))
+ return -EMSGSIZE;
+
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
return -EMSGSIZE;
@@ -92,10 +120,9 @@ static int fill_port_info(struct sk_buff *msg,
struct ib_port_attr attr;
int ret;
- if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
- return -EMSGSIZE;
- if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
+ if (fill_nldev_handle(msg, device))
return -EMSGSIZE;
+
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
return -EMSGSIZE;
@@ -126,6 +153,137 @@ static int fill_port_info(struct sk_buff *msg,
return 0;
}
+static int fill_res_info_entry(struct sk_buff *msg,
+ const char *name, u64 curr)
+{
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
+ goto err;
+ if (nla_put_u64_64bit(msg,
+ RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 0))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
+{
+ static const char * const names[RDMA_RESTRACK_MAX] = {
+ [RDMA_RESTRACK_PD] = "pd",
+ [RDMA_RESTRACK_CQ] = "cq",
+ [RDMA_RESTRACK_QP] = "qp",
+ };
+
+ struct rdma_restrack_root *res = &device->res;
+ struct nlattr *table_attr;
+ int ret, i, curr;
+
+ if (fill_nldev_handle(msg, device))
+ return -EMSGSIZE;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
+ if (!names[i])
+ continue;
+ curr = rdma_restrack_count(res, i, task_active_pid_ns(current));
+ ret = fill_res_info_entry(msg, names[i], curr);
+ if (ret)
+ goto err;
+ }
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return ret;
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg,
+ struct ib_qp *qp, uint32_t port)
+{
+ struct rdma_restrack_entry *res = &qp->res;
+ struct ib_qp_init_attr qp_init_attr;
+ struct nlattr *entry_attr;
+ struct ib_qp_attr qp_attr;
+ int ret;
+
+ ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
+ if (ret)
+ return ret;
+
+ if (port && port != qp_attr.port_num)
+ return 0;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ /* In create_qp() port is not set yet */
+ if (qp_attr.port_num &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
+ goto err;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
+ goto err;
+ if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
+ qp_attr.dest_qp_num))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
+ qp_attr.rq_psn))
+ goto err;
+ }
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
+ goto err;
+
+ if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
+ qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
+ qp_attr.path_mig_state))
+ goto err;
+ }
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
+ goto err;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
+ goto err;
+
+ /*
+ * Existence of task means that it is user QP and netlink
+ * user is invited to go and read /proc/PID/comm to get name
+ * of the task file and res->task_com should be NULL.
+ */
+ if (rdma_is_kernel_res(res)) {
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, res->kern_name))
+ goto err;
+ } else {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, task_pid_vnr(res->task)))
+ goto err;
+ }
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -321,6 +479,213 @@ out:
return skb->len;
}
+static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index;
+ int ret;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(index);
+ if (!device)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ goto err;
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
+ 0, 0);
+
+ ret = fill_res_info(msg, device);
+ if (ret)
+ goto err_free;
+
+ nlmsg_end(msg, nlh);
+ put_device(&device->dev);
+ return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_free:
+ nlmsg_free(msg);
+err:
+ put_device(&device->dev);
+ return ret;
+}
+
+static int _nldev_res_get_dumpit(struct ib_device *device,
+ struct sk_buff *skb,
+ struct netlink_callback *cb,
+ unsigned int idx)
+{
+ int start = cb->args[0];
+ struct nlmsghdr *nlh;
+
+ if (idx < start)
+ return 0;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
+ 0, NLM_F_MULTI);
+
+ if (fill_res_info(skb, device)) {
+ nlmsg_cancel(skb, nlh);
+ goto out;
+ }
+
+ nlmsg_end(skb, nlh);
+
+ idx++;
+
+out:
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nldev_res_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
+}
+
+static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct rdma_restrack_entry *res;
+ int err, ret = 0, idx = 0;
+ struct nlattr *table_attr;
+ struct ib_device *device;
+ int start = cb->args[0];
+ struct ib_qp *qp = NULL;
+ struct nlmsghdr *nlh;
+ u32 index, port = 0;
+
+ err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NULL);
+ /*
+ * Right now, we are expecting the device index to get QP information,
+ * but it is possible to extend this code to return all devices in
+ * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
+ * if it doesn't exist, we will iterate over all devices.
+ *
+ * But it is not needed for now.
+ */
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(index);
+ if (!device)
+ return -EINVAL;
+
+ /*
+ * If no PORT_INDEX is supplied, we will return all QPs from that device
+ */
+ if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err_index;
+ }
+ }
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
+ 0, NLM_F_MULTI);
+
+ if (fill_nldev_handle(skb, device)) {
+ ret = -EMSGSIZE;
+ goto err;
+ }
+
+ table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_QP);
+ if (!table_attr) {
+ ret = -EMSGSIZE;
+ goto err;
+ }
+
+ down_read(&device->res.rwsem);
+ hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_QP) {
+ if (idx < start)
+ goto next;
+
+ if ((rdma_is_kernel_res(res) &&
+ task_active_pid_ns(current) != &init_pid_ns) ||
+ (!rdma_is_kernel_res(res) &&
+ task_active_pid_ns(current) != task_active_pid_ns(res->task)))
+ /*
+ * 1. Kernel QPs should be visible in init namspace only
+ * 2. Present only QPs visible in the current namespace
+ */
+ goto next;
+
+ if (!rdma_restrack_get(res))
+ /*
+ * Resource is under release now, but we are not
+ * relesing lock now, so it will be released in
+ * our next pass, once we will get ->next pointer.
+ */
+ goto next;
+
+ qp = container_of(res, struct ib_qp, res);
+
+ up_read(&device->res.rwsem);
+ ret = fill_res_qp_entry(skb, qp, port);
+ down_read(&device->res.rwsem);
+ /*
+ * Return resource back, but it won't be released till
+ * the &device->res.rwsem will be released for write.
+ */
+ rdma_restrack_put(res);
+
+ if (ret == -EMSGSIZE)
+ /*
+ * There is a chance to optimize here.
+ * It can be done by using list_prepare_entry
+ * and list_for_each_entry_continue afterwards.
+ */
+ break;
+ if (ret)
+ goto res_err;
+next: idx++;
+ }
+ up_read(&device->res.rwsem);
+
+ nla_nest_end(skb, table_attr);
+ nlmsg_end(skb, nlh);
+ cb->args[0] = idx;
+
+ /*
+ * No more QPs to fill, cancel the message and
+ * return 0 to mark end of dumpit.
+ */
+ if (!qp)
+ goto err;
+
+ put_device(&device->dev);
+ return skb->len;
+
+res_err:
+ nla_nest_cancel(skb, table_attr);
+ up_read(&device->res.rwsem);
+
+err:
+ nlmsg_cancel(skb, nlh);
+
+err_index:
+ put_device(&device->dev);
+ return ret;
+}
+
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
@@ -330,6 +695,23 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_port_get_doit,
.dump = nldev_port_get_dumpit,
},
+ [RDMA_NLDEV_CMD_RES_GET] = {
+ .doit = nldev_res_get_doit,
+ .dump = nldev_res_get_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_QP_GET] = {
+ .dump = nldev_res_get_qp_dumpit,
+ /*
+ * .doit is not implemented yet for two reasons:
+ * 1. It is not needed yet.
+ * 2. There is a need to provide identifier, while it is easy
+ * for the QPs (device index + port index + LQPN), it is not
+ * the case for the rest of resources (PD and CQ). Because it
+ * is better to provide similar interface for all resources,
+ * let's wait till we will have other resources implemented
+ * too.
+ */
+ },
};
void __init nldev_init(void)
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
new file mode 100644
index 000000000000..857637bf46da
--- /dev/null
+++ b/drivers/infiniband/core/restrack.c
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <rdma/restrack.h>
+#include <linux/mutex.h>
+#include <linux/sched/task.h>
+#include <linux/uaccess.h>
+#include <linux/pid_namespace.h>
+
+void rdma_restrack_init(struct rdma_restrack_root *res)
+{
+ init_rwsem(&res->rwsem);
+}
+
+void rdma_restrack_clean(struct rdma_restrack_root *res)
+{
+ WARN_ON_ONCE(!hash_empty(res->hash));
+}
+
+int rdma_restrack_count(struct rdma_restrack_root *res,
+ enum rdma_restrack_type type,
+ struct pid_namespace *ns)
+{
+ struct rdma_restrack_entry *e;
+ u32 cnt = 0;
+
+ down_read(&res->rwsem);
+ hash_for_each_possible(res->hash, e, node, type) {
+ if (ns == &init_pid_ns ||
+ (!rdma_is_kernel_res(e) &&
+ ns == task_active_pid_ns(e->task)))
+ cnt++;
+ }
+ up_read(&res->rwsem);
+ return cnt;
+}
+EXPORT_SYMBOL(rdma_restrack_count);
+
+static void set_kern_name(struct rdma_restrack_entry *res)
+{
+ enum rdma_restrack_type type = res->type;
+ struct ib_qp *qp;
+
+ if (type != RDMA_RESTRACK_QP)
+ /* PD and CQ types already have this name embedded in */
+ return;
+
+ qp = container_of(res, struct ib_qp, res);
+ if (!qp->pd) {
+ WARN_ONCE(true, "XRC QPs are not supported\n");
+ /* Survive, despite the programmer's error */
+ res->kern_name = " ";
+ return;
+ }
+
+ res->kern_name = qp->pd->res.kern_name;
+}
+
+static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
+{
+ enum rdma_restrack_type type = res->type;
+ struct ib_device *dev;
+ struct ib_xrcd *xrcd;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+
+ switch (type) {
+ case RDMA_RESTRACK_PD:
+ pd = container_of(res, struct ib_pd, res);
+ dev = pd->device;
+ break;
+ case RDMA_RESTRACK_CQ:
+ cq = container_of(res, struct ib_cq, res);
+ dev = cq->device;
+ break;
+ case RDMA_RESTRACK_QP:
+ qp = container_of(res, struct ib_qp, res);
+ dev = qp->device;
+ break;
+ case RDMA_RESTRACK_XRCD:
+ xrcd = container_of(res, struct ib_xrcd, res);
+ dev = xrcd->device;
+ break;
+ default:
+ WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
+ return NULL;
+ }
+
+ return dev;
+}
+
+void rdma_restrack_add(struct rdma_restrack_entry *res)
+{
+ struct ib_device *dev = res_to_dev(res);
+
+ if (!dev)
+ return;
+
+ if (!uaccess_kernel()) {
+ get_task_struct(current);
+ res->task = current;
+ res->kern_name = NULL;
+ } else {
+ set_kern_name(res);
+ res->task = NULL;
+ }
+
+ kref_init(&res->kref);
+ init_completion(&res->comp);
+ res->valid = true;
+
+ down_write(&dev->res.rwsem);
+ hash_add(dev->res.hash, &res->node, res->type);
+ up_write(&dev->res.rwsem);
+}
+EXPORT_SYMBOL(rdma_restrack_add);
+
+int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
+{
+ return kref_get_unless_zero(&res->kref);
+}
+EXPORT_SYMBOL(rdma_restrack_get);
+
+static void restrack_release(struct kref *kref)
+{
+ struct rdma_restrack_entry *res;
+
+ res = container_of(kref, struct rdma_restrack_entry, kref);
+ complete(&res->comp);
+}
+
+int rdma_restrack_put(struct rdma_restrack_entry *res)
+{
+ return kref_put(&res->kref, restrack_release);
+}
+EXPORT_SYMBOL(rdma_restrack_put);
+
+void rdma_restrack_del(struct rdma_restrack_entry *res)
+{
+ struct ib_device *dev;
+
+ if (!res->valid)
+ return;
+
+ dev = res_to_dev(res);
+ if (!dev)
+ return;
+
+ rdma_restrack_put(res);
+
+ wait_for_completion(&res->comp);
+
+ down_write(&dev->res.rwsem);
+ hash_del(&res->node);
+ res->valid = false;
+ if (res->task)
+ put_task_struct(res->task);
+ up_write(&dev->res.rwsem);
+}
+EXPORT_SYMBOL(rdma_restrack_del);
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 90e3889b7fbe..5a52ec77940a 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -410,15 +410,18 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
rtnl_unlock();
}
-/* This function will rescan all of the network devices in the system
- * and add their gids, as needed, to the relevant RoCE devices. */
-int roce_rescan_device(struct ib_device *ib_dev)
+/**
+ * rdma_roce_rescan_device - Rescan all of the network devices in the system
+ * and add their gids, as needed, to the relevant RoCE devices.
+ *
+ * @device: the rdma device
+ */
+void rdma_roce_rescan_device(struct ib_device *ib_dev)
{
ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL,
enum_all_gids_of_dev_cb, NULL);
-
- return 0;
}
+EXPORT_SYMBOL(rdma_roce_rescan_device);
static void callback_for_addr_gid_device_scan(struct ib_device *device,
u8 port,
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index ab5e1024fea9..8cf15d4a8ac4 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1227,9 +1227,9 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask;
}
-int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
- struct sa_path_rec *rec,
- struct rdma_ah_attr *ah_attr)
+int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
@@ -1341,10 +1341,11 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
return 0;
}
-EXPORT_SYMBOL(ib_init_ah_from_path);
+EXPORT_SYMBOL(ib_init_ah_attr_from_path);
static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
{
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
spin_lock_irqsave(&query->port->ah_lock, flags);
@@ -1356,6 +1357,15 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
query->sm_ah = query->port->sm_ah;
spin_unlock_irqrestore(&query->port->ah_lock, flags);
+ /*
+ * Always check if sm_ah has valid dlid assigned,
+ * before querying for class port info
+ */
+ if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) ||
+ !rdma_is_valid_unicast_lid(&ah_attr)) {
+ kref_put(&query->sm_ah->ref, free_sm_ah);
+ return -EAGAIN;
+ }
query->mad_buf = ib_create_send_mad(query->port->agent, 1,
query->sm_ah->pkey_index,
0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 59b2f96d986a..b61dda6b04fc 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -653,12 +653,11 @@ int ib_security_modify_qp(struct ib_qp *qp,
}
return ret;
}
-EXPORT_SYMBOL(ib_security_modify_qp);
-int ib_security_pkey_access(struct ib_device *dev,
- u8 port_num,
- u16 pkey_index,
- void *sec)
+static int ib_security_pkey_access(struct ib_device *dev,
+ u8 port_num,
+ u16 pkey_index,
+ void *sec)
{
u64 subnet_prefix;
u16 pkey;
@@ -678,7 +677,6 @@ int ib_security_pkey_access(struct ib_device *dev,
return security_ib_pkey_access(sec, subnet_prefix, pkey);
}
-EXPORT_SYMBOL(ib_security_pkey_access);
static int ib_mad_agent_security_change(struct notifier_block *nb,
unsigned long event,
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index e30d86fa1855..8ae1308eecc7 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1276,7 +1276,6 @@ int ib_device_register_sysfs(struct ib_device *device,
int ret;
int i;
- WARN_ON_ONCE(!device->dev.parent);
ret = dev_set_name(class_dev, "%s", device->name);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index f7adae0adc19..8ae636bb09e5 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -53,6 +53,8 @@
#include <rdma/ib_user_cm.h>
#include <rdma/ib_marshall.h>
+#include "core_priv.h"
+
MODULE_AUTHOR("Libor Michalek");
MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
MODULE_LICENSE("Dual BSD/GPL");
@@ -104,10 +106,13 @@ struct ib_ucm_event {
enum {
IB_UCM_MAJOR = 231,
IB_UCM_BASE_MINOR = 224,
- IB_UCM_MAX_DEVICES = 32
+ IB_UCM_MAX_DEVICES = RDMA_MAX_PORTS,
+ IB_UCM_NUM_FIXED_MINOR = 32,
+ IB_UCM_NUM_DYNAMIC_MINOR = IB_UCM_MAX_DEVICES - IB_UCM_NUM_FIXED_MINOR,
};
#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
+static dev_t dynamic_ucm_dev;
static void ib_ucm_add_one(struct ib_device *device);
static void ib_ucm_remove_one(struct ib_device *device, void *client_data);
@@ -1199,7 +1204,6 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
return 0;
}
-static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES);
static void ib_ucm_release_dev(struct device *dev)
{
struct ib_ucm_device *ucm_dev;
@@ -1210,10 +1214,7 @@ static void ib_ucm_release_dev(struct device *dev)
static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev)
{
- if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
- clear_bit(ucm_dev->devnum, dev_map);
- else
- clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map);
+ clear_bit(ucm_dev->devnum, dev_map);
}
static const struct file_operations ucm_fops = {
@@ -1235,27 +1236,6 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
-static dev_t overflow_maj;
-static int find_overflow_devnum(void)
-{
- int ret;
-
- if (!overflow_maj) {
- ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES,
- "infiniband_cm");
- if (ret) {
- pr_err("ucm: couldn't register dynamic device number\n");
- return ret;
- }
- }
-
- ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES);
- if (ret >= IB_UCM_MAX_DEVICES)
- return -1;
-
- return ret;
-}
-
static void ib_ucm_add_one(struct ib_device *device)
{
int devnum;
@@ -1274,19 +1254,14 @@ static void ib_ucm_add_one(struct ib_device *device)
ucm_dev->dev.release = ib_ucm_release_dev;
devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
- if (devnum >= IB_UCM_MAX_DEVICES) {
- devnum = find_overflow_devnum();
- if (devnum < 0)
- goto err;
-
- ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES;
- base = devnum + overflow_maj;
- set_bit(devnum, overflow_map);
- } else {
- ucm_dev->devnum = devnum;
- base = devnum + IB_UCM_BASE_DEV;
- set_bit(devnum, dev_map);
- }
+ if (devnum >= IB_UCM_MAX_DEVICES)
+ goto err;
+ ucm_dev->devnum = devnum;
+ set_bit(devnum, dev_map);
+ if (devnum >= IB_UCM_NUM_FIXED_MINOR)
+ base = dynamic_ucm_dev + devnum - IB_UCM_NUM_FIXED_MINOR;
+ else
+ base = IB_UCM_BASE_DEV + devnum;
cdev_init(&ucm_dev->cdev, &ucm_fops);
ucm_dev->cdev.owner = THIS_MODULE;
@@ -1334,13 +1309,20 @@ static int __init ib_ucm_init(void)
{
int ret;
- ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
+ ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR,
"infiniband_cm");
if (ret) {
pr_err("ucm: couldn't register device number\n");
goto error1;
}
+ ret = alloc_chrdev_region(&dynamic_ucm_dev, 0, IB_UCM_NUM_DYNAMIC_MINOR,
+ "infiniband_cm");
+ if (ret) {
+ pr_err("ucm: couldn't register dynamic device number\n");
+ goto err_alloc;
+ }
+
ret = class_create_file(&cm_class, &class_attr_abi_version.attr);
if (ret) {
pr_err("ucm: couldn't create abi_version attribute\n");
@@ -1357,7 +1339,9 @@ static int __init ib_ucm_init(void)
error3:
class_remove_file(&cm_class, &class_attr_abi_version.attr);
error2:
- unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+ unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR);
+err_alloc:
+ unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR);
error1:
return ret;
}
@@ -1366,9 +1350,8 @@ static void __exit ib_ucm_cleanup(void)
{
ib_unregister_client(&ucm_client);
class_remove_file(&cm_class, &class_attr_abi_version.attr);
- unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
- if (overflow_maj)
- unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES);
+ unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR);
+ unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR);
idr_destroy(&ctx_id_table);
}
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index e4be89d1f3d8..6ba4231f2b07 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -904,13 +904,14 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
IB_PATH_BIDIRECTIONAL;
- if (rec->rec_type == SA_PATH_REC_TYPE_IB) {
- ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
- } else {
+ if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
struct sa_path_rec ib;
sa_convert_path_opa_to_ib(&ib, rec);
ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);
+
+ } else {
+ ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
}
}
@@ -943,8 +944,8 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
} else {
addr->sib_family = AF_IB;
addr->sib_pkey = (__force __be16) resp.pkey;
- rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr,
- (union ib_gid *) &addr->sib_addr);
+ rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
+ NULL);
addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
&ctx->cm_id->route.addr.src_addr);
}
@@ -956,8 +957,8 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
} else {
addr->sib_family = AF_IB;
addr->sib_pkey = (__force __be16) resp.pkey;
- rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr,
- (union ib_gid *) &addr->sib_addr);
+ rdma_read_gids(ctx->cm_id, NULL,
+ (union ib_gid *)&addr->sib_addr);
addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
&ctx->cm_id->route.addr.dst_addr);
}
@@ -1231,9 +1232,9 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
struct sa_path_rec opa;
sa_convert_path_ib_to_opa(&opa, &sa_path);
- ret = rdma_set_ib_paths(ctx->cm_id, &opa, 1);
+ ret = rdma_set_ib_path(ctx->cm_id, &opa);
} else {
- ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+ ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
}
if (ret)
return ret;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 130606c3b07c..9a4e899d94b3 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -352,7 +352,7 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
return -EINVAL;
}
- ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->nmap, dst, length,
+ ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->npages, dst, length,
offset + ib_umem_offset(umem));
if (ret < 0)
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 0c32d10f23ff..78c77962422e 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -55,16 +55,21 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_user_mad.h>
+#include "core_priv.h"
+
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace MAD packet access");
MODULE_LICENSE("Dual BSD/GPL");
enum {
- IB_UMAD_MAX_PORTS = 64,
+ IB_UMAD_MAX_PORTS = RDMA_MAX_PORTS,
IB_UMAD_MAX_AGENTS = 32,
IB_UMAD_MAJOR = 231,
- IB_UMAD_MINOR_BASE = 0
+ IB_UMAD_MINOR_BASE = 0,
+ IB_UMAD_NUM_FIXED_MINOR = 64,
+ IB_UMAD_NUM_DYNAMIC_MINOR = IB_UMAD_MAX_PORTS - IB_UMAD_NUM_FIXED_MINOR,
+ IB_ISSM_MINOR_BASE = IB_UMAD_NUM_FIXED_MINOR,
};
/*
@@ -127,9 +132,12 @@ struct ib_umad_packet {
static struct class *umad_class;
-static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
+static const dev_t base_umad_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
+static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) +
+ IB_UMAD_NUM_FIXED_MINOR;
+static dev_t dynamic_umad_dev;
+static dev_t dynamic_issm_dev;
-static DEFINE_SPINLOCK(port_lock);
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
static void ib_umad_add_one(struct ib_device *device);
@@ -233,8 +241,7 @@ static void recv_handler(struct ib_mad_agent *agent,
* On OPA devices it is okay to lose the upper 16 bits of LID as this
* information is obtained elsewhere. Mask off the upper 16 bits.
*/
- if (agent->device->port_immutable[agent->port_num].core_cap_flags &
- RDMA_CORE_PORT_INTEL_OPA)
+ if (rdma_cap_opa_mad(agent->device, agent->port_num))
packet->mad.hdr.lid = ib_lid_be16(0xFFFF &
mad_recv_wc->wc->slid);
else
@@ -246,10 +253,14 @@ static void recv_handler(struct ib_mad_agent *agent,
if (packet->mad.hdr.grh_present) {
struct rdma_ah_attr ah_attr;
const struct ib_global_route *grh;
+ int ret;
- ib_init_ah_from_wc(agent->device, agent->port_num,
- mad_recv_wc->wc, mad_recv_wc->recv_buf.grh,
- &ah_attr);
+ ret = ib_init_ah_attr_from_wc(agent->device, agent->port_num,
+ mad_recv_wc->wc,
+ mad_recv_wc->recv_buf.grh,
+ &ah_attr);
+ if (ret)
+ goto err2;
grh = rdma_ah_read_grh(&ah_attr);
packet->mad.hdr.gid_index = grh->sgid_index;
@@ -500,7 +511,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
}
memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.type = rdma_ah_find_type(file->port->ib_dev,
+ ah_attr.type = rdma_ah_find_type(agent->device,
file->port->port_num);
rdma_ah_set_dlid(&ah_attr, be16_to_cpu(packet->mad.hdr.lid));
rdma_ah_set_sl(&ah_attr, packet->mad.hdr.sl);
@@ -1139,54 +1150,26 @@ static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
__stringify(IB_USER_MAD_ABI_VERSION));
-static dev_t overflow_maj;
-static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
-static int find_overflow_devnum(struct ib_device *device)
-{
- int ret;
-
- if (!overflow_maj) {
- ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
- "infiniband_mad");
- if (ret) {
- dev_err(&device->dev,
- "couldn't register dynamic device number\n");
- return ret;
- }
- }
-
- ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS);
- if (ret >= IB_UMAD_MAX_PORTS)
- return -1;
-
- return ret;
-}
-
static int ib_umad_init_port(struct ib_device *device, int port_num,
struct ib_umad_device *umad_dev,
struct ib_umad_port *port)
{
int devnum;
- dev_t base;
+ dev_t base_umad;
+ dev_t base_issm;
- spin_lock(&port_lock);
devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
- if (devnum >= IB_UMAD_MAX_PORTS) {
- spin_unlock(&port_lock);
- devnum = find_overflow_devnum(device);
- if (devnum < 0)
- return -1;
-
- spin_lock(&port_lock);
- port->dev_num = devnum + IB_UMAD_MAX_PORTS;
- base = devnum + overflow_maj;
- set_bit(devnum, overflow_map);
+ if (devnum >= IB_UMAD_MAX_PORTS)
+ return -1;
+ port->dev_num = devnum;
+ set_bit(devnum, dev_map);
+ if (devnum >= IB_UMAD_NUM_FIXED_MINOR) {
+ base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
+ base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
} else {
- port->dev_num = devnum;
- base = devnum + base_dev;
- set_bit(devnum, dev_map);
+ base_umad = devnum + base_umad_dev;
+ base_issm = devnum + base_issm_dev;
}
- spin_unlock(&port_lock);
port->ib_dev = device;
port->port_num = port_num;
@@ -1198,7 +1181,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
port->cdev.owner = THIS_MODULE;
cdev_set_parent(&port->cdev, &umad_dev->kobj);
kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
- if (cdev_add(&port->cdev, base, 1))
+ if (cdev_add(&port->cdev, base_umad, 1))
goto err_cdev;
port->dev = device_create(umad_class, device->dev.parent,
@@ -1212,12 +1195,11 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (device_create_file(port->dev, &dev_attr_port))
goto err_dev;
- base += IB_UMAD_MAX_PORTS;
cdev_init(&port->sm_cdev, &umad_sm_fops);
port->sm_cdev.owner = THIS_MODULE;
cdev_set_parent(&port->sm_cdev, &umad_dev->kobj);
kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
- if (cdev_add(&port->sm_cdev, base, 1))
+ if (cdev_add(&port->sm_cdev, base_issm, 1))
goto err_sm_cdev;
port->sm_dev = device_create(umad_class, device->dev.parent,
@@ -1244,10 +1226,7 @@ err_dev:
err_cdev:
cdev_del(&port->cdev);
- if (port->dev_num < IB_UMAD_MAX_PORTS)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
+ clear_bit(devnum, dev_map);
return -1;
}
@@ -1281,11 +1260,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
}
mutex_unlock(&port->file_mutex);
-
- if (port->dev_num < IB_UMAD_MAX_PORTS)
- clear_bit(port->dev_num, dev_map);
- else
- clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map);
+ clear_bit(port->dev_num, dev_map);
}
static void ib_umad_add_one(struct ib_device *device)
@@ -1361,13 +1336,23 @@ static int __init ib_umad_init(void)
{
int ret;
- ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
+ ret = register_chrdev_region(base_umad_dev,
+ IB_UMAD_NUM_FIXED_MINOR * 2,
"infiniband_mad");
if (ret) {
pr_err("couldn't register device number\n");
goto out;
}
+ ret = alloc_chrdev_region(&dynamic_umad_dev, 0,
+ IB_UMAD_NUM_DYNAMIC_MINOR * 2,
+ "infiniband_mad");
+ if (ret) {
+ pr_err("couldn't register dynamic device number\n");
+ goto out_alloc;
+ }
+ dynamic_issm_dev = dynamic_umad_dev + IB_UMAD_NUM_DYNAMIC_MINOR;
+
umad_class = class_create(THIS_MODULE, "infiniband_mad");
if (IS_ERR(umad_class)) {
ret = PTR_ERR(umad_class);
@@ -1395,7 +1380,12 @@ out_class:
class_destroy(umad_class);
out_chrdev:
- unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
+ unregister_chrdev_region(dynamic_umad_dev,
+ IB_UMAD_NUM_DYNAMIC_MINOR * 2);
+
+out_alloc:
+ unregister_chrdev_region(base_umad_dev,
+ IB_UMAD_NUM_FIXED_MINOR * 2);
out:
return ret;
@@ -1405,9 +1395,10 @@ static void __exit ib_umad_cleanup(void)
{
ib_unregister_client(&umad_client);
class_destroy(umad_class);
- unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
- if (overflow_maj)
- unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2);
+ unregister_chrdev_region(base_umad_dev,
+ IB_UMAD_NUM_FIXED_MINOR * 2);
+ unregister_chrdev_region(dynamic_umad_dev,
+ IB_UMAD_NUM_DYNAMIC_MINOR * 2);
}
module_init(ib_umad_init);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 840b24096690..256934d1f64f 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -340,6 +340,8 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
uobj->object = pd;
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
+ pd->res.type = RDMA_RESTRACK_PD;
+ rdma_restrack_add(&pd->res);
if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
ret = -EFAULT;
@@ -1033,6 +1035,8 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
goto err_cb;
uobj_alloc_commit(&obj->uobject);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
return obj;
@@ -1145,10 +1149,7 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
min(ucore->inlen, sizeof(cmd)),
ib_uverbs_ex_create_cq_cb, NULL);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- return 0;
+ return PTR_ERR_OR_ZERO(obj);
}
ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
@@ -1199,7 +1200,7 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
tmp.opcode = wc->opcode;
tmp.vendor_err = wc->vendor_err;
tmp.byte_len = wc->byte_len;
- tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data;
+ tmp.ex.imm_data = wc->ex.imm_data;
tmp.qp_num = wc->qp->qp_num;
tmp.src_qp = wc->src_qp;
tmp.wc_flags = wc->wc_flags;
@@ -1517,7 +1518,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
- qp = device->create_qp(pd, &attr, uhw);
+ qp = _ib_create_qp(device, pd, &attr, uhw);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
@@ -1530,7 +1531,6 @@ static int create_qp(struct ib_uverbs_file *file,
goto err_cb;
qp->real_qp = qp;
- qp->device = device;
qp->pd = pd;
qp->send_cq = attr.send_cq;
qp->recv_cq = attr.recv_cq;
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 71ff2644e053..d96dc1d17be1 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -243,16 +243,13 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
size_t ctx_size;
uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)];
- if (hdr->reserved)
- return -EINVAL;
-
object_spec = uverbs_get_object(ib_dev, hdr->object_id);
if (!object_spec)
- return -EOPNOTSUPP;
+ return -EPROTONOSUPPORT;
method_spec = uverbs_get_method(object_spec, hdr->method_id);
if (!method_spec)
- return -EOPNOTSUPP;
+ return -EPROTONOSUPPORT;
if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext)
return -EINVAL;
@@ -305,6 +302,16 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev,
file, method_spec, ctx->uverbs_attr_bundle);
+
+ /*
+ * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can
+ * not invoke the method because the request is not supported. No
+ * other cases should return this code.
+ */
+ if (unlikely(err == -EPROTONOSUPPORT)) {
+ WARN_ON_ONCE(err == -EPROTONOSUPPORT);
+ err = -EINVAL;
+ }
out:
if (ctx != (void *)data)
kfree(ctx);
@@ -341,7 +348,7 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
if (hdr.reserved) {
- err = -EOPNOTSUPP;
+ err = -EPROTONOSUPPORT;
goto out;
}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 5f216ffb465a..5b811bf574d6 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -62,14 +62,16 @@ MODULE_LICENSE("Dual BSD/GPL");
enum {
IB_UVERBS_MAJOR = 231,
IB_UVERBS_BASE_MINOR = 192,
- IB_UVERBS_MAX_DEVICES = 32
+ IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
+ IB_UVERBS_NUM_FIXED_MINOR = 32,
+ IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
};
#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
+static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;
-static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
@@ -1005,34 +1007,6 @@ static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
__stringify(IB_USER_VERBS_ABI_VERSION));
-static dev_t overflow_maj;
-static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
-
-/*
- * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
- * requesting a new major number and doubling the number of max devices we
- * support. It's stupid, but simple.
- */
-static int find_overflow_devnum(void)
-{
- int ret;
-
- if (!overflow_maj) {
- ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
- "infiniband_verbs");
- if (ret) {
- pr_err("user_verbs: couldn't register dynamic device number\n");
- return ret;
- }
- }
-
- ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
- if (ret >= IB_UVERBS_MAX_DEVICES)
- return -1;
-
- return ret;
-}
-
static void ib_uverbs_add_one(struct ib_device *device)
{
int devnum;
@@ -1062,24 +1036,15 @@ static void ib_uverbs_add_one(struct ib_device *device)
INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
- spin_lock(&map_lock);
devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
- if (devnum >= IB_UVERBS_MAX_DEVICES) {
- spin_unlock(&map_lock);
- devnum = find_overflow_devnum();
- if (devnum < 0)
- goto err;
-
- spin_lock(&map_lock);
- uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
- base = devnum + overflow_maj;
- set_bit(devnum, overflow_map);
- } else {
- uverbs_dev->devnum = devnum;
- base = devnum + IB_UVERBS_BASE_DEV;
- set_bit(devnum, dev_map);
- }
- spin_unlock(&map_lock);
+ if (devnum >= IB_UVERBS_MAX_DEVICES)
+ goto err;
+ uverbs_dev->devnum = devnum;
+ set_bit(devnum, dev_map);
+ if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
+ base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
+ else
+ base = IB_UVERBS_BASE_DEV + devnum;
rcu_assign_pointer(uverbs_dev->ib_dev, device);
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
@@ -1124,10 +1089,7 @@ err_class:
err_cdev:
cdev_del(&uverbs_dev->cdev);
- if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
+ clear_bit(devnum, dev_map);
err:
if (atomic_dec_and_test(&uverbs_dev->refcount))
@@ -1219,11 +1181,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
dev_set_drvdata(uverbs_dev->dev, NULL);
device_destroy(uverbs_class, uverbs_dev->cdev.dev);
cdev_del(&uverbs_dev->cdev);
-
- if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
- clear_bit(uverbs_dev->devnum, dev_map);
- else
- clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
+ clear_bit(uverbs_dev->devnum, dev_map);
if (device->disassociate_ucontext) {
/* We disassociate HW resources and immediately return.
@@ -1265,13 +1223,22 @@ static int __init ib_uverbs_init(void)
{
int ret;
- ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
+ ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR,
"infiniband_verbs");
if (ret) {
pr_err("user_verbs: couldn't register device number\n");
goto out;
}
+ ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
+ IB_UVERBS_NUM_DYNAMIC_MINOR,
+ "infiniband_verbs");
+ if (ret) {
+ pr_err("couldn't register dynamic device number\n");
+ goto out_alloc;
+ }
+
uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
if (IS_ERR(uverbs_class)) {
ret = PTR_ERR(uverbs_class);
@@ -1299,7 +1266,12 @@ out_class:
class_destroy(uverbs_class);
out_chrdev:
- unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+ unregister_chrdev_region(dynamic_uverbs_dev,
+ IB_UVERBS_NUM_DYNAMIC_MINOR);
+
+out_alloc:
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR);
out:
return ret;
@@ -1309,9 +1281,10 @@ static void __exit ib_uverbs_cleanup(void)
{
ib_unregister_client(&uverbs_client);
class_destroy(uverbs_class);
- unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
- if (overflow_maj)
- unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR);
+ unregister_chrdev_region(dynamic_uverbs_dev,
+ IB_UVERBS_NUM_DYNAMIC_MINOR);
}
module_init(ib_uverbs_init);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index c3ee5d9b336d..b571176babbe 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -35,6 +35,7 @@
#include <rdma/ib_verbs.h>
#include <linux/bug.h>
#include <linux/file.h>
+#include <rdma/restrack.h>
#include "rdma_core.h"
#include "uverbs.h"
@@ -319,6 +320,8 @@ static int uverbs_create_cq_handler(struct ib_device *ib_dev,
obj->uobject.object = cq;
obj->uobject.user_handle = user_handle;
atomic_set(&cq->usecnt, 0);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe);
if (ret)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index e36d27ed4daa..16ebc6372c31 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -124,16 +124,24 @@ EXPORT_SYMBOL(ib_wc_status_msg);
__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
{
switch (rate) {
- case IB_RATE_2_5_GBPS: return 1;
- case IB_RATE_5_GBPS: return 2;
- case IB_RATE_10_GBPS: return 4;
- case IB_RATE_20_GBPS: return 8;
- case IB_RATE_30_GBPS: return 12;
- case IB_RATE_40_GBPS: return 16;
- case IB_RATE_60_GBPS: return 24;
- case IB_RATE_80_GBPS: return 32;
- case IB_RATE_120_GBPS: return 48;
- default: return -1;
+ case IB_RATE_2_5_GBPS: return 1;
+ case IB_RATE_5_GBPS: return 2;
+ case IB_RATE_10_GBPS: return 4;
+ case IB_RATE_20_GBPS: return 8;
+ case IB_RATE_30_GBPS: return 12;
+ case IB_RATE_40_GBPS: return 16;
+ case IB_RATE_60_GBPS: return 24;
+ case IB_RATE_80_GBPS: return 32;
+ case IB_RATE_120_GBPS: return 48;
+ case IB_RATE_14_GBPS: return 6;
+ case IB_RATE_56_GBPS: return 22;
+ case IB_RATE_112_GBPS: return 45;
+ case IB_RATE_168_GBPS: return 67;
+ case IB_RATE_25_GBPS: return 10;
+ case IB_RATE_100_GBPS: return 40;
+ case IB_RATE_200_GBPS: return 80;
+ case IB_RATE_300_GBPS: return 120;
+ default: return -1;
}
}
EXPORT_SYMBOL(ib_rate_to_mult);
@@ -141,16 +149,24 @@ EXPORT_SYMBOL(ib_rate_to_mult);
__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
{
switch (mult) {
- case 1: return IB_RATE_2_5_GBPS;
- case 2: return IB_RATE_5_GBPS;
- case 4: return IB_RATE_10_GBPS;
- case 8: return IB_RATE_20_GBPS;
- case 12: return IB_RATE_30_GBPS;
- case 16: return IB_RATE_40_GBPS;
- case 24: return IB_RATE_60_GBPS;
- case 32: return IB_RATE_80_GBPS;
- case 48: return IB_RATE_120_GBPS;
- default: return IB_RATE_PORT_CURRENT;
+ case 1: return IB_RATE_2_5_GBPS;
+ case 2: return IB_RATE_5_GBPS;
+ case 4: return IB_RATE_10_GBPS;
+ case 8: return IB_RATE_20_GBPS;
+ case 12: return IB_RATE_30_GBPS;
+ case 16: return IB_RATE_40_GBPS;
+ case 24: return IB_RATE_60_GBPS;
+ case 32: return IB_RATE_80_GBPS;
+ case 48: return IB_RATE_120_GBPS;
+ case 6: return IB_RATE_14_GBPS;
+ case 22: return IB_RATE_56_GBPS;
+ case 45: return IB_RATE_112_GBPS;
+ case 67: return IB_RATE_168_GBPS;
+ case 10: return IB_RATE_25_GBPS;
+ case 40: return IB_RATE_100_GBPS;
+ case 80: return IB_RATE_200_GBPS;
+ case 120: return IB_RATE_300_GBPS;
+ default: return IB_RATE_PORT_CURRENT;
}
}
EXPORT_SYMBOL(mult_to_ib_rate);
@@ -247,6 +263,10 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
}
+ pd->res.type = RDMA_RESTRACK_PD;
+ pd->res.kern_name = caller;
+ rdma_restrack_add(&pd->res);
+
if (mr_access_flags) {
struct ib_mr *mr;
@@ -296,6 +316,7 @@ void ib_dealloc_pd(struct ib_pd *pd)
requires the caller to guarantee we can't race here. */
WARN_ON(atomic_read(&pd->usecnt));
+ rdma_restrack_del(&pd->res);
/* Making delalloc_pd a void return is a WIP, no driver should return
an error here. */
ret = pd->device->dealloc_pd(pd);
@@ -421,8 +442,7 @@ static bool find_gid_index(const union ib_gid *gid,
const struct ib_gid_attr *gid_attr,
void *context)
{
- struct find_gid_index_context *ctx =
- (struct find_gid_index_context *)context;
+ struct find_gid_index_context *ctx = context;
if (ctx->gid_type != gid_attr->gid_type)
return false;
@@ -481,8 +501,53 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
}
EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
+/* Resolve destination mac address and hop limit for unicast destination
+ * GID entry, considering the source GID entry as well.
+ * ah_attribute must have have valid port_num, sgid_index.
+ */
+static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr)
+{
+ struct ib_gid_attr sgid_attr;
+ struct ib_global_route *grh;
+ int hop_limit = 0xff;
+ union ib_gid sgid;
+ int ret;
+
+ grh = rdma_ah_retrieve_grh(ah_attr);
+
+ ret = ib_query_gid(device,
+ rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index,
+ &sgid, &sgid_attr);
+ if (ret || !sgid_attr.ndev) {
+ if (!ret)
+ ret = -ENXIO;
+ return ret;
+ }
+
+ /* If destination is link local and source GID is RoCEv1,
+ * IP stack is not used.
+ */
+ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
+ sgid_attr.gid_type == IB_GID_TYPE_ROCE) {
+ rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
+ ah_attr->roce.dmac);
+ goto done;
+ }
+
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ ah_attr->roce.dmac,
+ sgid_attr.ndev, &hop_limit);
+done:
+ dev_put(sgid_attr.ndev);
+
+ grh->hop_limit = hop_limit;
+ return ret;
+}
+
/*
- * This function creates ah from the incoming packet.
+ * This function initializes address handle attributes from the incoming packet.
* Incoming packet has dgid of the receiver node on which this code is
* getting executed and, sgid contains the GID of the sender.
*
@@ -490,13 +555,10 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
* as sgid and, sgid is used as dgid because sgid contains destinations
* GID whom to respond to.
*
- * This is why when calling rdma_addr_find_l2_eth_by_grh() function, the
- * position of arguments dgid and sgid do not match the order of the
- * parameters.
*/
-int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
- const struct ib_wc *wc, const struct ib_grh *grh,
- struct rdma_ah_attr *ah_attr)
+int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
+ const struct ib_wc *wc, const struct ib_grh *grh,
+ struct rdma_ah_attr *ah_attr)
{
u32 flow_class;
u16 gid_index;
@@ -523,57 +585,33 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
if (ret)
return ret;
+ rdma_ah_set_sl(ah_attr, wc->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+
if (rdma_protocol_roce(device, port_num)) {
- int if_index = 0;
u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
wc->vlan_id : 0xffff;
- struct net_device *idev;
- struct net_device *resolved_dev;
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- if (!device->get_netdev)
- return -EOPNOTSUPP;
-
- idev = device->get_netdev(device, port_num);
- if (!idev)
- return -ENODEV;
-
- ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
- ah_attr->roce.dmac,
- wc->wc_flags & IB_WC_WITH_VLAN ?
- NULL : &vlan_id,
- &if_index, &hoplimit);
- if (ret) {
- dev_put(idev);
- return ret;
- }
-
- resolved_dev = dev_get_by_index(&init_net, if_index);
- rcu_read_lock();
- if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
- resolved_dev))
- ret = -EHOSTUNREACH;
- rcu_read_unlock();
- dev_put(idev);
- dev_put(resolved_dev);
+ ret = get_sgid_index_from_eth(device, port_num,
+ vlan_id, &dgid,
+ gid_type, &gid_index);
if (ret)
return ret;
- ret = get_sgid_index_from_eth(device, port_num, vlan_id,
- &dgid, gid_type, &gid_index);
- if (ret)
- return ret;
- }
-
- rdma_ah_set_dlid(ah_attr, wc->slid);
- rdma_ah_set_sl(ah_attr, wc->sl);
- rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
- rdma_ah_set_port_num(ah_attr, port_num);
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+ return ib_resolve_unicast_gid_dmac(device, ah_attr);
+ } else {
+ rdma_ah_set_dlid(ah_attr, wc->slid);
+ rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
- if (wc->wc_flags & IB_WC_GRH) {
- if (!rdma_cap_eth_ah(device, port_num)) {
+ if (wc->wc_flags & IB_WC_GRH) {
if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
ret = ib_find_cached_gid_by_port(device, &dgid,
IB_GID_TYPE_IB,
@@ -584,18 +622,17 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
} else {
gid_index = 0;
}
- }
-
- flow_class = be32_to_cpu(grh->version_tclass_flow);
- rdma_ah_set_grh(ah_attr, &sgid,
- flow_class & 0xFFFFF,
- (u8)gid_index, hoplimit,
- (flow_class >> 20) & 0xFF);
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+ }
+ return 0;
}
- return 0;
}
-EXPORT_SYMBOL(ib_init_ah_from_wc);
+EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
const struct ib_grh *grh, u8 port_num)
@@ -603,7 +640,7 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
struct rdma_ah_attr ah_attr;
int ret;
- ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
+ ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
if (ret)
return ERR_PTR(ret);
@@ -850,7 +887,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
if (qp_init_attr->cap.max_rdma_ctxs)
rdma_rw_init_qp(device, qp_init_attr);
- qp = device->create_qp(pd, qp_init_attr, NULL);
+ qp = _ib_create_qp(device, pd, qp_init_attr, NULL);
if (IS_ERR(qp))
return qp;
@@ -860,7 +897,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
return ERR_PTR(ret);
}
- qp->device = device;
qp->real_qp = qp;
qp->uobject = NULL;
qp->qp_type = qp_init_attr->qp_type;
@@ -890,7 +926,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
atomic_inc(&qp_init_attr->srq->usecnt);
}
- qp->pd = pd;
qp->send_cq = qp_init_attr->send_cq;
qp->xrcd = NULL;
@@ -1269,16 +1304,8 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
return -EINVAL;
- if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE)
- return 0;
-
grh = rdma_ah_retrieve_grh(ah_attr);
- if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
- ah_attr->roce.dmac);
- return 0;
- }
if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
__be32 addr = 0;
@@ -1290,40 +1317,52 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
(char *)ah_attr->roce.dmac);
}
} else {
- union ib_gid sgid;
- struct ib_gid_attr sgid_attr;
- int ifindex;
- int hop_limit;
-
- ret = ib_query_gid(device,
- rdma_ah_get_port_num(ah_attr),
- grh->sgid_index,
- &sgid, &sgid_attr);
-
- if (ret || !sgid_attr.ndev) {
- if (!ret)
- ret = -ENXIO;
- goto out;
- }
-
- ifindex = sgid_attr.ndev->ifindex;
+ ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
+ }
+ return ret;
+}
- ret =
- rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
- ah_attr->roce.dmac,
- NULL, &ifindex, &hop_limit);
+/**
+ * IB core internal function to perform QP attributes modification.
+ */
+static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ int ret;
- dev_put(sgid_attr.ndev);
+ if (rdma_ib_or_roce(qp->device, port)) {
+ if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
+ pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
+ __func__, qp->device->name);
+ attr->rq_psn &= 0xffffff;
+ }
- grh->hop_limit = hop_limit;
+ if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
+ pr_warn("%s: %s sq_psn overflow, masking to 24 bits\n",
+ __func__, qp->device->name);
+ attr->sq_psn &= 0xffffff;
+ }
}
-out:
+
+ ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
+ if (!ret && (attr_mask & IB_QP_PORT))
+ qp->port = attr->port_num;
+
return ret;
}
+static bool is_qp_type_connected(const struct ib_qp *qp)
+{
+ return (qp->qp_type == IB_QPT_UC ||
+ qp->qp_type == IB_QPT_RC ||
+ qp->qp_type == IB_QPT_XRC_INI ||
+ qp->qp_type == IB_QPT_XRC_TGT);
+}
+
/**
* ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
- * @qp: The QP to modify.
+ * @ib_qp: The QP to modify.
* @attr: On input, specifies the QP attributes to modify. On output,
* the current values of selected QP attributes are returned.
* @attr_mask: A bit-mask used to specify which attributes of the QP
@@ -1332,21 +1371,20 @@ out:
* are being modified.
* It returns 0 on success and returns appropriate error code on error.
*/
-int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr,
+int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
+ struct ib_qp *qp = ib_qp->real_qp;
int ret;
- if (attr_mask & IB_QP_AV) {
+ if (attr_mask & IB_QP_AV &&
+ attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
+ is_qp_type_connected(qp)) {
ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
if (ret)
return ret;
}
- ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
- if (!ret && (attr_mask & IB_QP_PORT))
- qp->port = attr->port_num;
-
- return ret;
+ return _ib_modify_qp(qp, attr, attr_mask, udata);
}
EXPORT_SYMBOL(ib_modify_qp_with_udata);
@@ -1409,7 +1447,7 @@ int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
{
- return ib_modify_qp_with_udata(qp, qp_attr, qp_attr_mask, NULL);
+ return _ib_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);
@@ -1503,6 +1541,7 @@ int ib_destroy_qp(struct ib_qp *qp)
if (!qp->uobject)
rdma_rw_cleanup_mrs(qp);
+ rdma_restrack_del(&qp->res);
ret = qp->device->destroy_qp(qp);
if (!ret) {
if (pd)
@@ -1545,6 +1584,8 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
cq->event_handler = event_handler;
cq->cq_context = cq_context;
atomic_set(&cq->usecnt, 0);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
}
return cq;
@@ -1563,6 +1604,7 @@ int ib_destroy_cq(struct ib_cq *cq)
if (atomic_read(&cq->usecnt))
return -EBUSY;
+ rdma_restrack_del(&cq->res);
return cq->device->destroy_cq(cq);
}
EXPORT_SYMBOL(ib_destroy_cq);
@@ -1747,7 +1789,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
}
EXPORT_SYMBOL(ib_detach_mcast);
-struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
+struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
{
struct ib_xrcd *xrcd;
@@ -1765,7 +1807,7 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
return xrcd;
}
-EXPORT_SYMBOL(ib_alloc_xrcd);
+EXPORT_SYMBOL(__ib_alloc_xrcd);
int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
@@ -1790,11 +1832,11 @@ EXPORT_SYMBOL(ib_dealloc_xrcd);
* ib_create_wq - Creates a WQ associated with the specified protection
* domain.
* @pd: The protection domain associated with the WQ.
- * @wq_init_attr: A list of initial attributes required to create the
+ * @wq_attr: A list of initial attributes required to create the
* WQ. If WQ creation succeeds, then the attributes are updated to
* the actual capabilities of the created WQ.
*
- * wq_init_attr->max_wr and wq_init_attr->max_sge determine
+ * wq_attr->max_wr and wq_attr->max_sge determine
* the requested size of the WQ, and set to the actual values allocated
* on return.
* If ib_create_wq() succeeds, then max_wr and max_sge will always be
@@ -2156,16 +2198,16 @@ static void __ib_drain_sq(struct ib_qp *qp)
struct ib_send_wr swr = {}, *bad_swr;
int ret;
- swr.wr_cqe = &sdrain.cqe;
- sdrain.cqe.done = ib_drain_qp_done;
- init_completion(&sdrain.done);
-
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
}
+ swr.wr_cqe = &sdrain.cqe;
+ sdrain.cqe.done = ib_drain_qp_done;
+ init_completion(&sdrain.done);
+
ret = ib_post_send(qp, &swr, &bad_swr);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
@@ -2190,16 +2232,16 @@ static void __ib_drain_rq(struct ib_qp *qp)
struct ib_recv_wr rwr = {}, *bad_rwr;
int ret;
- rwr.wr_cqe = &rdrain.cqe;
- rdrain.cqe.done = ib_drain_qp_done;
- init_completion(&rdrain.done);
-
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
return;
}
+ rwr.wr_cqe = &rdrain.cqe;
+ rdrain.cqe.done = ib_drain_qp_done;
+ init_completion(&rdrain.done);
+
ret = ib_post_recv(qp, &rwr, &bad_rwr);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index ecbac91b2e14..ca32057e886f 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -43,20 +43,41 @@
#define ROCE_DRV_MODULE_VERSION "1.0.0"
#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
-
-#define BNXT_RE_PAGE_SIZE_4K BIT(12)
-#define BNXT_RE_PAGE_SIZE_8K BIT(13)
-#define BNXT_RE_PAGE_SIZE_64K BIT(16)
-#define BNXT_RE_PAGE_SIZE_2M BIT(21)
-#define BNXT_RE_PAGE_SIZE_8M BIT(23)
-#define BNXT_RE_PAGE_SIZE_1G BIT(30)
-
-#define BNXT_RE_MAX_MR_SIZE BIT(30)
+#define BNXT_RE_PAGE_SHIFT_4K (12)
+#define BNXT_RE_PAGE_SHIFT_8K (13)
+#define BNXT_RE_PAGE_SHIFT_64K (16)
+#define BNXT_RE_PAGE_SHIFT_2M (21)
+#define BNXT_RE_PAGE_SHIFT_8M (23)
+#define BNXT_RE_PAGE_SHIFT_1G (30)
+
+#define BNXT_RE_PAGE_SIZE_4K BIT(BNXT_RE_PAGE_SHIFT_4K)
+#define BNXT_RE_PAGE_SIZE_8K BIT(BNXT_RE_PAGE_SHIFT_8K)
+#define BNXT_RE_PAGE_SIZE_64K BIT(BNXT_RE_PAGE_SHIFT_64K)
+#define BNXT_RE_PAGE_SIZE_2M BIT(BNXT_RE_PAGE_SHIFT_2M)
+#define BNXT_RE_PAGE_SIZE_8M BIT(BNXT_RE_PAGE_SHIFT_8M)
+#define BNXT_RE_PAGE_SIZE_1G BIT(BNXT_RE_PAGE_SHIFT_1G)
+
+#define BNXT_RE_MAX_MR_SIZE_LOW BIT(BNXT_RE_PAGE_SHIFT_1G)
+#define BNXT_RE_MAX_MR_SIZE_HIGH BIT(39)
+#define BNXT_RE_MAX_MR_SIZE BNXT_RE_MAX_MR_SIZE_HIGH
#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
#define BNXT_RE_MAX_MRW_COUNT (64 * 1024)
#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024)
+
+/* Number of MRs to reserve for PF, leaving remainder for VFs */
+#define BNXT_RE_RESVD_MR_FOR_PF (32 * 1024)
+#define BNXT_RE_MAX_GID_PER_VF 128
+
+/*
+ * Percentage of resources of each type reserved for PF.
+ * Remaining resources are divided equally among VFs.
+ * [0, 100]
+ */
+#define BNXT_RE_PCT_RSVD_FOR_PF 50
#define BNXT_RE_UD_QP_HW_STALL 0x400000
@@ -100,6 +121,7 @@ struct bnxt_re_dev {
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
#define BNXT_RE_FLAG_QOS_WORK_REG 5
#define BNXT_RE_FLAG_TASK_IN_PROG 6
+#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev;
unsigned int version, major, minor;
struct bnxt_en_dev *en_dev;
@@ -145,6 +167,9 @@ struct bnxt_re_dev {
struct bnxt_re_ah *sqp_ah;
struct bnxt_re_sqp_entries sqp_tbl[1024];
atomic_t nq_alloc_cnt;
+ u32 is_virtfn;
+ u32 num_vfs;
+ struct bnxt_qplib_roce_stats stats;
};
#define to_bnxt_re_dev(ptr, member) \
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 7b28219eba46..77416bc61e6e 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -58,16 +58,55 @@
#include "hw_counters.h"
static const char * const bnxt_re_stat_name[] = {
- [BNXT_RE_ACTIVE_QP] = "active_qps",
- [BNXT_RE_ACTIVE_SRQ] = "active_srqs",
- [BNXT_RE_ACTIVE_CQ] = "active_cqs",
- [BNXT_RE_ACTIVE_MR] = "active_mrs",
- [BNXT_RE_ACTIVE_MW] = "active_mws",
- [BNXT_RE_RX_PKTS] = "rx_pkts",
- [BNXT_RE_RX_BYTES] = "rx_bytes",
- [BNXT_RE_TX_PKTS] = "tx_pkts",
- [BNXT_RE_TX_BYTES] = "tx_bytes",
- [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors"
+ [BNXT_RE_ACTIVE_QP] = "active_qps",
+ [BNXT_RE_ACTIVE_SRQ] = "active_srqs",
+ [BNXT_RE_ACTIVE_CQ] = "active_cqs",
+ [BNXT_RE_ACTIVE_MR] = "active_mrs",
+ [BNXT_RE_ACTIVE_MW] = "active_mws",
+ [BNXT_RE_RX_PKTS] = "rx_pkts",
+ [BNXT_RE_RX_BYTES] = "rx_bytes",
+ [BNXT_RE_TX_PKTS] = "tx_pkts",
+ [BNXT_RE_TX_BYTES] = "tx_bytes",
+ [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors",
+ [BNXT_RE_TO_RETRANSMITS] = "to_retransmits",
+ [BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
+ [BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded",
+ [BNXT_RE_RNR_NAKS_RCVD] = "rnr_naks_rcvd",
+ [BNXT_RE_MISSING_RESP] = "missin_resp",
+ [BNXT_RE_UNRECOVERABLE_ERR] = "unrecoverable_err",
+ [BNXT_RE_BAD_RESP_ERR] = "bad_resp_err",
+ [BNXT_RE_LOCAL_QP_OP_ERR] = "local_qp_op_err",
+ [BNXT_RE_LOCAL_PROTECTION_ERR] = "local_protection_err",
+ [BNXT_RE_MEM_MGMT_OP_ERR] = "mem_mgmt_op_err",
+ [BNXT_RE_REMOTE_INVALID_REQ_ERR] = "remote_invalid_req_err",
+ [BNXT_RE_REMOTE_ACCESS_ERR] = "remote_access_err",
+ [BNXT_RE_REMOTE_OP_ERR] = "remote_op_err",
+ [BNXT_RE_DUP_REQ] = "dup_req",
+ [BNXT_RE_RES_EXCEED_MAX] = "res_exceed_max",
+ [BNXT_RE_RES_LENGTH_MISMATCH] = "res_length_mismatch",
+ [BNXT_RE_RES_EXCEEDS_WQE] = "res_exceeds_wqe",
+ [BNXT_RE_RES_OPCODE_ERR] = "res_opcode_err",
+ [BNXT_RE_RES_RX_INVALID_RKEY] = "res_rx_invalid_rkey",
+ [BNXT_RE_RES_RX_DOMAIN_ERR] = "res_rx_domain_err",
+ [BNXT_RE_RES_RX_NO_PERM] = "res_rx_no_perm",
+ [BNXT_RE_RES_RX_RANGE_ERR] = "res_rx_range_err",
+ [BNXT_RE_RES_TX_INVALID_RKEY] = "res_tx_invalid_rkey",
+ [BNXT_RE_RES_TX_DOMAIN_ERR] = "res_tx_domain_err",
+ [BNXT_RE_RES_TX_NO_PERM] = "res_tx_no_perm",
+ [BNXT_RE_RES_TX_RANGE_ERR] = "res_tx_range_err",
+ [BNXT_RE_RES_IRRQ_OFLOW] = "res_irrq_oflow",
+ [BNXT_RE_RES_UNSUP_OPCODE] = "res_unsup_opcode",
+ [BNXT_RE_RES_UNALIGNED_ATOMIC] = "res_unaligned_atomic",
+ [BNXT_RE_RES_REM_INV_ERR] = "res_rem_inv_err",
+ [BNXT_RE_RES_MEM_ERROR] = "res_mem_err",
+ [BNXT_RE_RES_SRQ_ERR] = "res_srq_err",
+ [BNXT_RE_RES_CMP_ERR] = "res_cmp_err",
+ [BNXT_RE_RES_INVALID_DUP_RKEY] = "res_invalid_dup_rkey",
+ [BNXT_RE_RES_WQE_FORMAT_ERR] = "res_wqe_format_err",
+ [BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err",
+ [BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err",
+ [BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err",
+ [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err"
};
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
@@ -76,6 +115,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
struct ctx_hw_stats *bnxt_re_stats = rdev->qplib_ctx.stats.dma;
+ int rc = 0;
if (!port || !stats)
return -EINVAL;
@@ -97,6 +137,91 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
stats->value[BNXT_RE_TX_BYTES] =
le64_to_cpu(bnxt_re_stats->tx_ucast_bytes);
}
+ if (test_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags)) {
+ rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, &rdev->stats);
+ if (rc)
+ clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
+ &rdev->flags);
+ stats->value[BNXT_RE_TO_RETRANSMITS] =
+ rdev->stats.to_retransmits;
+ stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] =
+ rdev->stats.seq_err_naks_rcvd;
+ stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] =
+ rdev->stats.max_retry_exceeded;
+ stats->value[BNXT_RE_RNR_NAKS_RCVD] =
+ rdev->stats.rnr_naks_rcvd;
+ stats->value[BNXT_RE_MISSING_RESP] =
+ rdev->stats.missing_resp;
+ stats->value[BNXT_RE_UNRECOVERABLE_ERR] =
+ rdev->stats.unrecoverable_err;
+ stats->value[BNXT_RE_BAD_RESP_ERR] =
+ rdev->stats.bad_resp_err;
+ stats->value[BNXT_RE_LOCAL_QP_OP_ERR] =
+ rdev->stats.local_qp_op_err;
+ stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] =
+ rdev->stats.local_protection_err;
+ stats->value[BNXT_RE_MEM_MGMT_OP_ERR] =
+ rdev->stats.mem_mgmt_op_err;
+ stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] =
+ rdev->stats.remote_invalid_req_err;
+ stats->value[BNXT_RE_REMOTE_ACCESS_ERR] =
+ rdev->stats.remote_access_err;
+ stats->value[BNXT_RE_REMOTE_OP_ERR] =
+ rdev->stats.remote_op_err;
+ stats->value[BNXT_RE_DUP_REQ] =
+ rdev->stats.dup_req;
+ stats->value[BNXT_RE_RES_EXCEED_MAX] =
+ rdev->stats.res_exceed_max;
+ stats->value[BNXT_RE_RES_LENGTH_MISMATCH] =
+ rdev->stats.res_length_mismatch;
+ stats->value[BNXT_RE_RES_EXCEEDS_WQE] =
+ rdev->stats.res_exceeds_wqe;
+ stats->value[BNXT_RE_RES_OPCODE_ERR] =
+ rdev->stats.res_opcode_err;
+ stats->value[BNXT_RE_RES_RX_INVALID_RKEY] =
+ rdev->stats.res_rx_invalid_rkey;
+ stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] =
+ rdev->stats.res_rx_domain_err;
+ stats->value[BNXT_RE_RES_RX_NO_PERM] =
+ rdev->stats.res_rx_no_perm;
+ stats->value[BNXT_RE_RES_RX_RANGE_ERR] =
+ rdev->stats.res_rx_range_err;
+ stats->value[BNXT_RE_RES_TX_INVALID_RKEY] =
+ rdev->stats.res_tx_invalid_rkey;
+ stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] =
+ rdev->stats.res_tx_domain_err;
+ stats->value[BNXT_RE_RES_TX_NO_PERM] =
+ rdev->stats.res_tx_no_perm;
+ stats->value[BNXT_RE_RES_TX_RANGE_ERR] =
+ rdev->stats.res_tx_range_err;
+ stats->value[BNXT_RE_RES_IRRQ_OFLOW] =
+ rdev->stats.res_irrq_oflow;
+ stats->value[BNXT_RE_RES_UNSUP_OPCODE] =
+ rdev->stats.res_unsup_opcode;
+ stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] =
+ rdev->stats.res_unaligned_atomic;
+ stats->value[BNXT_RE_RES_REM_INV_ERR] =
+ rdev->stats.res_rem_inv_err;
+ stats->value[BNXT_RE_RES_MEM_ERROR] =
+ rdev->stats.res_mem_error;
+ stats->value[BNXT_RE_RES_SRQ_ERR] =
+ rdev->stats.res_srq_err;
+ stats->value[BNXT_RE_RES_CMP_ERR] =
+ rdev->stats.res_cmp_err;
+ stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] =
+ rdev->stats.res_invalid_dup_rkey;
+ stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] =
+ rdev->stats.res_wqe_format_err;
+ stats->value[BNXT_RE_RES_CQ_LOAD_ERR] =
+ rdev->stats.res_cq_load_err;
+ stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] =
+ rdev->stats.res_srq_load_err;
+ stats->value[BNXT_RE_RES_TX_PCI_ERR] =
+ rdev->stats.res_tx_pci_err;
+ stats->value[BNXT_RE_RES_RX_PCI_ERR] =
+ rdev->stats.res_rx_pci_err;
+ }
+
return ARRAY_SIZE(bnxt_re_stat_name);
}
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h
index be0dc0093b58..a01a922717d5 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.h
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h
@@ -51,6 +51,45 @@ enum bnxt_re_hw_stats {
BNXT_RE_TX_PKTS,
BNXT_RE_TX_BYTES,
BNXT_RE_RECOVERABLE_ERRORS,
+ BNXT_RE_TO_RETRANSMITS,
+ BNXT_RE_SEQ_ERR_NAKS_RCVD,
+ BNXT_RE_MAX_RETRY_EXCEEDED,
+ BNXT_RE_RNR_NAKS_RCVD,
+ BNXT_RE_MISSING_RESP,
+ BNXT_RE_UNRECOVERABLE_ERR,
+ BNXT_RE_BAD_RESP_ERR,
+ BNXT_RE_LOCAL_QP_OP_ERR,
+ BNXT_RE_LOCAL_PROTECTION_ERR,
+ BNXT_RE_MEM_MGMT_OP_ERR,
+ BNXT_RE_REMOTE_INVALID_REQ_ERR,
+ BNXT_RE_REMOTE_ACCESS_ERR,
+ BNXT_RE_REMOTE_OP_ERR,
+ BNXT_RE_DUP_REQ,
+ BNXT_RE_RES_EXCEED_MAX,
+ BNXT_RE_RES_LENGTH_MISMATCH,
+ BNXT_RE_RES_EXCEEDS_WQE,
+ BNXT_RE_RES_OPCODE_ERR,
+ BNXT_RE_RES_RX_INVALID_RKEY,
+ BNXT_RE_RES_RX_DOMAIN_ERR,
+ BNXT_RE_RES_RX_NO_PERM,
+ BNXT_RE_RES_RX_RANGE_ERR,
+ BNXT_RE_RES_TX_INVALID_RKEY,
+ BNXT_RE_RES_TX_DOMAIN_ERR,
+ BNXT_RE_RES_TX_NO_PERM,
+ BNXT_RE_RES_TX_RANGE_ERR,
+ BNXT_RE_RES_IRRQ_OFLOW,
+ BNXT_RE_RES_UNSUP_OPCODE,
+ BNXT_RE_RES_UNALIGNED_ATOMIC,
+ BNXT_RE_RES_REM_INV_ERR,
+ BNXT_RE_RES_MEM_ERROR,
+ BNXT_RE_RES_SRQ_ERR,
+ BNXT_RE_RES_CMP_ERR,
+ BNXT_RE_RES_INVALID_DUP_RKEY,
+ BNXT_RE_RES_WQE_FORMAT_ERR,
+ BNXT_RE_RES_CQ_LOAD_ERR,
+ BNXT_RE_RES_SRQ_LOAD_ERR,
+ BNXT_RE_RES_TX_PCI_ERR,
+ BNXT_RE_RES_RX_PCI_ERR,
BNXT_RE_NUM_COUNTERS
};
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 2032db7db766..9b8fa77b8831 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -141,12 +141,13 @@ int bnxt_re_query_device(struct ib_device *ibdev,
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
memset(ib_attr, 0, sizeof(*ib_attr));
-
- ib_attr->fw_ver = (u64)(unsigned long)(dev_attr->fw_ver);
+ memcpy(&ib_attr->fw_ver, dev_attr->fw_ver,
+ min(sizeof(dev_attr->fw_ver),
+ sizeof(ib_attr->fw_ver)));
bnxt_qplib_get_guid(rdev->netdev->dev_addr,
(u8 *)&ib_attr->sys_image_guid);
ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE;
- ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K;
+ ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M;
ib_attr->vendor_id = rdev->en_dev->pdev->vendor;
ib_attr->vendor_part_id = rdev->en_dev->pdev->device;
@@ -247,8 +248,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
IB_PORT_VENDOR_CLASS_SUP |
IB_PORT_IP_BASED_GIDS;
- /* Max MSG size set to 2G for now */
- port_attr->max_msg_sz = 0x80000000;
+ port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW;
port_attr->bad_pkey_cntr = 0;
port_attr->qkey_viol_cntr = 0;
port_attr->pkey_tbl_len = dev_attr->max_pkey;
@@ -281,6 +281,15 @@ int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d",
+ rdev->dev_attr.fw_ver[0], rdev->dev_attr.fw_ver[1],
+ rdev->dev_attr.fw_ver[2], rdev->dev_attr.fw_ver[3]);
+}
+
int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
u16 index, u16 *pkey)
{
@@ -532,7 +541,7 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
pbl_tbl = dma_addr;
rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl,
- BNXT_RE_FENCE_PBL_SIZE, false);
+ BNXT_RE_FENCE_PBL_SIZE, false, PAGE_SIZE);
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to register fence-MR\n");
goto fail;
@@ -1018,6 +1027,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
struct bnxt_re_qp *qp;
struct bnxt_re_cq *cq;
+ struct bnxt_re_srq *srq;
int rc, entries;
if ((qp_init_attr->cap.max_send_wr > dev_attr->max_qp_wqes) ||
@@ -1073,9 +1083,15 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
}
if (qp_init_attr->srq) {
- dev_err(rdev_to_dev(rdev), "SRQ not supported");
- rc = -ENOTSUPP;
- goto fail;
+ srq = container_of(qp_init_attr->srq, struct bnxt_re_srq,
+ ib_srq);
+ if (!srq) {
+ dev_err(rdev_to_dev(rdev), "SRQ not found");
+ rc = -EINVAL;
+ goto fail;
+ }
+ qp->qplib_qp.srq = &srq->qplib_srq;
+ qp->qplib_qp.rq.max_wqe = 0;
} else {
/* Allocate 1 more than what's provided so posting max doesn't
* mean empty
@@ -1280,6 +1296,237 @@ static enum ib_mtu __to_ib_mtu(u32 mtu)
}
}
+/* Shared Receive Queues */
+int bnxt_re_destroy_srq(struct ib_srq *ib_srq)
+{
+ struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
+ ib_srq);
+ struct bnxt_re_dev *rdev = srq->rdev;
+ struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq;
+ struct bnxt_qplib_nq *nq = NULL;
+ int rc;
+
+ if (qplib_srq->cq)
+ nq = qplib_srq->cq->nq;
+ rc = bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Destroy HW SRQ failed!");
+ return rc;
+ }
+
+ if (srq->umem && !IS_ERR(srq->umem))
+ ib_umem_release(srq->umem);
+ kfree(srq);
+ atomic_dec(&rdev->srq_count);
+ if (nq)
+ nq->budget--;
+ return 0;
+}
+
+static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
+ struct bnxt_re_pd *pd,
+ struct bnxt_re_srq *srq,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_srq_req ureq;
+ struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq;
+ struct ib_umem *umem;
+ int bytes = 0;
+ struct ib_ucontext *context = pd->ib_pd.uobject->context;
+ struct bnxt_re_ucontext *cntx = container_of(context,
+ struct bnxt_re_ucontext,
+ ib_uctx);
+ if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
+ return -EFAULT;
+
+ bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+ bytes = PAGE_ALIGN(bytes);
+ umem = ib_umem_get(context, ureq.srqva, bytes,
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(umem))
+ return PTR_ERR(umem);
+
+ srq->umem = umem;
+ qplib_srq->nmap = umem->nmap;
+ qplib_srq->sglist = umem->sg_head.sgl;
+ qplib_srq->srq_handle = ureq.srq_handle;
+ qplib_srq->dpi = &cntx->dpi;
+
+ return 0;
+}
+
+struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ struct bnxt_re_srq *srq;
+ struct bnxt_qplib_nq *nq = NULL;
+ int rc, entries;
+
+ if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) {
+ dev_err(rdev_to_dev(rdev), "Create CQ failed - max exceeded");
+ rc = -EINVAL;
+ goto exit;
+ }
+
+ if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
+ rc = -ENOTSUPP;
+ goto exit;
+ }
+
+ srq = kzalloc(sizeof(*srq), GFP_KERNEL);
+ if (!srq) {
+ rc = -ENOMEM;
+ goto exit;
+ }
+ srq->rdev = rdev;
+ srq->qplib_srq.pd = &pd->qplib_pd;
+ srq->qplib_srq.dpi = &rdev->dpi_privileged;
+ /* Allocate 1 more than what's provided so posting max doesn't
+ * mean empty
+ */
+ entries = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
+ if (entries > dev_attr->max_srq_wqes + 1)
+ entries = dev_attr->max_srq_wqes + 1;
+
+ srq->qplib_srq.max_wqe = entries;
+ srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge;
+ srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit;
+ srq->srq_limit = srq_init_attr->attr.srq_limit;
+ srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id;
+ nq = &rdev->nq[0];
+
+ if (udata) {
+ rc = bnxt_re_init_user_srq(rdev, pd, srq, udata);
+ if (rc)
+ goto fail;
+ }
+
+ rc = bnxt_qplib_create_srq(&rdev->qplib_res, &srq->qplib_srq);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Create HW SRQ failed!");
+ goto fail;
+ }
+
+ if (udata) {
+ struct bnxt_re_srq_resp resp;
+
+ resp.srqid = srq->qplib_srq.id;
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "SRQ copy to udata failed!");
+ bnxt_qplib_destroy_srq(&rdev->qplib_res,
+ &srq->qplib_srq);
+ goto exit;
+ }
+ }
+ if (nq)
+ nq->budget++;
+ atomic_inc(&rdev->srq_count);
+
+ return &srq->ib_srq;
+
+fail:
+ if (udata && srq->umem && !IS_ERR(srq->umem)) {
+ ib_umem_release(srq->umem);
+ srq->umem = NULL;
+ }
+
+ kfree(srq);
+exit:
+ return ERR_PTR(rc);
+}
+
+int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
+ ib_srq);
+ struct bnxt_re_dev *rdev = srq->rdev;
+ int rc;
+
+ switch (srq_attr_mask) {
+ case IB_SRQ_MAX_WR:
+ /* SRQ resize is not supported */
+ break;
+ case IB_SRQ_LIMIT:
+ /* Change the SRQ threshold */
+ if (srq_attr->srq_limit > srq->qplib_srq.max_wqe)
+ return -EINVAL;
+
+ srq->qplib_srq.threshold = srq_attr->srq_limit;
+ rc = bnxt_qplib_modify_srq(&rdev->qplib_res, &srq->qplib_srq);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Modify HW SRQ failed!");
+ return rc;
+ }
+ /* On success, update the shadow */
+ srq->srq_limit = srq_attr->srq_limit;
+ /* No need to Build and send response back to udata */
+ break;
+ default:
+ dev_err(rdev_to_dev(rdev),
+ "Unsupported srq_attr_mask 0x%x", srq_attr_mask);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr)
+{
+ struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
+ ib_srq);
+ struct bnxt_re_srq tsrq;
+ struct bnxt_re_dev *rdev = srq->rdev;
+ int rc;
+
+ /* Get live SRQ attr */
+ tsrq.qplib_srq.id = srq->qplib_srq.id;
+ rc = bnxt_qplib_query_srq(&rdev->qplib_res, &tsrq.qplib_srq);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Query HW SRQ failed!");
+ return rc;
+ }
+ srq_attr->max_wr = srq->qplib_srq.max_wqe;
+ srq_attr->max_sge = srq->qplib_srq.max_sge;
+ srq_attr->srq_limit = tsrq.qplib_srq.threshold;
+
+ return 0;
+}
+
+int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
+ ib_srq);
+ struct bnxt_qplib_swqe wqe;
+ unsigned long flags;
+ int rc = 0, payload_sz = 0;
+
+ spin_lock_irqsave(&srq->lock, flags);
+ while (wr) {
+ /* Transcribe each ib_recv_wr to qplib_swqe */
+ wqe.num_sge = wr->num_sge;
+ payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list,
+ wr->num_sge);
+ wqe.wr_id = wr->wr_id;
+ wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
+
+ rc = bnxt_qplib_post_srq_recv(&srq->qplib_srq, &wqe);
+ if (rc) {
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return rc;
+}
static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
struct bnxt_re_qp *qp1_qp,
int qp_attr_mask)
@@ -2295,10 +2542,14 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
/* Completion Queues */
int bnxt_re_destroy_cq(struct ib_cq *ib_cq)
{
- struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
- struct bnxt_re_dev *rdev = cq->rdev;
int rc;
- struct bnxt_qplib_nq *nq = cq->qplib_cq.nq;
+ struct bnxt_re_cq *cq;
+ struct bnxt_qplib_nq *nq;
+ struct bnxt_re_dev *rdev;
+
+ cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ rdev = cq->rdev;
+ nq = cq->qplib_cq.nq;
rc = bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
if (rc) {
@@ -2308,12 +2559,11 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq)
if (!IS_ERR_OR_NULL(cq->umem))
ib_umem_release(cq->umem);
- if (cq) {
- kfree(cq->cql);
- kfree(cq);
- }
atomic_dec(&rdev->cq_count);
nq->budget--;
+ kfree(cq->cql);
+ kfree(cq);
+
return 0;
}
@@ -3078,7 +3328,8 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
mr->qplib_mr.hwq.level = PBL_LVL_MAX;
mr->qplib_mr.total_size = -1; /* Infinte length */
- rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false);
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false,
+ PAGE_SIZE);
if (rc)
goto fail_mr;
@@ -3104,10 +3355,8 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
int rc;
rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
- if (rc) {
+ if (rc)
dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc);
- return rc;
- }
if (mr->pages) {
rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
@@ -3170,7 +3419,7 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
if (rc)
- goto fail;
+ goto bail;
mr->ib_mr.lkey = mr->qplib_mr.lkey;
mr->ib_mr.rkey = mr->ib_mr.lkey;
@@ -3192,9 +3441,10 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
return &mr->ib_mr;
fail_mr:
- bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
-fail:
kfree(mr->pages);
+fail:
+ bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+bail:
kfree(mr);
return ERR_PTR(rc);
}
@@ -3248,6 +3498,46 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
return rc;
}
+static int bnxt_re_page_size_ok(int page_shift)
+{
+ switch (page_shift) {
+ case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4K:
+ case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_8K:
+ case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_64K:
+ case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M:
+ case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256K:
+ case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1M:
+ case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M:
+ case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig,
+ int page_shift)
+{
+ u64 *pbl_tbl = pbl_tbl_orig;
+ u64 paddr;
+ u64 page_mask = (1ULL << page_shift) - 1;
+ int i, pages;
+ struct scatterlist *sg;
+ int entry;
+
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ pages = sg_dma_len(sg) >> PAGE_SHIFT;
+ for (i = 0; i < pages; i++) {
+ paddr = sg_dma_address(sg) + (i << PAGE_SHIFT);
+ if (pbl_tbl == pbl_tbl_orig)
+ *pbl_tbl++ = paddr & ~page_mask;
+ else if ((paddr & page_mask) == 0)
+ *pbl_tbl++ = paddr;
+ }
+ }
+ return pbl_tbl - pbl_tbl_orig;
+}
+
/* uverbs */
struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
@@ -3257,10 +3547,8 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_mr *mr;
struct ib_umem *umem;
- u64 *pbl_tbl, *pbl_tbl_orig;
- int i, umem_pgs, pages, rc;
- struct scatterlist *sg;
- int entry;
+ u64 *pbl_tbl = NULL;
+ int umem_pgs, page_shift, rc;
if (length > BNXT_RE_MAX_MR_SIZE) {
dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%ld\n",
@@ -3277,64 +3565,68 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR;
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to allocate MR");
+ goto free_mr;
+ }
+ /* The fixed portion of the rkey is the same as the lkey */
+ mr->ib_mr.rkey = mr->qplib_mr.rkey;
+
umem = ib_umem_get(ib_pd->uobject->context, start, length,
mr_access_flags, 0);
if (IS_ERR(umem)) {
dev_err(rdev_to_dev(rdev), "Failed to get umem");
rc = -EFAULT;
- goto free_mr;
+ goto free_mrw;
}
mr->ib_umem = umem;
- rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
- if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to allocate MR");
- goto release_umem;
- }
- /* The fixed portion of the rkey is the same as the lkey */
- mr->ib_mr.rkey = mr->qplib_mr.rkey;
-
mr->qplib_mr.va = virt_addr;
umem_pgs = ib_umem_page_count(umem);
if (!umem_pgs) {
dev_err(rdev_to_dev(rdev), "umem is invalid!");
rc = -EINVAL;
- goto free_mrw;
+ goto free_umem;
}
mr->qplib_mr.total_size = length;
pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL);
if (!pbl_tbl) {
- rc = -EINVAL;
- goto free_mrw;
+ rc = -ENOMEM;
+ goto free_umem;
}
- pbl_tbl_orig = pbl_tbl;
- if (umem->hugetlb) {
- dev_err(rdev_to_dev(rdev), "umem hugetlb not supported!");
+ page_shift = umem->page_shift;
+
+ if (!bnxt_re_page_size_ok(page_shift)) {
+ dev_err(rdev_to_dev(rdev), "umem page size unsupported!");
rc = -EFAULT;
goto fail;
}
- if (umem->page_shift != PAGE_SHIFT) {
- dev_err(rdev_to_dev(rdev), "umem page shift unsupported!");
- rc = -EFAULT;
+ if (!umem->hugetlb && length > BNXT_RE_MAX_MR_SIZE_LOW) {
+ dev_err(rdev_to_dev(rdev), "Requested MR Sz:%llu Max sup:%llu",
+ length, (u64)BNXT_RE_MAX_MR_SIZE_LOW);
+ rc = -EINVAL;
goto fail;
}
- /* Map umem buf ptrs to the PBL */
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- pages = sg_dma_len(sg) >> umem->page_shift;
- for (i = 0; i < pages; i++, pbl_tbl++)
- *pbl_tbl = sg_dma_address(sg) + (i << umem->page_shift);
+ if (umem->hugetlb && length > BNXT_RE_PAGE_SIZE_2M) {
+ page_shift = BNXT_RE_PAGE_SHIFT_2M;
+ dev_warn(rdev_to_dev(rdev), "umem hugetlb set page_size %x",
+ 1 << page_shift);
}
- rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl_orig,
- umem_pgs, false);
+
+ /* Map umem buf ptrs to the PBL */
+ umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, page_shift);
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl,
+ umem_pgs, false, 1 << page_shift);
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to register user MR");
goto fail;
}
- kfree(pbl_tbl_orig);
+ kfree(pbl_tbl);
mr->ib_mr.lkey = mr->qplib_mr.lkey;
mr->ib_mr.rkey = mr->qplib_mr.lkey;
@@ -3342,11 +3634,11 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
return &mr->ib_mr;
fail:
- kfree(pbl_tbl_orig);
+ kfree(pbl_tbl);
+free_umem:
+ ib_umem_release(umem);
free_mrw:
bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
-release_umem:
- ib_umem_release(umem);
free_mr:
kfree(mr);
return ERR_PTR(rc);
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 1df11ed272ea..423ebe012f95 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -68,6 +68,15 @@ struct bnxt_re_ah {
struct bnxt_qplib_ah qplib_ah;
};
+struct bnxt_re_srq {
+ struct bnxt_re_dev *rdev;
+ u32 srq_limit;
+ struct ib_srq ib_srq;
+ struct bnxt_qplib_srq qplib_srq;
+ struct ib_umem *umem;
+ spinlock_t lock; /* protect srq */
+};
+
struct bnxt_re_qp {
struct list_head list;
struct bnxt_re_dev *rdev;
@@ -143,6 +152,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr *port_attr);
int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable);
+void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str);
int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
u16 index, u16 *pkey);
int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
@@ -164,6 +174,16 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd,
int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int bnxt_re_destroy_ah(struct ib_ah *ah);
+struct ib_srq *bnxt_re_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+int bnxt_re_destroy_srq(struct ib_srq *srq);
+int bnxt_re_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr);
struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index aafc19aa5de1..508d00a5a106 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -80,6 +80,79 @@ static DEFINE_MUTEX(bnxt_re_dev_lock);
static struct workqueue_struct *bnxt_re_wq;
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait);
+/* SR-IOV helper functions */
+
+static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
+{
+ struct bnxt *bp;
+
+ bp = netdev_priv(rdev->en_dev->net);
+ if (BNXT_VF(bp))
+ rdev->is_virtfn = 1;
+}
+
+/* Set the maximum number of each resource that the driver actually wants
+ * to allocate. This may be up to the maximum number the firmware has
+ * reserved for the function. The driver may choose to allocate fewer
+ * resources than the firmware maximum.
+ */
+static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
+{
+ u32 vf_qps = 0, vf_srqs = 0, vf_cqs = 0, vf_mrws = 0, vf_gids = 0;
+ u32 i;
+ u32 vf_pct;
+ u32 num_vfs;
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+
+ rdev->qplib_ctx.qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
+ dev_attr->max_qp);
+
+ rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
+ /* Use max_mr from fw since max_mrw does not get set */
+ rdev->qplib_ctx.mrw_count = min_t(u32, rdev->qplib_ctx.mrw_count,
+ dev_attr->max_mr);
+ rdev->qplib_ctx.srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
+ dev_attr->max_srq);
+ rdev->qplib_ctx.cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT,
+ dev_attr->max_cq);
+
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+ rdev->qplib_ctx.tqm_count[i] =
+ rdev->dev_attr.tqm_alloc_reqs[i];
+
+ if (rdev->num_vfs) {
+ /*
+ * Reserve a set of resources for the PF. Divide the remaining
+ * resources among the VFs
+ */
+ vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
+ num_vfs = 100 * rdev->num_vfs;
+ vf_qps = (rdev->qplib_ctx.qpc_count * vf_pct) / num_vfs;
+ vf_srqs = (rdev->qplib_ctx.srqc_count * vf_pct) / num_vfs;
+ vf_cqs = (rdev->qplib_ctx.cq_count * vf_pct) / num_vfs;
+ /*
+ * The driver allows many more MRs than other resources. If the
+ * firmware does also, then reserve a fixed amount for the PF
+ * and divide the rest among VFs. VFs may use many MRs for NFS
+ * mounts, ISER, NVME applications, etc. If the firmware
+ * severely restricts the number of MRs, then let PF have
+ * half and divide the rest among VFs, as for the other
+ * resource types.
+ */
+ if (rdev->qplib_ctx.mrw_count < BNXT_RE_MAX_MRW_COUNT_64K)
+ vf_mrws = rdev->qplib_ctx.mrw_count * vf_pct / num_vfs;
+ else
+ vf_mrws = (rdev->qplib_ctx.mrw_count -
+ BNXT_RE_RESVD_MR_FOR_PF) / rdev->num_vfs;
+ vf_gids = BNXT_RE_MAX_GID_PER_VF;
+ }
+ rdev->qplib_ctx.vf_res.max_mrw_per_vf = vf_mrws;
+ rdev->qplib_ctx.vf_res.max_gid_per_vf = vf_gids;
+ rdev->qplib_ctx.vf_res.max_qp_per_vf = vf_qps;
+ rdev->qplib_ctx.vf_res.max_srq_per_vf = vf_srqs;
+ rdev->qplib_ctx.vf_res.max_cq_per_vf = vf_cqs;
+}
+
/* for handling bnxt_en callbacks later */
static void bnxt_re_stop(void *p)
{
@@ -91,6 +164,15 @@ static void bnxt_re_start(void *p)
static void bnxt_re_sriov_config(void *p, int num_vfs)
{
+ struct bnxt_re_dev *rdev = p;
+
+ if (!rdev)
+ return;
+
+ rdev->num_vfs = num_vfs;
+ bnxt_re_set_resource_limits(rdev);
+ bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
+ &rdev->qplib_ctx);
}
static void bnxt_re_shutdown(void *p)
@@ -417,7 +499,7 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
return ERR_PTR(-EINVAL);
if (!(en_dev->flags & BNXT_EN_FLAG_ROCE_CAP)) {
- dev_dbg(&pdev->dev,
+ dev_info(&pdev->dev,
"%s: probe error: RoCE is not supported on this device",
ROCE_DRV_MODULE_NAME);
return ERR_PTR(-ENODEV);
@@ -490,6 +572,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->query_port = bnxt_re_query_port;
ibdev->get_port_immutable = bnxt_re_get_port_immutable;
+ ibdev->get_dev_fw_str = bnxt_re_query_fw_str;
ibdev->query_pkey = bnxt_re_query_pkey;
ibdev->query_gid = bnxt_re_query_gid;
ibdev->get_netdev = bnxt_re_get_netdev;
@@ -505,6 +588,12 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->query_ah = bnxt_re_query_ah;
ibdev->destroy_ah = bnxt_re_destroy_ah;
+ ibdev->create_srq = bnxt_re_create_srq;
+ ibdev->modify_srq = bnxt_re_modify_srq;
+ ibdev->query_srq = bnxt_re_query_srq;
+ ibdev->destroy_srq = bnxt_re_destroy_srq;
+ ibdev->post_srq_recv = bnxt_re_post_srq_recv;
+
ibdev->create_qp = bnxt_re_create_qp;
ibdev->modify_qp = bnxt_re_modify_qp;
ibdev->query_qp = bnxt_re_query_qp;
@@ -541,14 +630,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
}
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
-
- return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->dev_attr.fw_ver);
-}
-
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
@@ -558,12 +639,10 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
}
static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
-static DEVICE_ATTR(fw_rev, 0444, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
static struct device_attribute *bnxt_re_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_rev,
&dev_attr_hca_type
};
@@ -616,10 +695,10 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
return rdev;
}
-static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
- struct creq_func_event *aeqe)
+static int bnxt_re_handle_unaffi_async_event(struct creq_func_event
+ *unaffi_async)
{
- switch (aeqe->event) {
+ switch (unaffi_async->event) {
case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
break;
case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
@@ -648,6 +727,93 @@ static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
return 0;
}
+static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
+ struct bnxt_re_qp *qp)
+{
+ struct ib_event event;
+
+ memset(&event, 0, sizeof(event));
+ if (qp->qplib_qp.srq) {
+ event.device = &qp->rdev->ibdev;
+ event.element.qp = &qp->ib_qp;
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ }
+
+ if (event.device && qp->ib_qp.event_handler)
+ qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+
+ return 0;
+}
+
+static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
+ void *obj)
+{
+ int rc = 0;
+ u8 event;
+
+ if (!obj)
+ return rc; /* QP was already dead, still return success */
+
+ event = affi_async->event;
+ if (event == CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION) {
+ struct bnxt_qplib_qp *lib_qp = obj;
+ struct bnxt_re_qp *qp = container_of(lib_qp, struct bnxt_re_qp,
+ qplib_qp);
+ rc = bnxt_re_handle_qp_async_event(affi_async, qp);
+ }
+ return rc;
+}
+
+static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
+ void *aeqe, void *obj)
+{
+ struct creq_qp_event *affi_async;
+ struct creq_func_event *unaffi_async;
+ u8 type;
+ int rc;
+
+ type = ((struct creq_base *)aeqe)->type;
+ if (type == CREQ_BASE_TYPE_FUNC_EVENT) {
+ unaffi_async = aeqe;
+ rc = bnxt_re_handle_unaffi_async_event(unaffi_async);
+ } else {
+ affi_async = aeqe;
+ rc = bnxt_re_handle_affi_async_event(affi_async, obj);
+ }
+
+ return rc;
+}
+
+static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_srq *handle, u8 event)
+{
+ struct bnxt_re_srq *srq = container_of(handle, struct bnxt_re_srq,
+ qplib_srq);
+ struct ib_event ib_event;
+ int rc = 0;
+
+ if (!srq) {
+ dev_err(NULL, "%s: SRQ is NULL, SRQN not handled",
+ ROCE_DRV_MODULE_NAME);
+ rc = -EINVAL;
+ goto done;
+ }
+ ib_event.device = &srq->rdev->ibdev;
+ ib_event.element.srq = &srq->ib_srq;
+ if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
+ ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ else
+ ib_event.event = IB_EVENT_SRQ_ERR;
+
+ if (srq->ib_srq.event_handler) {
+ /* Lock event_handler? */
+ (*srq->ib_srq.event_handler)(&ib_event,
+ srq->ib_srq.srq_context);
+ }
+done:
+ return rc;
+}
+
static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
struct bnxt_qplib_cq *handle)
{
@@ -690,7 +856,8 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1],
i - 1, rdev->msix_entries[i].vector,
rdev->msix_entries[i].db_offset,
- &bnxt_re_cqn_handler, NULL);
+ &bnxt_re_cqn_handler,
+ &bnxt_re_srqn_handler);
if (rc) {
dev_err(rdev_to_dev(rdev),
@@ -734,7 +901,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
/* Configure and allocate resources for qplib */
rdev->qplib_res.rcfw = &rdev->rcfw;
- rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
+ rdev->is_virtfn);
if (rc)
goto fail;
@@ -1035,19 +1203,6 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
}
}
-static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
-{
- u32 i;
-
- rdev->qplib_ctx.qpc_count = BNXT_RE_MAX_QPC_COUNT;
- rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT;
- rdev->qplib_ctx.srqc_count = BNXT_RE_MAX_SRQC_COUNT;
- rdev->qplib_ctx.cq_count = BNXT_RE_MAX_CQ_COUNT;
- for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
- rdev->qplib_ctx.tqm_count[i] =
- rdev->dev_attr.tqm_alloc_reqs[i];
-}
-
/* worker thread for polling periodic events. Now used for QoS programming*/
static void bnxt_re_worker(struct work_struct *work)
{
@@ -1070,6 +1225,9 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
}
set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ /* Check whether VF or PF */
+ bnxt_re_get_sriov_func_type(rdev);
+
rc = bnxt_re_request_msix(rdev);
if (rc) {
pr_err("Failed to get MSI-X vectors: %#x\n", rc);
@@ -1101,16 +1259,18 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
(rdev->en_dev->pdev, &rdev->rcfw,
rdev->msix_entries[BNXT_RE_AEQ_IDX].vector,
rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset,
- 0, &bnxt_re_aeq_handler);
+ rdev->is_virtfn, &bnxt_re_aeq_handler);
if (rc) {
pr_err("Failed to enable RCFW channel: %#x\n", rc);
goto free_ring;
}
- rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
+ rdev->is_virtfn);
if (rc)
goto disable_rcfw;
- bnxt_re_set_resource_limits(rdev);
+ if (!rdev->is_virtfn)
+ bnxt_re_set_resource_limits(rdev);
rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0);
if (rc) {
@@ -1125,7 +1285,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
goto free_ctx;
}
- rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx, 0);
+ rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx,
+ rdev->is_virtfn);
if (rc) {
pr_err("Failed to initialize RCFW: %#x\n", rc);
goto free_sctx;
@@ -1144,13 +1305,15 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
goto fail;
}
- rc = bnxt_re_setup_qos(rdev);
- if (rc)
- pr_info("RoCE priority not yet configured\n");
+ if (!rdev->is_virtfn) {
+ rc = bnxt_re_setup_qos(rdev);
+ if (rc)
+ pr_info("RoCE priority not yet configured\n");
- INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
- set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
- schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+ INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
+ set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
+ schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+ }
/* Register ib dev */
rc = bnxt_re_register_ib(rdev);
@@ -1176,6 +1339,7 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
&rdev->active_width);
+ set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
@@ -1400,7 +1564,7 @@ err_netdev:
static void __exit bnxt_re_mod_exit(void)
{
- struct bnxt_re_dev *rdev;
+ struct bnxt_re_dev *rdev, *next;
LIST_HEAD(to_be_deleted);
mutex_lock(&bnxt_re_dev_lock);
@@ -1408,8 +1572,11 @@ static void __exit bnxt_re_mod_exit(void)
if (!list_empty(&bnxt_re_dev_list))
list_splice_init(&bnxt_re_dev_list, &to_be_deleted);
mutex_unlock(&bnxt_re_dev_lock);
-
- list_for_each_entry(rdev, &to_be_deleted, list) {
+ /*
+ * Cleanup the devices in reverse order so that the VF device
+ * cleanup is done before PF cleanup
+ */
+ list_for_each_entry_safe_reverse(rdev, next, &to_be_deleted, list) {
dev_info(rdev_to_dev(rdev), "Unregistering Device");
bnxt_re_dev_stop(rdev);
bnxt_re_ib_unreg(rdev, true);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 61764f7aa79b..8b5f11ac0e42 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -52,6 +52,7 @@
static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq);
static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp);
+static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type);
static void bnxt_qplib_cancel_phantom_processing(struct bnxt_qplib_qp *qp)
{
@@ -278,6 +279,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
struct nq_base *nqe, **nq_ptr;
struct bnxt_qplib_cq *cq;
int num_cqne_processed = 0;
+ int num_srqne_processed = 0;
u32 sw_cons, raw_cons;
u16 type;
int budget = nq->budget;
@@ -320,6 +322,26 @@ static void bnxt_qplib_service_nq(unsigned long data)
spin_unlock_bh(&cq->compl_lock);
break;
}
+ case NQ_BASE_TYPE_SRQ_EVENT:
+ {
+ struct nq_srq_event *nqsrqe =
+ (struct nq_srq_event *)nqe;
+
+ q_handle = le32_to_cpu(nqsrqe->srq_handle_low);
+ q_handle |= (u64)le32_to_cpu(nqsrqe->srq_handle_high)
+ << 32;
+ bnxt_qplib_arm_srq((struct bnxt_qplib_srq *)q_handle,
+ DBR_DBR_TYPE_SRQ_ARMENA);
+ if (!nq->srqn_handler(nq,
+ (struct bnxt_qplib_srq *)q_handle,
+ nqsrqe->event))
+ num_srqne_processed++;
+ else
+ dev_warn(&nq->pdev->dev,
+ "QPLIB: SRQ event 0x%x not handled",
+ nqsrqe->event);
+ break;
+ }
case NQ_BASE_TYPE_DBQ_EVENT:
break;
default:
@@ -384,17 +406,19 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
int (*cqn_handler)(struct bnxt_qplib_nq *nq,
struct bnxt_qplib_cq *),
int (*srqn_handler)(struct bnxt_qplib_nq *nq,
- void *, u8 event))
+ struct bnxt_qplib_srq *,
+ u8 event))
{
resource_size_t nq_base;
int rc = -1;
nq->pdev = pdev;
nq->vector = msix_vector;
+ if (cqn_handler)
+ nq->cqn_handler = cqn_handler;
- nq->cqn_handler = cqn_handler;
-
- nq->srqn_handler = srqn_handler;
+ if (srqn_handler)
+ nq->srqn_handler = srqn_handler;
tasklet_init(&nq->worker, bnxt_qplib_service_nq, (unsigned long)nq);
@@ -410,7 +434,6 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
if (rc) {
dev_err(&nq->pdev->dev,
"Failed to request IRQ for NQ: %#x", rc);
- bnxt_qplib_disable_nq(nq);
goto fail;
}
@@ -469,6 +492,238 @@ int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq)
return 0;
}
+/* SRQ */
+static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type)
+{
+ struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
+ struct dbr_dbr db_msg = { 0 };
+ void __iomem *db;
+ u32 sw_prod = 0;
+
+ /* Ring DB */
+ sw_prod = (arm_type == DBR_DBR_TYPE_SRQ_ARM) ? srq->threshold :
+ HWQ_CMP(srq_hwq->prod, srq_hwq);
+ db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
+ DBR_DBR_INDEX_MASK);
+ db_msg.type_xid = cpu_to_le32(((srq->id << DBR_DBR_XID_SFT) &
+ DBR_DBR_XID_MASK) | arm_type);
+ db = (arm_type == DBR_DBR_TYPE_SRQ_ARMENA) ?
+ srq->dbr_base : srq->dpi->dbr;
+ wmb(); /* barrier before db ring */
+ __iowrite64_copy(db, &db_msg, sizeof(db_msg) / sizeof(u64));
+}
+
+int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_destroy_srq req;
+ struct creq_destroy_srq_resp resp;
+ u16 cmd_flags = 0;
+ int rc;
+
+ RCFW_CMD_PREP(req, DESTROY_SRQ, cmd_flags);
+
+ /* Configure the request */
+ req.srq_cid = cpu_to_le32(srq->id);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return rc;
+
+ bnxt_qplib_free_hwq(res->pdev, &srq->hwq);
+ kfree(srq->swq);
+ return 0;
+}
+
+int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_create_srq req;
+ struct creq_create_srq_resp resp;
+ struct bnxt_qplib_pbl *pbl;
+ u16 cmd_flags = 0;
+ int rc, idx;
+
+ srq->hwq.max_elements = srq->max_wqe;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &srq->hwq, srq->sglist,
+ srq->nmap, &srq->hwq.max_elements,
+ BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_QUEUE);
+ if (rc)
+ goto exit;
+
+ srq->swq = kcalloc(srq->hwq.max_elements, sizeof(*srq->swq),
+ GFP_KERNEL);
+ if (!srq->swq)
+ goto fail;
+
+ RCFW_CMD_PREP(req, CREATE_SRQ, cmd_flags);
+
+ /* Configure the request */
+ req.dpi = cpu_to_le32(srq->dpi->dpi);
+ req.srq_handle = cpu_to_le64(srq);
+
+ req.srq_size = cpu_to_le16((u16)srq->hwq.max_elements);
+ pbl = &srq->hwq.pbl[PBL_LVL_0];
+ req.pg_size_lvl = cpu_to_le16((((u16)srq->hwq.level &
+ CMDQ_CREATE_SRQ_LVL_MASK) <<
+ CMDQ_CREATE_SRQ_LVL_SFT) |
+ (pbl->pg_size == ROCE_PG_SIZE_4K ?
+ CMDQ_CREATE_SRQ_PG_SIZE_PG_4K :
+ pbl->pg_size == ROCE_PG_SIZE_8K ?
+ CMDQ_CREATE_SRQ_PG_SIZE_PG_8K :
+ pbl->pg_size == ROCE_PG_SIZE_64K ?
+ CMDQ_CREATE_SRQ_PG_SIZE_PG_64K :
+ pbl->pg_size == ROCE_PG_SIZE_2M ?
+ CMDQ_CREATE_SRQ_PG_SIZE_PG_2M :
+ pbl->pg_size == ROCE_PG_SIZE_8M ?
+ CMDQ_CREATE_SRQ_PG_SIZE_PG_8M :
+ pbl->pg_size == ROCE_PG_SIZE_1G ?
+ CMDQ_CREATE_SRQ_PG_SIZE_PG_1G :
+ CMDQ_CREATE_SRQ_PG_SIZE_PG_4K));
+ req.pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ req.pd_id = cpu_to_le32(srq->pd->id);
+ req.eventq_id = cpu_to_le16(srq->eventq_hw_ring_id);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ goto fail;
+
+ spin_lock_init(&srq->lock);
+ srq->start_idx = 0;
+ srq->last_idx = srq->hwq.max_elements - 1;
+ for (idx = 0; idx < srq->hwq.max_elements; idx++)
+ srq->swq[idx].next_idx = idx + 1;
+ srq->swq[srq->last_idx].next_idx = -1;
+
+ srq->id = le32_to_cpu(resp.xid);
+ srq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
+ if (srq->threshold)
+ bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARMENA);
+ srq->arm_req = false;
+
+ return 0;
+fail:
+ bnxt_qplib_free_hwq(res->pdev, &srq->hwq);
+ kfree(srq->swq);
+exit:
+ return rc;
+}
+
+int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq)
+{
+ struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
+ u32 sw_prod, sw_cons, count = 0;
+
+ sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
+ sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
+
+ count = sw_prod > sw_cons ? sw_prod - sw_cons :
+ srq_hwq->max_elements - sw_cons + sw_prod;
+ if (count > srq->threshold) {
+ srq->arm_req = false;
+ bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARM);
+ } else {
+ /* Deferred arming */
+ srq->arm_req = true;
+ }
+
+ return 0;
+}
+
+int bnxt_qplib_query_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_query_srq req;
+ struct creq_query_srq_resp resp;
+ struct bnxt_qplib_rcfw_sbuf *sbuf;
+ struct creq_query_srq_resp_sb *sb;
+ u16 cmd_flags = 0;
+ int rc = 0;
+
+ RCFW_CMD_PREP(req, QUERY_SRQ, cmd_flags);
+ req.srq_cid = cpu_to_le32(srq->id);
+
+ /* Configure the request */
+ sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+ if (!sbuf)
+ return -ENOMEM;
+ sb = sbuf->sb;
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ (void *)sbuf, 0);
+ srq->threshold = le16_to_cpu(sb->srq_limit);
+ bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+
+ return rc;
+}
+
+int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
+ struct rq_wqe *srqe, **srqe_ptr;
+ struct sq_sge *hw_sge;
+ u32 sw_prod, sw_cons, count = 0;
+ int i, rc = 0, next;
+
+ spin_lock(&srq_hwq->lock);
+ if (srq->start_idx == srq->last_idx) {
+ dev_err(&srq_hwq->pdev->dev, "QPLIB: FP: SRQ (0x%x) is full!",
+ srq->id);
+ rc = -EINVAL;
+ spin_unlock(&srq_hwq->lock);
+ goto done;
+ }
+ next = srq->start_idx;
+ srq->start_idx = srq->swq[next].next_idx;
+ spin_unlock(&srq_hwq->lock);
+
+ sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
+ srqe_ptr = (struct rq_wqe **)srq_hwq->pbl_ptr;
+ srqe = &srqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)];
+ memset(srqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+ /* Calculate wqe_size16 and data_len */
+ for (i = 0, hw_sge = (struct sq_sge *)srqe->data;
+ i < wqe->num_sge; i++, hw_sge++) {
+ hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
+ hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
+ hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
+ }
+ srqe->wqe_type = wqe->type;
+ srqe->flags = wqe->flags;
+ srqe->wqe_size = wqe->num_sge +
+ ((offsetof(typeof(*srqe), data) + 15) >> 4);
+ srqe->wr_id[0] = cpu_to_le32((u32)next);
+ srq->swq[next].wr_id = wqe->wr_id;
+
+ srq_hwq->prod++;
+
+ spin_lock(&srq_hwq->lock);
+ sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
+ /* retaining srq_hwq->cons for this logic
+ * actually the lock is only required to
+ * read srq_hwq->cons.
+ */
+ sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
+ count = sw_prod > sw_cons ? sw_prod - sw_cons :
+ srq_hwq->max_elements - sw_cons + sw_prod;
+ spin_unlock(&srq_hwq->lock);
+ /* Ring DB */
+ bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ);
+ if (srq->arm_req == true && count > srq->threshold) {
+ srq->arm_req = false;
+ bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARM);
+ }
+done:
+ return rc;
+}
+
/* QP */
int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
@@ -737,6 +992,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
pbl->pg_size == ROCE_PG_SIZE_1G ?
CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G :
CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K);
+ } else {
+ /* SRQ */
+ if (qp->srq) {
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED;
+ req.srq_cid = cpu_to_le32(qp->srq->id);
+ }
}
if (qp->rcq)
@@ -2068,6 +2329,16 @@ done:
return rc;
}
+static void bnxt_qplib_release_srqe(struct bnxt_qplib_srq *srq, u32 tag)
+{
+ spin_lock(&srq->hwq.lock);
+ srq->swq[srq->last_idx].next_idx = (int)tag;
+ srq->last_idx = (int)tag;
+ srq->swq[srq->last_idx].next_idx = -1;
+ srq->hwq.cons++; /* Support for SRQE counter */
+ spin_unlock(&srq->hwq.lock);
+}
+
static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
struct cq_res_rc *hwcqe,
struct bnxt_qplib_cqe **pcqe,
@@ -2075,6 +2346,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
{
struct bnxt_qplib_qp *qp;
struct bnxt_qplib_q *rq;
+ struct bnxt_qplib_srq *srq;
struct bnxt_qplib_cqe *cqe;
u32 wr_id_idx;
int rc = 0;
@@ -2102,27 +2374,46 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
wr_id_idx = le32_to_cpu(hwcqe->srq_or_rq_wr_id) &
CQ_RES_RC_SRQ_OR_RQ_WR_ID_MASK;
- rq = &qp->rq;
- if (wr_id_idx > rq->hwq.max_elements) {
- dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process RC ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
- wr_id_idx, rq->hwq.max_elements);
- return -EINVAL;
- }
-
- cqe->wr_id = rq->swq[wr_id_idx].wr_id;
- cqe++;
- (*budget)--;
- rq->hwq.cons++;
- *pcqe = cqe;
+ if (cqe->flags & CQ_RES_RC_FLAGS_SRQ_SRQ) {
+ srq = qp->srq;
+ if (!srq)
+ return -EINVAL;
+ if (wr_id_idx > srq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process RC ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
+ wr_id_idx, srq->hwq.max_elements);
+ return -EINVAL;
+ }
+ cqe->wr_id = srq->swq[wr_id_idx].wr_id;
+ bnxt_qplib_release_srqe(srq, wr_id_idx);
+ cqe++;
+ (*budget)--;
+ *pcqe = cqe;
+ } else {
+ rq = &qp->rq;
+ if (wr_id_idx > rq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process RC ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
+ wr_id_idx, rq->hwq.max_elements);
+ return -EINVAL;
+ }
+ cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ cqe++;
+ (*budget)--;
+ rq->hwq.cons++;
+ *pcqe = cqe;
- if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
- qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
- /* Add qp to flush list of the CQ */
- bnxt_qplib_lock_buddy_cq(qp, cq);
- __bnxt_qplib_add_flush_qp(qp);
- bnxt_qplib_unlock_buddy_cq(qp, cq);
+ if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
+ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+ /* Add qp to flush list of the CQ */
+ bnxt_qplib_lock_buddy_cq(qp, cq);
+ __bnxt_qplib_add_flush_qp(qp);
+ bnxt_qplib_unlock_buddy_cq(qp, cq);
+ }
}
done:
@@ -2136,6 +2427,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
{
struct bnxt_qplib_qp *qp;
struct bnxt_qplib_q *rq;
+ struct bnxt_qplib_srq *srq;
struct bnxt_qplib_cqe *cqe;
u32 wr_id_idx;
int rc = 0;
@@ -2166,27 +2458,48 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
hwcqe->src_qp_high_srq_or_rq_wr_id) &
CQ_RES_UD_SRC_QP_HIGH_MASK) >> 8);
- rq = &qp->rq;
- if (wr_id_idx > rq->hwq.max_elements) {
- dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process UD ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id idx %#x exceeded RQ max %#x",
- wr_id_idx, rq->hwq.max_elements);
- return -EINVAL;
- }
+ if (cqe->flags & CQ_RES_RC_FLAGS_SRQ_SRQ) {
+ srq = qp->srq;
+ if (!srq)
+ return -EINVAL;
- cqe->wr_id = rq->swq[wr_id_idx].wr_id;
- cqe++;
- (*budget)--;
- rq->hwq.cons++;
- *pcqe = cqe;
+ if (wr_id_idx > srq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process UD ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
+ wr_id_idx, srq->hwq.max_elements);
+ return -EINVAL;
+ }
+ cqe->wr_id = srq->swq[wr_id_idx].wr_id;
+ bnxt_qplib_release_srqe(srq, wr_id_idx);
+ cqe++;
+ (*budget)--;
+ *pcqe = cqe;
+ } else {
+ rq = &qp->rq;
+ if (wr_id_idx > rq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process UD ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
+ wr_id_idx, rq->hwq.max_elements);
+ return -EINVAL;
+ }
- if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
- qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
- /* Add qp to flush list of the CQ */
- bnxt_qplib_lock_buddy_cq(qp, cq);
- __bnxt_qplib_add_flush_qp(qp);
- bnxt_qplib_unlock_buddy_cq(qp, cq);
+ cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ cqe++;
+ (*budget)--;
+ rq->hwq.cons++;
+ *pcqe = cqe;
+
+ if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
+ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+ /* Add qp to flush list of the CQ */
+ bnxt_qplib_lock_buddy_cq(qp, cq);
+ __bnxt_qplib_add_flush_qp(qp);
+ bnxt_qplib_unlock_buddy_cq(qp, cq);
+ }
}
done:
return rc;
@@ -2218,6 +2531,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
{
struct bnxt_qplib_qp *qp;
struct bnxt_qplib_q *rq;
+ struct bnxt_qplib_srq *srq;
struct bnxt_qplib_cqe *cqe;
u32 wr_id_idx;
int rc = 0;
@@ -2256,26 +2570,49 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
cqe->raweth_qp1_flags2 = le32_to_cpu(hwcqe->raweth_qp1_flags2);
cqe->raweth_qp1_metadata = le32_to_cpu(hwcqe->raweth_qp1_metadata);
- rq = &qp->rq;
- if (wr_id_idx > rq->hwq.max_elements) {
- dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id ");
- dev_err(&cq->hwq.pdev->dev, "QPLIB: ix 0x%x exceeded RQ max 0x%x",
- wr_id_idx, rq->hwq.max_elements);
- return -EINVAL;
- }
-
- cqe->wr_id = rq->swq[wr_id_idx].wr_id;
- cqe++;
- (*budget)--;
- rq->hwq.cons++;
- *pcqe = cqe;
+ if (cqe->flags & CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ) {
+ srq = qp->srq;
+ if (!srq) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: SRQ used but not defined??");
+ return -EINVAL;
+ }
+ if (wr_id_idx > srq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process Raw/QP1 ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
+ wr_id_idx, srq->hwq.max_elements);
+ return -EINVAL;
+ }
+ cqe->wr_id = srq->swq[wr_id_idx].wr_id;
+ bnxt_qplib_release_srqe(srq, wr_id_idx);
+ cqe++;
+ (*budget)--;
+ *pcqe = cqe;
+ } else {
+ rq = &qp->rq;
+ if (wr_id_idx > rq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: ix 0x%x exceeded RQ max 0x%x",
+ wr_id_idx, rq->hwq.max_elements);
+ return -EINVAL;
+ }
+ cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ cqe++;
+ (*budget)--;
+ rq->hwq.cons++;
+ *pcqe = cqe;
- if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
- qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
- /* Add qp to flush list of the CQ */
- bnxt_qplib_lock_buddy_cq(qp, cq);
- __bnxt_qplib_add_flush_qp(qp);
- bnxt_qplib_unlock_buddy_cq(qp, cq);
+ if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
+ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+ /* Add qp to flush list of the CQ */
+ bnxt_qplib_lock_buddy_cq(qp, cq);
+ __bnxt_qplib_add_flush_qp(qp);
+ bnxt_qplib_unlock_buddy_cq(qp, cq);
+ }
}
done:
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index c582d4ec8173..211b27a8f9e2 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -39,6 +39,27 @@
#ifndef __BNXT_QPLIB_FP_H__
#define __BNXT_QPLIB_FP_H__
+struct bnxt_qplib_srq {
+ struct bnxt_qplib_pd *pd;
+ struct bnxt_qplib_dpi *dpi;
+ void __iomem *dbr_base;
+ u64 srq_handle;
+ u32 id;
+ u32 max_wqe;
+ u32 max_sge;
+ u32 threshold;
+ bool arm_req;
+ struct bnxt_qplib_cq *cq;
+ struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_swq *swq;
+ struct scatterlist *sglist;
+ int start_idx;
+ int last_idx;
+ u32 nmap;
+ u16 eventq_hw_ring_id;
+ spinlock_t lock; /* protect SRQE link list */
+};
+
struct bnxt_qplib_sge {
u64 addr;
u32 lkey;
@@ -79,6 +100,7 @@ static inline u32 get_psne_idx(u32 val)
struct bnxt_qplib_swq {
u64 wr_id;
+ int next_idx;
u8 type;
u8 flags;
u32 start_psn;
@@ -404,29 +426,27 @@ struct bnxt_qplib_cq {
writel(NQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
struct bnxt_qplib_nq {
- struct pci_dev *pdev;
-
- int vector;
- cpumask_t mask;
- int budget;
- bool requested;
- struct tasklet_struct worker;
- struct bnxt_qplib_hwq hwq;
-
- u16 bar_reg;
- u16 bar_reg_off;
- u16 ring_id;
- void __iomem *bar_reg_iomem;
-
- int (*cqn_handler)
- (struct bnxt_qplib_nq *nq,
- struct bnxt_qplib_cq *cq);
- int (*srqn_handler)
- (struct bnxt_qplib_nq *nq,
- void *srq,
- u8 event);
- struct workqueue_struct *cqn_wq;
- char name[32];
+ struct pci_dev *pdev;
+
+ int vector;
+ cpumask_t mask;
+ int budget;
+ bool requested;
+ struct tasklet_struct worker;
+ struct bnxt_qplib_hwq hwq;
+
+ u16 bar_reg;
+ u16 bar_reg_off;
+ u16 ring_id;
+ void __iomem *bar_reg_iomem;
+
+ int (*cqn_handler)(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_cq *cq);
+ int (*srqn_handler)(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_srq *srq,
+ u8 event);
+ struct workqueue_struct *cqn_wq;
+ char name[32];
};
struct bnxt_qplib_nq_work {
@@ -441,8 +461,18 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
int (*cqn_handler)(struct bnxt_qplib_nq *nq,
struct bnxt_qplib_cq *cq),
int (*srqn_handler)(struct bnxt_qplib_nq *nq,
- void *srq,
+ struct bnxt_qplib_srq *srq,
u8 event));
+int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq);
+int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq);
+int bnxt_qplib_query_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq);
+int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq);
+int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
+ struct bnxt_qplib_swqe *wqe);
int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index bb5574adf195..8329ec6a7946 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -93,7 +93,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
opcode = req->opcode;
if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
(opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
- opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW)) {
+ opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
+ opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
dev_err(&rcfw->pdev->dev,
"QPLIB: RCFW not initialized, reject opcode 0x%x",
opcode);
@@ -615,7 +616,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
int msix_vector,
int cp_bar_reg_off, int virt_fn,
int (*aeq_handler)(struct bnxt_qplib_rcfw *,
- struct creq_func_event *))
+ void *, void *))
{
resource_size_t res_base;
struct cmdq_init init;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 2946a7cfae82..6bee6e3636ea 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -167,7 +167,7 @@ struct bnxt_qplib_rcfw {
#define FIRMWARE_TIMED_OUT 3
wait_queue_head_t waitq;
int (*aeq_handler)(struct bnxt_qplib_rcfw *,
- struct creq_func_event *);
+ void *, void *);
u32 seq_num;
/* Bar region info */
@@ -199,9 +199,8 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
struct bnxt_qplib_rcfw *rcfw,
int msix_vector,
int cp_bar_reg_off, int virt_fn,
- int (*aeq_handler)
- (struct bnxt_qplib_rcfw *,
- struct creq_func_event *));
+ int (*aeq_handler)(struct bnxt_qplib_rcfw *,
+ void *aeqe, void *obj));
struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
struct bnxt_qplib_rcfw *rcfw,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 4e101704e801..ad37d54affcc 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -104,13 +104,12 @@ static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
if (!sghead) {
for (i = 0; i < pages; i++) {
- pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
- pbl->pg_size,
- &pbl->pg_map_arr[i],
- GFP_KERNEL);
+ pbl->pg_arr[i] = dma_zalloc_coherent(&pdev->dev,
+ pbl->pg_size,
+ &pbl->pg_map_arr[i],
+ GFP_KERNEL);
if (!pbl->pg_arr[i])
goto fail;
- memset(pbl->pg_arr[i], 0, pbl->pg_size);
pbl->pg_count++;
}
} else {
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 9543ce51a28a..c015c1861351 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -64,8 +64,28 @@ static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw)
return !!(pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
}
+static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw,
+ char *fw_ver)
+{
+ struct cmdq_query_version req;
+ struct creq_query_version_resp resp;
+ u16 cmd_flags = 0;
+ int rc = 0;
+
+ RCFW_CMD_PREP(req, QUERY_VERSION, cmd_flags);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ if (rc)
+ return;
+ fw_ver[0] = resp.fw_maj;
+ fw_ver[1] = resp.fw_minor;
+ fw_ver[2] = resp.fw_bld;
+ fw_ver[3] = resp.fw_rsvd;
+}
+
int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_dev_attr *attr)
+ struct bnxt_qplib_dev_attr *attr, bool vf)
{
struct cmdq_query_func req;
struct creq_query_func_resp resp;
@@ -95,7 +115,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
/* Extract the context from the side buffer */
attr->max_qp = le32_to_cpu(sb->max_qp);
/* max_qp value reported by FW for PF doesn't include the QP1 for PF */
- attr->max_qp += 1;
+ if (!vf)
+ attr->max_qp += 1;
attr->max_qp_rd_atom =
sb->max_qp_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom;
@@ -133,7 +154,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->l2_db_size = (sb->l2_db_space_size + 1) * PAGE_SIZE;
attr->max_sgid = le32_to_cpu(sb->max_gid);
- strlcpy(attr->fw_ver, "20.6.28.0", sizeof(attr->fw_ver));
+ bnxt_qplib_query_version(rcfw, attr->fw_ver);
for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) {
temp = le32_to_cpu(sb->tqm_alloc_reqs[i]);
@@ -150,6 +171,38 @@ bail:
return rc;
}
+int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx)
+{
+ struct cmdq_set_func_resources req;
+ struct creq_set_func_resources_resp resp;
+ u16 cmd_flags = 0;
+ int rc = 0;
+
+ RCFW_CMD_PREP(req, SET_FUNC_RESOURCES, cmd_flags);
+
+ req.number_of_qp = cpu_to_le32(ctx->qpc_count);
+ req.number_of_mrw = cpu_to_le32(ctx->mrw_count);
+ req.number_of_srq = cpu_to_le32(ctx->srqc_count);
+ req.number_of_cq = cpu_to_le32(ctx->cq_count);
+
+ req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf);
+ req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf);
+ req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf);
+ req.max_cq_per_vf = cpu_to_le32(ctx->vf_res.max_cq_per_vf);
+ req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp,
+ NULL, 0);
+ if (rc) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: Failed to set function resources");
+ }
+ return rc;
+}
+
/* SGID */
int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
@@ -604,7 +657,7 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
}
int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
- u64 *pbl_tbl, int num_pbls, bool block)
+ u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_register_mr req;
@@ -615,6 +668,9 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
u32 pg_size;
if (num_pbls) {
+ /* Allocate memory for the non-leaf pages to store buf ptrs.
+ * Non-leaf pages always uses system PAGE_SIZE
+ */
pg_ptrs = roundup_pow_of_two(num_pbls);
pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
if (!pages)
@@ -632,6 +688,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
mr->hwq.max_elements = pages;
+ /* Use system PAGE_SIZE */
rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL, 0,
&mr->hwq.max_elements,
PAGE_SIZE, 0, PAGE_SIZE,
@@ -652,18 +709,22 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
/* Configure the request */
if (mr->hwq.level == PBL_LVL_MAX) {
+ /* No PBL provided, just use system PAGE_SIZE */
level = 0;
req.pbl = 0;
pg_size = PAGE_SIZE;
} else {
level = mr->hwq.level + 1;
req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]);
- pg_size = mr->hwq.pbl[PBL_LVL_0].pg_size;
}
+ pg_size = buf_pg_size ? buf_pg_size : PAGE_SIZE;
req.log2_pg_size_lvl = (level << CMDQ_REGISTER_MR_LVL_SFT) |
((ilog2(pg_size) <<
CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT) &
CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK);
+ req.log2_pbl_pg_size = cpu_to_le16(((ilog2(PAGE_SIZE) <<
+ CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_SFT) &
+ CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_MASK));
req.access = (mr->flags & 0xFFFF);
req.va = cpu_to_le64(mr->va);
req.key = cpu_to_le32(mr->lkey);
@@ -729,3 +790,73 @@ int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids)
0);
return 0;
}
+
+int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_roce_stats *stats)
+{
+ struct cmdq_query_roce_stats req;
+ struct creq_query_roce_stats_resp resp;
+ struct bnxt_qplib_rcfw_sbuf *sbuf;
+ struct creq_query_roce_stats_resp_sb *sb;
+ u16 cmd_flags = 0;
+ int rc = 0;
+
+ RCFW_CMD_PREP(req, QUERY_ROCE_STATS, cmd_flags);
+
+ sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+ if (!sbuf) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: SP: QUERY_ROCE_STATS alloc side buffer failed");
+ return -ENOMEM;
+ }
+
+ sb = sbuf->sb;
+ req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+ (void *)sbuf, 0);
+ if (rc)
+ goto bail;
+ /* Extract the context from the side buffer */
+ stats->to_retransmits = le64_to_cpu(sb->to_retransmits);
+ stats->seq_err_naks_rcvd = le64_to_cpu(sb->seq_err_naks_rcvd);
+ stats->max_retry_exceeded = le64_to_cpu(sb->max_retry_exceeded);
+ stats->rnr_naks_rcvd = le64_to_cpu(sb->rnr_naks_rcvd);
+ stats->missing_resp = le64_to_cpu(sb->missing_resp);
+ stats->unrecoverable_err = le64_to_cpu(sb->unrecoverable_err);
+ stats->bad_resp_err = le64_to_cpu(sb->bad_resp_err);
+ stats->local_qp_op_err = le64_to_cpu(sb->local_qp_op_err);
+ stats->local_protection_err = le64_to_cpu(sb->local_protection_err);
+ stats->mem_mgmt_op_err = le64_to_cpu(sb->mem_mgmt_op_err);
+ stats->remote_invalid_req_err = le64_to_cpu(sb->remote_invalid_req_err);
+ stats->remote_access_err = le64_to_cpu(sb->remote_access_err);
+ stats->remote_op_err = le64_to_cpu(sb->remote_op_err);
+ stats->dup_req = le64_to_cpu(sb->dup_req);
+ stats->res_exceed_max = le64_to_cpu(sb->res_exceed_max);
+ stats->res_length_mismatch = le64_to_cpu(sb->res_length_mismatch);
+ stats->res_exceeds_wqe = le64_to_cpu(sb->res_exceeds_wqe);
+ stats->res_opcode_err = le64_to_cpu(sb->res_opcode_err);
+ stats->res_rx_invalid_rkey = le64_to_cpu(sb->res_rx_invalid_rkey);
+ stats->res_rx_domain_err = le64_to_cpu(sb->res_rx_domain_err);
+ stats->res_rx_no_perm = le64_to_cpu(sb->res_rx_no_perm);
+ stats->res_rx_range_err = le64_to_cpu(sb->res_rx_range_err);
+ stats->res_tx_invalid_rkey = le64_to_cpu(sb->res_tx_invalid_rkey);
+ stats->res_tx_domain_err = le64_to_cpu(sb->res_tx_domain_err);
+ stats->res_tx_no_perm = le64_to_cpu(sb->res_tx_no_perm);
+ stats->res_tx_range_err = le64_to_cpu(sb->res_tx_range_err);
+ stats->res_irrq_oflow = le64_to_cpu(sb->res_irrq_oflow);
+ stats->res_unsup_opcode = le64_to_cpu(sb->res_unsup_opcode);
+ stats->res_unaligned_atomic = le64_to_cpu(sb->res_unaligned_atomic);
+ stats->res_rem_inv_err = le64_to_cpu(sb->res_rem_inv_err);
+ stats->res_mem_error = le64_to_cpu(sb->res_mem_error);
+ stats->res_srq_err = le64_to_cpu(sb->res_srq_err);
+ stats->res_cmp_err = le64_to_cpu(sb->res_cmp_err);
+ stats->res_invalid_dup_rkey = le64_to_cpu(sb->res_invalid_dup_rkey);
+ stats->res_wqe_format_err = le64_to_cpu(sb->res_wqe_format_err);
+ stats->res_cq_load_err = le64_to_cpu(sb->res_cq_load_err);
+ stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err);
+ stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err);
+ stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err);
+bail:
+ bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ return rc;
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 11322582f5e4..9d3e8b994945 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -45,7 +45,8 @@
#define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040
struct bnxt_qplib_dev_attr {
- char fw_ver[32];
+#define FW_VER_ARR_LEN 4
+ u8 fw_ver[FW_VER_ARR_LEN];
u16 max_sgid;
u16 max_mrw;
u32 max_qp;
@@ -127,6 +128,85 @@ struct bnxt_qplib_frpl {
#define BNXT_QPLIB_ACCESS_ZERO_BASED BIT(5)
#define BNXT_QPLIB_ACCESS_ON_DEMAND BIT(6)
+struct bnxt_qplib_roce_stats {
+ u64 to_retransmits;
+ u64 seq_err_naks_rcvd;
+ /* seq_err_naks_rcvd is 64 b */
+ u64 max_retry_exceeded;
+ /* max_retry_exceeded is 64 b */
+ u64 rnr_naks_rcvd;
+ /* rnr_naks_rcvd is 64 b */
+ u64 missing_resp;
+ u64 unrecoverable_err;
+ /* unrecoverable_err is 64 b */
+ u64 bad_resp_err;
+ /* bad_resp_err is 64 b */
+ u64 local_qp_op_err;
+ /* local_qp_op_err is 64 b */
+ u64 local_protection_err;
+ /* local_protection_err is 64 b */
+ u64 mem_mgmt_op_err;
+ /* mem_mgmt_op_err is 64 b */
+ u64 remote_invalid_req_err;
+ /* remote_invalid_req_err is 64 b */
+ u64 remote_access_err;
+ /* remote_access_err is 64 b */
+ u64 remote_op_err;
+ /* remote_op_err is 64 b */
+ u64 dup_req;
+ /* dup_req is 64 b */
+ u64 res_exceed_max;
+ /* res_exceed_max is 64 b */
+ u64 res_length_mismatch;
+ /* res_length_mismatch is 64 b */
+ u64 res_exceeds_wqe;
+ /* res_exceeds_wqe is 64 b */
+ u64 res_opcode_err;
+ /* res_opcode_err is 64 b */
+ u64 res_rx_invalid_rkey;
+ /* res_rx_invalid_rkey is 64 b */
+ u64 res_rx_domain_err;
+ /* res_rx_domain_err is 64 b */
+ u64 res_rx_no_perm;
+ /* res_rx_no_perm is 64 b */
+ u64 res_rx_range_err;
+ /* res_rx_range_err is 64 b */
+ u64 res_tx_invalid_rkey;
+ /* res_tx_invalid_rkey is 64 b */
+ u64 res_tx_domain_err;
+ /* res_tx_domain_err is 64 b */
+ u64 res_tx_no_perm;
+ /* res_tx_no_perm is 64 b */
+ u64 res_tx_range_err;
+ /* res_tx_range_err is 64 b */
+ u64 res_irrq_oflow;
+ /* res_irrq_oflow is 64 b */
+ u64 res_unsup_opcode;
+ /* res_unsup_opcode is 64 b */
+ u64 res_unaligned_atomic;
+ /* res_unaligned_atomic is 64 b */
+ u64 res_rem_inv_err;
+ /* res_rem_inv_err is 64 b */
+ u64 res_mem_error;
+ /* res_mem_error is 64 b */
+ u64 res_srq_err;
+ /* res_srq_err is 64 b */
+ u64 res_cmp_err;
+ /* res_cmp_err is 64 b */
+ u64 res_invalid_dup_rkey;
+ /* res_invalid_dup_rkey is 64 b */
+ u64 res_wqe_format_err;
+ /* res_wqe_format_err is 64 b */
+ u64 res_cq_load_err;
+ /* res_cq_load_err is 64 b */
+ u64 res_srq_load_err;
+ /* res_srq_load_err is 64 b */
+ u64 res_tx_pci_err;
+ /* res_tx_pci_err is 64 b */
+ u64 res_rx_pci_err;
+ /* res_rx_pci_err is 64 b */
+};
+
int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
struct bnxt_qplib_gid *gid);
@@ -147,7 +227,10 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
bool update);
int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_dev_attr *attr);
+ struct bnxt_qplib_dev_attr *attr, bool vf);
+int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx);
int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
@@ -155,7 +238,7 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
bool block);
int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
- u64 *pbl_tbl, int num_pbls, bool block);
+ u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size);
int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr);
int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res,
struct bnxt_qplib_mrw *mr, int max);
@@ -164,4 +247,6 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
struct bnxt_qplib_frpl *frpl);
int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids);
+int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_roce_stats *stats);
#endif /* __BNXT_QPLIB_SP_H__*/
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index c3cba6063a03..2d7ea096a247 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -954,6 +954,7 @@ struct cmdq_base {
#define CMDQ_BASE_OPCODE_QUERY_VERSION 0x8bUL
#define CMDQ_BASE_OPCODE_MODIFY_CC 0x8cUL
#define CMDQ_BASE_OPCODE_QUERY_CC 0x8dUL
+ #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS 0x8eUL
u8 cmd_size;
__le16 flags;
__le16 cookie;
@@ -1383,8 +1384,20 @@ struct cmdq_register_mr {
#define CMDQ_REGISTER_MR_LVL_LVL_0 0x0UL
#define CMDQ_REGISTER_MR_LVL_LVL_1 0x1UL
#define CMDQ_REGISTER_MR_LVL_LVL_2 0x2UL
+ #define CMDQ_REGISTER_MR_LVL_LAST CMDQ_REGISTER_MR_LVL_LVL_2
#define CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK 0x7cUL
#define CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT 2
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_4K (0xcUL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_8K (0xdUL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_64K (0x10UL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_256K (0x12UL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_1M (0x14UL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_2M (0x15UL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_4M (0x16UL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_1G (0x1eUL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_LAST \
+ CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_1G
+ #define CMDQ_REGISTER_MR_UNUSED1 0x80UL
u8 access;
#define CMDQ_REGISTER_MR_ACCESS_LOCAL_WRITE 0x1UL
#define CMDQ_REGISTER_MR_ACCESS_REMOTE_READ 0x2UL
@@ -1392,7 +1405,21 @@ struct cmdq_register_mr {
#define CMDQ_REGISTER_MR_ACCESS_REMOTE_ATOMIC 0x8UL
#define CMDQ_REGISTER_MR_ACCESS_MW_BIND 0x10UL
#define CMDQ_REGISTER_MR_ACCESS_ZERO_BASED 0x20UL
- __le16 unused_1;
+ __le16 log2_pbl_pg_size;
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_MASK 0x1fUL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_SFT 0
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4K 0xcUL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_8K 0xdUL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_64K 0x10UL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256K 0x12UL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1M 0x14UL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M 0x15UL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M 0x16UL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G 0x1eUL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_LAST \
+ CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G
+ #define CMDQ_REGISTER_MR_UNUSED11_MASK 0xffe0UL
+ #define CMDQ_REGISTER_MR_UNUSED11_SFT 5
__le32 key;
__le64 pbl;
__le64 va;
@@ -1799,6 +1826,16 @@ struct cmdq_set_func_resources {
u8 resp_size;
u8 reserved8;
__le64 resp_addr;
+ __le32 number_of_qp;
+ __le32 number_of_mrw;
+ __le32 number_of_srq;
+ __le32 number_of_cq;
+ __le32 max_qp_per_vf;
+ __le32 max_mrw_per_vf;
+ __le32 max_srq_per_vf;
+ __le32 max_cq_per_vf;
+ __le32 max_gid_per_vf;
+ __le32 stat_ctx_id;
};
/* Read hardware resource context command (24 bytes) */
@@ -2013,6 +2050,20 @@ struct creq_modify_qp_resp {
__le16 reserved48[3];
};
+/* cmdq_query_roce_stats (size:128b/16B) */
+struct cmdq_query_roce_stats {
+ u8 opcode;
+ #define CMDQ_QUERY_ROCE_STATS_OPCODE_QUERY_ROCE_STATS 0x8eUL
+ #define CMDQ_QUERY_ROCE_STATS_OPCODE_LAST \
+ CMDQ_QUERY_ROCE_STATS_OPCODE_QUERY_ROCE_STATS
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
/* Query QP command response (16 bytes) */
struct creq_query_qp_resp {
u8 type;
@@ -2783,6 +2834,80 @@ struct creq_query_cc_resp_sb {
__le64 reserved64_1;
};
+/* creq_query_roce_stats_resp (size:128b/16B) */
+struct creq_query_roce_stats_resp {
+ u8 type;
+ #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_LAST \
+ CREQ_QUERY_ROCE_STATS_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_ROCE_STATS_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_ROCE_STATS_RESP_EVENT_QUERY_ROCE_STATS 0x8eUL
+ #define CREQ_QUERY_ROCE_STATS_RESP_EVENT_LAST \
+ CREQ_QUERY_ROCE_STATS_RESP_EVENT_QUERY_ROCE_STATS
+ u8 reserved48[6];
+};
+
+/* creq_query_roce_stats_resp_sb (size:2624b/328B) */
+struct creq_query_roce_stats_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_ROCE_STATS_RESP_SB_OPCODE_QUERY_ROCE_STATS 0x8eUL
+ #define CREQ_QUERY_ROCE_STATS_RESP_SB_OPCODE_LAST \
+ CREQ_QUERY_ROCE_STATS_RESP_SB_OPCODE_QUERY_ROCE_STATS
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 rsvd;
+ __le32 num_counters;
+ __le32 rsvd1;
+ __le64 to_retransmits;
+ __le64 seq_err_naks_rcvd;
+ __le64 max_retry_exceeded;
+ __le64 rnr_naks_rcvd;
+ __le64 missing_resp;
+ __le64 unrecoverable_err;
+ __le64 bad_resp_err;
+ __le64 local_qp_op_err;
+ __le64 local_protection_err;
+ __le64 mem_mgmt_op_err;
+ __le64 remote_invalid_req_err;
+ __le64 remote_access_err;
+ __le64 remote_op_err;
+ __le64 dup_req;
+ __le64 res_exceed_max;
+ __le64 res_length_mismatch;
+ __le64 res_exceeds_wqe;
+ __le64 res_opcode_err;
+ __le64 res_rx_invalid_rkey;
+ __le64 res_rx_domain_err;
+ __le64 res_rx_no_perm;
+ __le64 res_rx_range_err;
+ __le64 res_tx_invalid_rkey;
+ __le64 res_tx_domain_err;
+ __le64 res_tx_no_perm;
+ __le64 res_tx_range_err;
+ __le64 res_irrq_oflow;
+ __le64 res_unsup_opcode;
+ __le64 res_unaligned_atomic;
+ __le64 res_rem_inv_err;
+ __le64 res_mem_error;
+ __le64 res_srq_err;
+ __le64 res_cmp_err;
+ __le64 res_invalid_dup_rkey;
+ __le64 res_wqe_format_err;
+ __le64 res_cq_load_err;
+ __le64 res_srq_load_err;
+ __le64 res_tx_pci_err;
+ __le64 res_rx_pci_err;
+};
+
/* QP error notification event (16 bytes) */
struct creq_qp_error_notification {
u8 type;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 21db3b48a617..4cf17c650c36 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -257,8 +257,8 @@ static void set_emss(struct c4iw_ep *ep, u16 opt)
if (ep->emss < 128)
ep->emss = 128;
if (ep->emss & 7)
- pr_warn("Warning: misaligned mtu idx %u mss %u emss=%u\n",
- TCPOPT_MSS_G(opt), ep->mss, ep->emss);
+ pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n",
+ TCPOPT_MSS_G(opt), ep->mss, ep->emss);
pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss,
ep->emss);
}
@@ -2733,9 +2733,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
if (cxgb_is_neg_adv(req->status)) {
- pr_warn("%s Negative advice on abort- tid %u status %d (%s)\n",
- __func__, ep->hwtid, req->status,
- neg_adv_str(req->status));
+ pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
+ ep->hwtid, req->status, neg_adv_str(req->status));
ep->stats.abort_neg_adv++;
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.neg_adv++;
@@ -3567,8 +3566,8 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
case MORIBUND:
case ABORTING:
case DEAD:
- pr_info("%s ignoring disconnect ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("ignoring disconnect ep %p state %u\n",
+ ep, ep->com.state);
break;
default:
WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
@@ -4097,9 +4096,15 @@ static void process_work(struct work_struct *work)
dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
opcode = rpl->ot.opcode;
- ret = work_handlers[opcode](dev, skb);
- if (!ret)
+ if (opcode >= ARRAY_SIZE(work_handlers) ||
+ !work_handlers[opcode]) {
+ pr_err("No handler for opcode 0x%x.\n", opcode);
kfree_skb(skb);
+ } else {
+ ret = work_handlers[opcode](dev, skb);
+ if (!ret)
+ kfree_skb(skb);
+ }
process_timedout_eps();
}
}
@@ -4201,8 +4206,8 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
if (cxgb_is_neg_adv(req->status)) {
- pr_warn("%s Negative advice on abort- tid %u status %d (%s)\n",
- __func__, ep->hwtid, req->status,
+ pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
+ ep->hwtid, req->status,
neg_adv_str(req->status));
goto out;
}
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index af77d128d242..7a9d0de89d6a 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -66,7 +66,7 @@ MODULE_PARM_DESC(c4iw_wr_log_size_order,
static LIST_HEAD(uld_ctx_list);
static DEFINE_MUTEX(dev_mutex);
-struct workqueue_struct *reg_workq;
+static struct workqueue_struct *reg_workq;
#define DB_FC_RESUME_SIZE 64
#define DB_FC_RESUME_DELAY 1
@@ -108,19 +108,19 @@ void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) &
(wq->rdev->wr_log_size - 1);
le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]);
- getnstimeofday(&le.poll_host_ts);
+ le.poll_host_time = ktime_get();
le.valid = 1;
le.cqe_sge_ts = CQE_TS(cqe);
if (SQ_TYPE(cqe)) {
le.qid = wq->sq.qid;
le.opcode = CQE_OPCODE(cqe);
- le.post_host_ts = wq->sq.sw_sq[wq->sq.cidx].host_ts;
+ le.post_host_time = wq->sq.sw_sq[wq->sq.cidx].host_time;
le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts;
le.wr_id = CQE_WRID_SQ_IDX(cqe);
} else {
le.qid = wq->rq.qid;
le.opcode = FW_RI_RECEIVE;
- le.post_host_ts = wq->rq.sw_rq[wq->rq.cidx].host_ts;
+ le.post_host_time = wq->rq.sw_rq[wq->rq.cidx].host_time;
le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts;
le.wr_id = CQE_WRID_MSN(cqe);
}
@@ -130,9 +130,9 @@ void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
static int wr_log_show(struct seq_file *seq, void *v)
{
struct c4iw_dev *dev = seq->private;
- struct timespec prev_ts = {0, 0};
+ ktime_t prev_time;
struct wr_log_entry *lep;
- int prev_ts_set = 0;
+ int prev_time_set = 0;
int idx, end;
#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000)
@@ -145,33 +145,29 @@ static int wr_log_show(struct seq_file *seq, void *v)
lep = &dev->rdev.wr_log[idx];
while (idx != end) {
if (lep->valid) {
- if (!prev_ts_set) {
- prev_ts_set = 1;
- prev_ts = lep->poll_host_ts;
+ if (!prev_time_set) {
+ prev_time_set = 1;
+ prev_time = lep->poll_host_time;
}
- seq_printf(seq, "%04u: sec %lu nsec %lu qid %u opcode "
- "%u %s 0x%x host_wr_delta sec %lu nsec %lu "
+ seq_printf(seq, "%04u: nsec %llu qid %u opcode "
+ "%u %s 0x%x host_wr_delta nsec %llu "
"post_sge_ts 0x%llx cqe_sge_ts 0x%llx "
"poll_sge_ts 0x%llx post_poll_delta_ns %llu "
"cqe_poll_delta_ns %llu\n",
idx,
- timespec_sub(lep->poll_host_ts,
- prev_ts).tv_sec,
- timespec_sub(lep->poll_host_ts,
- prev_ts).tv_nsec,
+ ktime_to_ns(ktime_sub(lep->poll_host_time,
+ prev_time)),
lep->qid, lep->opcode,
lep->opcode == FW_RI_RECEIVE ?
"msn" : "wrid",
lep->wr_id,
- timespec_sub(lep->poll_host_ts,
- lep->post_host_ts).tv_sec,
- timespec_sub(lep->poll_host_ts,
- lep->post_host_ts).tv_nsec,
+ ktime_to_ns(ktime_sub(lep->poll_host_time,
+ lep->post_host_time)),
lep->post_sge_ts, lep->cqe_sge_ts,
lep->poll_sge_ts,
ts2ns(lep->poll_sge_ts - lep->post_sge_ts),
ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts));
- prev_ts = lep->poll_host_ts;
+ prev_time = lep->poll_host_time;
}
idx++;
if (idx > (dev->rdev.wr_log_size - 1))
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index a252d5c40ae3..3e9d8b277ab9 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -236,7 +236,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
if (atomic_dec_and_test(&chp->refcnt))
wake_up(&chp->wait);
} else {
- pr_warn("%s unknown cqid 0x%x\n", __func__, qid);
+ pr_debug("unknown cqid 0x%x\n", qid);
spin_unlock_irqrestore(&dev->lock, flag);
}
return 0;
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 65dd3726ca02..cc929002c05e 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -153,8 +153,8 @@ struct c4iw_hw_queue {
};
struct wr_log_entry {
- struct timespec post_host_ts;
- struct timespec poll_host_ts;
+ ktime_t post_host_time;
+ ktime_t poll_host_time;
u64 post_sge_ts;
u64 cqe_sge_ts;
u64 poll_sge_ts;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index d5c92fc520d6..de77b6027d69 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1042,7 +1042,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
if (c4iw_wr_log) {
swsqe->sge_ts = cxgb4_read_sge_timestamp(
qhp->rhp->rdev.lldi.ports[0]);
- getnstimeofday(&swsqe->host_ts);
+ swsqe->host_time = ktime_get();
}
init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
@@ -1117,8 +1117,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].sge_ts =
cxgb4_read_sge_timestamp(
qhp->rhp->rdev.lldi.ports[0]);
- getnstimeofday(
- &qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_ts);
+ qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_time =
+ ktime_get();
}
wqe->recv.opcode = FW_RI_RECV_WR;
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 79e8ee12c391..8369c7c8de83 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -277,7 +277,7 @@ struct t4_swsqe {
int signaled;
u16 idx;
int flushed;
- struct timespec host_ts;
+ ktime_t host_time;
u64 sge_ts;
};
@@ -318,7 +318,7 @@ struct t4_sq {
struct t4_swrqe {
u64 wr_id;
- struct timespec host_ts;
+ ktime_t host_time;
u64 sge_ts;
};
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 4f057e8ffe50..6660f920f42e 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -6518,11 +6518,12 @@ static void _dc_start(struct hfi1_devdata *dd)
if (!dd->dc_shutdown)
return;
- /*
- * Take the 8051 out of reset, wait until 8051 is ready, and set host
- * version bit.
- */
- release_and_wait_ready_8051_firmware(dd);
+ /* Take the 8051 out of reset */
+ write_csr(dd, DC_DC8051_CFG_RST, 0ull);
+ /* Wait until 8051 is ready */
+ if (wait_fm_ready(dd, TIMEOUT_8051_START))
+ dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
+ __func__);
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
write_csr(dd, DCC_CFG_RESET, 0x10);
@@ -8564,23 +8565,27 @@ int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
}
/*
- * If the 8051 is in reset mode (dd->dc_shutdown == 1), this function
- * will still continue executing.
- *
* Returns:
* < 0 = Linux error, not able to get access
* > 0 = 8051 command RETURN_CODE
*/
-static int _do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
- u64 *out_data)
+static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
+ u64 *out_data)
{
u64 reg, completed;
int return_code;
unsigned long timeout;
- lockdep_assert_held(&dd->dc8051_lock);
hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
+ mutex_lock(&dd->dc8051_lock);
+
+ /* We can't send any commands to the 8051 if it's in reset */
+ if (dd->dc_shutdown) {
+ return_code = -ENODEV;
+ goto fail;
+ }
+
/*
* If an 8051 host command timed out previously, then the 8051 is
* stuck.
@@ -8681,29 +8686,6 @@ static int _do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
fail:
- return return_code;
-}
-
-/*
- * Returns:
- * < 0 = Linux error, not able to get access
- * > 0 = 8051 command RETURN_CODE
- */
-static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
- u64 *out_data)
-{
- int return_code;
-
- mutex_lock(&dd->dc8051_lock);
- /* We can't send any commands to the 8051 if it's in reset */
- if (dd->dc_shutdown) {
- return_code = -ENODEV;
- goto fail;
- }
-
- return_code = _do_8051_command(dd, type, in_data, out_data);
-
-fail:
mutex_unlock(&dd->dc8051_lock);
return return_code;
}
@@ -8713,17 +8695,16 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
}
-static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id,
- u8 lane_id, u32 config_data)
+int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
+ u8 lane_id, u32 config_data)
{
u64 data;
int ret;
- lockdep_assert_held(&dd->dc8051_lock);
data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
| (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
| (u64)config_data << LOAD_DATA_DATA_SHIFT;
- ret = _do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
+ ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd,
"load 8051 config: field id %d, lane %d, err %d\n",
@@ -8732,18 +8713,6 @@ static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id,
return ret;
}
-int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
- u8 lane_id, u32 config_data)
-{
- int return_code;
-
- mutex_lock(&dd->dc8051_lock);
- return_code = _load_8051_config(dd, field_id, lane_id, config_data);
- mutex_unlock(&dd->dc8051_lock);
-
- return return_code;
-}
-
/*
* Read the 8051 firmware "registers". Use the RAM directly. Always
* set the result, even on error.
@@ -8859,14 +8828,13 @@ int write_host_interface_version(struct hfi1_devdata *dd, u8 version)
u32 frame;
u32 mask;
- lockdep_assert_held(&dd->dc8051_lock);
mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT);
read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame);
/* Clear, then set field */
frame &= ~mask;
frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT);
- return _load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG,
- frame);
+ return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG,
+ frame);
}
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
@@ -9270,6 +9238,14 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (ret != HCMD_SUCCESS)
goto set_local_link_attributes_fail;
+ ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
+ if (ret != HCMD_SUCCESS) {
+ dd_dev_err(dd,
+ "Failed to set host interface version, return 0x%x\n",
+ ret);
+ goto set_local_link_attributes_fail;
+ }
+
/*
* DC supports continuous updates.
*/
@@ -14944,9 +14920,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (num_vls < HFI1_MIN_VLS_SUPPORTED ||
num_vls > HFI1_MAX_VLS_SUPPORTED) {
- hfi1_early_err(&pdev->dev,
- "Invalid num_vls %u, using %u VLs\n",
- num_vls, HFI1_MAX_VLS_SUPPORTED);
+ dd_dev_err(dd, "Invalid num_vls %u, using %u VLs\n",
+ num_vls, HFI1_MAX_VLS_SUPPORTED);
num_vls = HFI1_MAX_VLS_SUPPORTED;
}
ppd->vls_supported = num_vls;
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 133e313feca4..21fca8ec5076 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -508,6 +508,7 @@
#define DOWN_REMOTE_REASON_SHIFT 16
#define DOWN_REMOTE_REASON_MASK 0xff
+#define HOST_INTERFACE_VERSION 1
#define HOST_INTERFACE_VERSION_SHIFT 16
#define HOST_INTERFACE_VERSION_MASK 0xff
@@ -713,7 +714,6 @@ void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
u8 *ver_patch);
int write_host_interface_version(struct hfi1_devdata *dd, u8 version);
void read_guid(struct hfi1_devdata *dd);
-int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd);
int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
u8 neigh_reason, u8 rem_reason);
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 4f65ac671044..067b29f35f21 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -159,22 +159,6 @@ static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask);
}
-const char *get_unit_name(int unit)
-{
- static char iname[16];
-
- snprintf(iname, sizeof(iname), DRIVER_NAME "_%u", unit);
- return iname;
-}
-
-const char *get_card_name(struct rvt_dev_info *rdi)
-{
- struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
- struct hfi1_devdata *dd = container_of(ibdev,
- struct hfi1_devdata, verbs_dev);
- return get_unit_name(dd->unit);
-}
-
struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
{
struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 98868df78a7e..2b57ba70ddd6 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -68,7 +68,6 @@
#define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw"
#define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw"
#define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw"
-#define HOST_INTERFACE_VERSION 1
MODULE_FIRMWARE(DEFAULT_FW_8051_NAME_ASIC);
MODULE_FIRMWARE(DEFAULT_FW_FABRIC_NAME);
@@ -976,46 +975,6 @@ int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout)
}
/*
- * Clear all reset bits, releasing the 8051.
- * Wait for firmware to be ready to accept host requests.
- * Then, set host version bit.
- *
- * This function executes even if the 8051 is in reset mode when
- * dd->dc_shutdown == 1.
- *
- * Expects dd->dc8051_lock to be held.
- */
-int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd)
-{
- int ret;
-
- lockdep_assert_held(&dd->dc8051_lock);
- /* clear all reset bits, releasing the 8051 */
- write_csr(dd, DC_DC8051_CFG_RST, 0ull);
-
- /*
- * Wait for firmware to be ready to accept host
- * requests.
- */
- ret = wait_fm_ready(dd, TIMEOUT_8051_START);
- if (ret) {
- dd_dev_err(dd, "8051 start timeout, current FW state 0x%x\n",
- get_firmware_state(dd));
- return ret;
- }
-
- ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
- if (ret != HCMD_SUCCESS) {
- dd_dev_err(dd,
- "Failed to set host interface version, return 0x%x\n",
- ret);
- return -EIO;
- }
-
- return 0;
-}
-
-/*
* Load the 8051 firmware.
*/
static int load_8051_firmware(struct hfi1_devdata *dd,
@@ -1080,22 +1039,31 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
if (ret)
return ret;
+ /* clear all reset bits, releasing the 8051 */
+ write_csr(dd, DC_DC8051_CFG_RST, 0ull);
+
/*
- * Clear all reset bits, releasing the 8051.
* DC reset step 5. Wait for firmware to be ready to accept host
* requests.
- * Then, set host version bit.
*/
- mutex_lock(&dd->dc8051_lock);
- ret = release_and_wait_ready_8051_firmware(dd);
- mutex_unlock(&dd->dc8051_lock);
- if (ret)
- return ret;
+ ret = wait_fm_ready(dd, TIMEOUT_8051_START);
+ if (ret) { /* timed out */
+ dd_dev_err(dd, "8051 start timeout, current state 0x%x\n",
+ get_firmware_state(dd));
+ return -ETIMEDOUT;
+ }
read_misc_status(dd, &ver_major, &ver_minor, &ver_patch);
dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
(int)ver_major, (int)ver_minor, (int)ver_patch);
dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
+ ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
+ if (ret != HCMD_SUCCESS) {
+ dd_dev_err(dd,
+ "Failed to set host interface version, return 0x%x\n",
+ ret);
+ return -EIO;
+ }
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 8ce9118d4a7f..b42c22292597 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1623,7 +1623,7 @@ static int ingress_pkey_table_search(struct hfi1_pportdata *ppd, u16 pkey)
* the 'error info' for this failure.
*/
static void ingress_pkey_table_fail(struct hfi1_pportdata *ppd, u16 pkey,
- u16 slid)
+ u32 slid)
{
struct hfi1_devdata *dd = ppd->dd;
@@ -1971,8 +1971,6 @@ int get_platform_config_field(struct hfi1_devdata *dd,
table_type, int table_index, int field_index,
u32 *data, u32 len);
-const char *get_unit_name(int unit);
-const char *get_card_name(struct rvt_dev_info *rdi);
struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi);
/*
@@ -2122,39 +2120,42 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
#define dd_dev_emerg(dd, fmt, ...) \
dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define dd_dev_err(dd, fmt, ...) \
dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define dd_dev_err_ratelimited(dd, fmt, ...) \
dev_err_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
+ ##__VA_ARGS__)
#define dd_dev_warn(dd, fmt, ...) \
dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define dd_dev_warn_ratelimited(dd, fmt, ...) \
dev_warn_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
+ ##__VA_ARGS__)
#define dd_dev_info(dd, fmt, ...) \
dev_info(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define dd_dev_info_ratelimited(dd, fmt, ...) \
dev_info_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
+ ##__VA_ARGS__)
#define dd_dev_dbg(dd, fmt, ...) \
dev_dbg(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define hfi1_dev_porterr(dd, port, fmt, ...) \
dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \
- get_unit_name((dd)->unit), (port), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), (port), ##__VA_ARGS__)
/*
* this is used for formatting hw error messages...
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 8e3b3e7d829a..9b128268fb28 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1272,6 +1272,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
"Could not allocate unit ID: error %d\n", -ret);
goto bail;
}
+ rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
+
/*
* Initialize all locks for the device. This needs to be as early as
* possible so locks are usable.
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index cf8dba34fe30..34547a48a445 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -4348,11 +4348,7 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp,
*/
if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
return 0;
- /*
- * On OPA devices it is okay to lose the upper 16 bits of LID as this
- * information is obtained elsewhere. Mask off the upper 16 bits.
- */
- ingress_pkey_table_fail(ppd, pkey, ib_lid_cpu16(0xFFFF & in_wc->slid));
+ ingress_pkey_table_fail(ppd, pkey, in_wc->slid);
return 1;
}
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 4b01ccd895b4..5507910e8b8a 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -556,6 +556,8 @@ void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
struct sdma_engine *sde;
struct send_context *send_context;
struct rvt_ack_entry *e = NULL;
+ struct rvt_srq *srq = qp->ibqp.srq ?
+ ibsrq_to_rvtsrq(qp->ibqp.srq) : NULL;
sde = qp_to_sdma_engine(qp, priv->s_sc);
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
@@ -563,7 +565,7 @@ void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
if (qp->s_ack_queue)
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
seq_printf(s,
- "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x\n",
+ "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x RNR %d %s %d\n",
iter->n,
qp_idle(qp) ? "I" : "B",
qp->ibqp.qp_num,
@@ -610,7 +612,11 @@ void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
/* ack queue information */
e ? e->opcode : 0,
e ? e->psn : 0,
- e ? e->lpsn : 0);
+ e ? e->lpsn : 0,
+ qp->r_min_rnr_timer,
+ srq ? "SRQ" : "RQ",
+ srq ? srq->rq.size : qp->r_rq.size
+ );
}
void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 7eb5d50578ba..14cc212a21c7 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -841,11 +841,11 @@ static inline void hfi1_make_rc_ack_16B(struct rvt_qp *qp,
/* Convert dwords to flits */
len = (*hwords + *nwords) >> 1;
- hfi1_make_16b_hdr(hdr,
- ppd->lid | rdma_ah_get_path_bits(&qp->remote_ah_attr),
+ hfi1_make_16b_hdr(hdr, ppd->lid |
+ (rdma_ah_get_path_bits(&qp->remote_ah_attr) &
+ ((1 << ppd->lmc) - 1)),
opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr),
- 16B),
- len, pkey, becn, 0, l4, sc5);
+ 16B), len, pkey, becn, 0, l4, sc5);
bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
bth0 |= extra_bytes << 20;
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index a38785e224cc..b8776a362a91 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1486,7 +1486,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ?
4096 : hfi1_max_mtu), IB_MTU_4096);
props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
- mtu_to_enum(ppd->ibmtu, IB_MTU_2048);
+ mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
/*
* sm_lid of 0xFFFF needs special handling so that it can
@@ -1844,7 +1844,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
struct hfi1_ibport *ibp = &ppd->ibport_data;
unsigned i;
int ret;
- size_t lcpysz = IB_DEVICE_NAME_MAX;
for (i = 0; i < dd->num_pports; i++)
init_ibport(ppd + i);
@@ -1872,8 +1871,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
*/
if (!ib_hfi1_sys_image_guid)
ib_hfi1_sys_image_guid = ibdev->node_guid;
- lcpysz = strlcpy(ibdev->name, class_name(), lcpysz);
- strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
ibdev->owner = THIS_MODULE;
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dev.parent = &dd->pcidev->dev;
@@ -1893,7 +1890,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
* Fill in rvt info object.
*/
dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
- dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name;
dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index ff426a625e13..97bf2cd1cacb 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -5,7 +5,7 @@
ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o
-hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_eq.o hns_roce_pd.o \
+hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
hns_roce_cq.o hns_roce_alloc.o
obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 1085cb249bc1..9ebe839d8b24 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -103,6 +103,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
context->out_param = out_param;
complete(&context->done);
}
+EXPORT_SYMBOL_GPL(hns_roce_cmd_event);
/* this should be called with "use_events" */
static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index b1c94223c28b..9549ae51a0dd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -88,6 +88,16 @@ enum {
HNS_ROCE_CMD_DESTROY_SRQC_BT0 = 0x38,
HNS_ROCE_CMD_DESTROY_SRQC_BT1 = 0x39,
HNS_ROCE_CMD_DESTROY_SRQC_BT2 = 0x3a,
+
+ /* EQC commands */
+ HNS_ROCE_CMD_CREATE_AEQC = 0x80,
+ HNS_ROCE_CMD_MODIFY_AEQC = 0x81,
+ HNS_ROCE_CMD_QUERY_AEQC = 0x82,
+ HNS_ROCE_CMD_DESTROY_AEQC = 0x83,
+ HNS_ROCE_CMD_CREATE_CEQC = 0x90,
+ HNS_ROCE_CMD_MODIFY_CEQC = 0x91,
+ HNS_ROCE_CMD_QUERY_CEQC = 0x92,
+ HNS_ROCE_CMD_DESTROY_CEQC = 0x93,
};
enum {
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 7ecb7a4147a8..dd67fafd0c40 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -376,6 +376,12 @@
#define ROCEE_RX_CMQ_TAIL_REG 0x07024
#define ROCEE_RX_CMQ_HEAD_REG 0x07028
+#define ROCEE_VF_MB_CFG0_REG 0x40
+#define ROCEE_VF_MB_STATUS_REG 0x58
+
+#define ROCEE_VF_EQ_DB_CFG0_REG 0x238
+#define ROCEE_VF_EQ_DB_CFG1_REG 0x23C
+
#define ROCEE_VF_SMAC_CFG0_REG 0x12000
#define ROCEE_VF_SMAC_CFG1_REG 0x12004
@@ -385,4 +391,9 @@
#define ROCEE_VF_SGID_CFG3_REG 0x1000c
#define ROCEE_VF_SGID_CFG4_REG 0x10010
+#define ROCEE_VF_ABN_INT_CFG_REG 0x13000
+#define ROCEE_VF_ABN_INT_ST_REG 0x13004
+#define ROCEE_VF_ABN_INT_EN_REG 0x13008
+#define ROCEE_VF_EVENT_INT_EN_REG 0x1300c
+
#endif /* _HNS_ROCE_COMMON_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 2111b57a3489..bccc9b54c9ce 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -196,15 +196,14 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
if (ret)
dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret,
hr_cq->cqn);
- if (hr_dev->eq_table.eq) {
- /* Waiting interrupt process procedure carried out */
- synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq);
-
- /* wait for all interrupt processed */
- if (atomic_dec_and_test(&hr_cq->refcount))
- complete(&hr_cq->free);
- wait_for_completion(&hr_cq->free);
- }
+
+ /* Waiting interrupt process procedure carried out */
+ synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq);
+
+ /* wait for all interrupt processed */
+ if (atomic_dec_and_test(&hr_cq->refcount))
+ complete(&hr_cq->free);
+ wait_for_completion(&hr_cq->free);
spin_lock_irq(&cq_table->lock);
radix_tree_delete(&cq_table->tree, hr_cq->cqn);
@@ -460,6 +459,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
++cq->arm_sn;
cq->comp(cq);
}
+EXPORT_SYMBOL_GPL(hns_roce_cq_completion);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
{
@@ -482,6 +482,7 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
if (atomic_dec_and_test(&cq->refcount))
complete(&cq->free);
}
+EXPORT_SYMBOL_GPL(hns_roce_cq_event);
int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
{
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index b154ce40cded..42c3b5a2d441 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -62,12 +62,16 @@
#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2
#define HNS_ROCE_MIN_CQE_CNT 16
-#define HNS_ROCE_MAX_IRQ_NUM 34
+#define HNS_ROCE_MAX_IRQ_NUM 128
-#define HNS_ROCE_COMP_VEC_NUM 32
+#define EQ_ENABLE 1
+#define EQ_DISABLE 0
-#define HNS_ROCE_AEQE_VEC_NUM 1
-#define HNS_ROCE_AEQE_OF_VEC_NUM 1
+#define HNS_ROCE_CEQ 0
+#define HNS_ROCE_AEQ 1
+
+#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4
+#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10
/* 4G/4K = 1M */
#define HNS_ROCE_SL_SHIFT 28
@@ -130,6 +134,7 @@ enum hns_roce_event {
HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12,
HNS_ROCE_EVENT_TYPE_MB = 0x13,
HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW = 0x14,
+ HNS_ROCE_EVENT_TYPE_FLR = 0x15,
};
/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */
@@ -173,6 +178,7 @@ enum {
enum {
HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0),
HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1),
+ HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2)
};
enum hns_roce_mtt_type {
@@ -441,6 +447,21 @@ struct hns_roce_cmd_mailbox {
struct hns_roce_dev;
+struct hns_roce_rinl_sge {
+ void *addr;
+ u32 len;
+};
+
+struct hns_roce_rinl_wqe {
+ struct hns_roce_rinl_sge *sg_list;
+ u32 sge_cnt;
+};
+
+struct hns_roce_rinl_buf {
+ struct hns_roce_rinl_wqe *wqe_list;
+ u32 wqe_cnt;
+};
+
struct hns_roce_qp {
struct ib_qp ibqp;
struct hns_roce_buf hr_buf;
@@ -462,7 +483,9 @@ struct hns_roce_qp {
u8 resp_depth;
u8 state;
u32 access_flags;
+ u32 atomic_rd_en;
u32 pkey_index;
+ u32 qkey;
void (*event)(struct hns_roce_qp *,
enum hns_roce_event);
unsigned long qpn;
@@ -472,6 +495,8 @@ struct hns_roce_qp {
struct hns_roce_sge sge;
u32 next_sge;
+
+ struct hns_roce_rinl_buf rq_inl_buf;
};
struct hns_roce_sqp {
@@ -485,6 +510,45 @@ struct hns_roce_ib_iboe {
u8 phy_port[HNS_ROCE_MAX_PORTS];
};
+enum {
+ HNS_ROCE_EQ_STAT_INVALID = 0,
+ HNS_ROCE_EQ_STAT_VALID = 2,
+};
+
+struct hns_roce_ceqe {
+ u32 comp;
+};
+
+struct hns_roce_aeqe {
+ u32 asyn;
+ union {
+ struct {
+ u32 qp;
+ u32 rsv0;
+ u32 rsv1;
+ } qp_event;
+
+ struct {
+ u32 cq;
+ u32 rsv0;
+ u32 rsv1;
+ } cq_event;
+
+ struct {
+ u32 ceqe;
+ u32 rsv0;
+ u32 rsv1;
+ } ce_event;
+
+ struct {
+ __le64 out_param;
+ __le16 token;
+ u8 status;
+ u8 rsv0;
+ } __packed cmd;
+ } event;
+};
+
struct hns_roce_eq {
struct hns_roce_dev *hr_dev;
void __iomem *doorbell;
@@ -498,11 +562,31 @@ struct hns_roce_eq {
int log_page_size;
int cons_index;
struct hns_roce_buf_list *buf_list;
+ int over_ignore;
+ int coalesce;
+ int arm_st;
+ u64 eqe_ba;
+ int eqe_ba_pg_sz;
+ int eqe_buf_pg_sz;
+ int hop_num;
+ u64 *bt_l0; /* Base address table for L0 */
+ u64 **bt_l1; /* Base address table for L1 */
+ u64 **buf;
+ dma_addr_t l0_dma;
+ dma_addr_t *l1_dma;
+ dma_addr_t *buf_dma;
+ u32 l0_last_num; /* L0 last chunk num */
+ u32 l1_last_num; /* L1 last chunk num */
+ int eq_max_cnt;
+ int eq_period;
+ int shift;
+ dma_addr_t cur_eqe_ba;
+ dma_addr_t nxt_eqe_ba;
};
struct hns_roce_eq_table {
struct hns_roce_eq *eq;
- void __iomem **eqc_base;
+ void __iomem **eqc_base; /* only for hw v1 */
};
struct hns_roce_caps {
@@ -528,7 +612,7 @@ struct hns_roce_caps {
u32 min_wqes;
int reserved_cqs;
int num_aeq_vectors; /* 1 */
- int num_comp_vectors; /* 32 ceq */
+ int num_comp_vectors;
int num_other_vectors;
int num_mtpts;
u32 num_mtt_segs;
@@ -550,7 +634,7 @@ struct hns_roce_caps {
u32 pbl_buf_pg_sz;
u32 pbl_hop_num;
int aeqe_depth;
- int ceqe_depth[HNS_ROCE_COMP_VEC_NUM];
+ int ceqe_depth;
enum ib_mtu max_mtu;
u32 qpc_bt_num;
u32 srqc_bt_num;
@@ -574,6 +658,9 @@ struct hns_roce_caps {
u32 cqe_ba_pg_sz;
u32 cqe_buf_pg_sz;
u32 cqe_hop_num;
+ u32 eqe_ba_pg_sz;
+ u32 eqe_buf_pg_sz;
+ u32 eqe_hop_num;
u32 chunk_sz; /* chunk size in non multihop mode*/
u64 flags;
};
@@ -623,6 +710,8 @@ struct hns_roce_hw {
int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr);
int (*destroy_cq)(struct ib_cq *ibcq);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+ int (*init_eq)(struct hns_roce_dev *hr_dev);
+ void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
};
struct hns_roce_dev {
diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c
deleted file mode 100644
index d184431e2bf5..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_eq.c
+++ /dev/null
@@ -1,759 +0,0 @@
-/*
- * Copyright (c) 2016 Hisilicon Limited.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/platform_device.h>
-#include <linux/interrupt.h>
-#include "hns_roce_common.h"
-#include "hns_roce_device.h"
-#include "hns_roce_eq.h"
-
-static void eq_set_cons_index(struct hns_roce_eq *eq, int req_not)
-{
- roce_raw_write((eq->cons_index & CONS_INDEX_MASK) |
- (req_not << eq->log_entries), eq->doorbell);
- /* Memory barrier */
- mb();
-}
-
-static struct hns_roce_aeqe *get_aeqe(struct hns_roce_eq *eq, u32 entry)
-{
- unsigned long off = (entry & (eq->entries - 1)) *
- HNS_ROCE_AEQ_ENTRY_SIZE;
-
- return (struct hns_roce_aeqe *)((u8 *)
- (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
- off % HNS_ROCE_BA_SIZE);
-}
-
-static struct hns_roce_aeqe *next_aeqe_sw(struct hns_roce_eq *eq)
-{
- struct hns_roce_aeqe *aeqe = get_aeqe(eq, eq->cons_index);
-
- return (roce_get_bit(aeqe->asyn, HNS_ROCE_AEQE_U32_4_OWNER_S) ^
- !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
-}
-
-static void hns_roce_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe, int qpn)
-{
- struct device *dev = &hr_dev->pdev->dev;
-
- dev_warn(dev, "Local Work Queue Catastrophic Error.\n");
- switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
- case HNS_ROCE_LWQCE_QPC_ERROR:
- dev_warn(dev, "QP %d, QPC error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_MTU_ERROR:
- dev_warn(dev, "QP %d, MTU error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
- dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
- dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
- dev_warn(dev, "QP %d, WQE shift error\n", qpn);
- break;
- case HNS_ROCE_LWQCE_SL_ERROR:
- dev_warn(dev, "QP %d, SL error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_PORT_ERROR:
- dev_warn(dev, "QP %d, port error.\n", qpn);
- break;
- default:
- break;
- }
-}
-
-static void hns_roce_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int qpn)
-{
- struct device *dev = &hr_dev->pdev->dev;
-
- dev_warn(dev, "Local Access Violation Work Queue Error.\n");
- switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
- case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
- dev_warn(dev, "QP %d, R_key violation.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_LENGTH_ERROR:
- dev_warn(dev, "QP %d, length error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_VA_ERROR:
- dev_warn(dev, "QP %d, VA error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_PD_ERROR:
- dev_err(dev, "QP %d, PD error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
- dev_warn(dev, "QP %d, rw acc error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
- dev_warn(dev, "QP %d, key state error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
- dev_warn(dev, "QP %d, MR operation error.\n", qpn);
- break;
- default:
- break;
- }
-}
-
-static void hns_roce_qp_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int event_type)
-{
- struct device *dev = &hr_dev->pdev->dev;
- int phy_port;
- int qpn;
-
- qpn = roce_get_field(aeqe->event.qp_event.qp,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S);
- phy_port = roce_get_field(aeqe->event.qp_event.qp,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S);
- if (qpn <= 1)
- qpn = HNS_ROCE_MAX_PORTS * qpn + phy_port;
-
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
- dev_warn(dev, "Invalid Req Local Work Queue Error.\n"
- "QP %d, phy_port %d.\n", qpn, phy_port);
- break;
- case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- hns_roce_wq_catas_err_handle(hr_dev, aeqe, qpn);
- break;
- case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_local_wq_access_err_handle(hr_dev, aeqe, qpn);
- break;
- default:
- break;
- }
-
- hns_roce_qp_event(hr_dev, qpn, event_type);
-}
-
-static void hns_roce_cq_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int event_type)
-{
- struct device *dev = &hr_dev->pdev->dev;
- u32 cqn;
-
- cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq,
- HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
- HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S));
-
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- dev_warn(dev, "CQ 0x%x access err.\n", cqn);
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- dev_warn(dev, "CQ 0x%x overflow\n", cqn);
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
- dev_warn(dev, "CQ 0x%x ID invalid.\n", cqn);
- break;
- default:
- break;
- }
-
- hns_roce_cq_event(hr_dev, cqn, event_type);
-}
-
-static void hns_roce_db_overflow_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe)
-{
- struct device *dev = &hr_dev->pdev->dev;
-
- switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
- case HNS_ROCE_DB_SUBTYPE_SDB_OVF:
- dev_warn(dev, "SDB overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF:
- dev_warn(dev, "SDB almost overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP:
- dev_warn(dev, "SDB almost empty.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_ODB_OVF:
- dev_warn(dev, "ODB overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF:
- dev_warn(dev, "ODB almost overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP:
- dev_warn(dev, "SDB almost empty.\n");
- break;
- default:
- break;
- }
-}
-
-static int hns_roce_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_aeqe *aeqe;
- int aeqes_found = 0;
- int event_type;
-
- while ((aeqe = next_aeqe_sw(eq))) {
- dev_dbg(dev, "aeqe = %p, aeqe->asyn.event_type = 0x%lx\n", aeqe,
- roce_get_field(aeqe->asyn,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
- /* Memory barrier */
- rmb();
-
- event_type = roce_get_field(aeqe->asyn,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S);
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_PATH_MIG:
- dev_warn(dev, "PATH MIG not supported\n");
- break;
- case HNS_ROCE_EVENT_TYPE_COMM_EST:
- dev_warn(dev, "COMMUNICATION established\n");
- break;
- case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
- dev_warn(dev, "SQ DRAINED not supported\n");
- break;
- case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
- dev_warn(dev, "PATH MIG failed\n");
- break;
- case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
- case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_qp_err_handle(hr_dev, aeqe, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
- case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
- case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
- dev_warn(dev, "SRQ not support!\n");
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
- hns_roce_cq_err_handle(hr_dev, aeqe, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_PORT_CHANGE:
- dev_warn(dev, "port change.\n");
- break;
- case HNS_ROCE_EVENT_TYPE_MB:
- hns_roce_cmd_event(hr_dev,
- le16_to_cpu(aeqe->event.cmd.token),
- aeqe->event.cmd.status,
- le64_to_cpu(aeqe->event.cmd.out_param
- ));
- break;
- case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
- hns_roce_db_overflow_handle(hr_dev, aeqe);
- break;
- case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
- dev_warn(dev, "CEQ 0x%lx overflow.\n",
- roce_get_field(aeqe->event.ce_event.ceqe,
- HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M,
- HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S));
- break;
- default:
- dev_warn(dev, "Unhandled event %d on EQ %d at index %u\n",
- event_type, eq->eqn, eq->cons_index);
- break;
- }
-
- eq->cons_index++;
- aeqes_found = 1;
-
- if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1) {
- dev_warn(dev, "cons_index overflow, set back to zero\n"
- );
- eq->cons_index = 0;
- }
- }
-
- eq_set_cons_index(eq, 0);
-
- return aeqes_found;
-}
-
-static struct hns_roce_ceqe *get_ceqe(struct hns_roce_eq *eq, u32 entry)
-{
- unsigned long off = (entry & (eq->entries - 1)) *
- HNS_ROCE_CEQ_ENTRY_SIZE;
-
- return (struct hns_roce_ceqe *)((u8 *)
- (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
- off % HNS_ROCE_BA_SIZE);
-}
-
-static struct hns_roce_ceqe *next_ceqe_sw(struct hns_roce_eq *eq)
-{
- struct hns_roce_ceqe *ceqe = get_ceqe(eq, eq->cons_index);
-
- return (!!(roce_get_bit(ceqe->ceqe.comp,
- HNS_ROCE_CEQE_CEQE_COMP_OWNER_S))) ^
- (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
-}
-
-static int hns_roce_ceq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
-{
- struct hns_roce_ceqe *ceqe;
- int ceqes_found = 0;
- u32 cqn;
-
- while ((ceqe = next_ceqe_sw(eq))) {
- /* Memory barrier */
- rmb();
- cqn = roce_get_field(ceqe->ceqe.comp,
- HNS_ROCE_CEQE_CEQE_COMP_CQN_M,
- HNS_ROCE_CEQE_CEQE_COMP_CQN_S);
- hns_roce_cq_completion(hr_dev, cqn);
-
- ++eq->cons_index;
- ceqes_found = 1;
-
- if (eq->cons_index > 2 * hr_dev->caps.ceqe_depth[eq->eqn] - 1) {
- dev_warn(&eq->hr_dev->pdev->dev,
- "cons_index overflow, set back to zero\n");
- eq->cons_index = 0;
- }
- }
-
- eq_set_cons_index(eq, 0);
-
- return ceqes_found;
-}
-
-static int hns_roce_aeq_ovf_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- struct device *dev = &eq->hr_dev->pdev->dev;
- int eqovf_found = 0;
- u32 caepaemask_val;
- u32 cealmovf_val;
- u32 caepaest_val;
- u32 aeshift_val;
- u32 ceshift_val;
- u32 cemask_val;
- int i = 0;
-
- /**
- * AEQ overflow ECC mult bit err CEQ overflow alarm
- * must clear interrupt, mask irq, clear irq, cancel mask operation
- */
- aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
-
- if (roce_get_bit(aeshift_val,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) {
- dev_warn(dev, "AEQ overflow!\n");
-
- /* Set mask */
- caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- roce_set_bit(caepaemask_val,
- ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_ENABLE);
- roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
-
- /* Clear int state(INT_WC : write 1 clear) */
- caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG);
- roce_set_bit(caepaest_val,
- ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1);
- roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val);
-
- /* Clear mask */
- caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- roce_set_bit(caepaemask_val,
- ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_DISABLE);
- roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
- }
-
- /* CEQ almost overflow */
- for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
- ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG +
- i * CEQ_REG_OFFSET);
-
- if (roce_get_bit(ceshift_val,
- ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) {
- dev_warn(dev, "CEQ[%d] almost overflow!\n", i);
- eqovf_found++;
-
- /* Set mask */
- cemask_val = roce_read(hr_dev,
- ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET);
- roce_set_bit(cemask_val,
- ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_ENABLE);
- roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET, cemask_val);
-
- /* Clear int state(INT_WC : write 1 clear) */
- cealmovf_val = roce_read(hr_dev,
- ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
- i * CEQ_REG_OFFSET);
- roce_set_bit(cealmovf_val,
- ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S,
- 1);
- roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
- i * CEQ_REG_OFFSET, cealmovf_val);
-
- /* Clear mask */
- cemask_val = roce_read(hr_dev,
- ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET);
- roce_set_bit(cemask_val,
- ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_DISABLE);
- roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET, cemask_val);
- }
- }
-
- /* ECC multi-bit error alarm */
- dev_warn(dev, "ECC UCERR ALARM: 0x%x, 0x%x, 0x%x\n",
- roce_read(hr_dev, ROCEE_ECC_UCERR_ALM0_REG),
- roce_read(hr_dev, ROCEE_ECC_UCERR_ALM1_REG),
- roce_read(hr_dev, ROCEE_ECC_UCERR_ALM2_REG));
-
- dev_warn(dev, "ECC CERR ALARM: 0x%x, 0x%x, 0x%x\n",
- roce_read(hr_dev, ROCEE_ECC_CERR_ALM0_REG),
- roce_read(hr_dev, ROCEE_ECC_CERR_ALM1_REG),
- roce_read(hr_dev, ROCEE_ECC_CERR_ALM2_REG));
-
- return eqovf_found;
-}
-
-static int hns_roce_eq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
-{
- int eqes_found = 0;
-
- if (likely(eq->type_flag == HNS_ROCE_CEQ))
- /* CEQ irq routine, CEQ is pulse irq, not clear */
- eqes_found = hns_roce_ceq_int(hr_dev, eq);
- else if (likely(eq->type_flag == HNS_ROCE_AEQ))
- /* AEQ irq routine, AEQ is pulse irq, not clear */
- eqes_found = hns_roce_aeq_int(hr_dev, eq);
- else
- /* AEQ queue overflow irq */
- eqes_found = hns_roce_aeq_ovf_int(hr_dev, eq);
-
- return eqes_found;
-}
-
-static irqreturn_t hns_roce_msi_x_interrupt(int irq, void *eq_ptr)
-{
- int int_work = 0;
- struct hns_roce_eq *eq = eq_ptr;
- struct hns_roce_dev *hr_dev = eq->hr_dev;
-
- int_work = hns_roce_eq_int(hr_dev, eq);
-
- return IRQ_RETVAL(int_work);
-}
-
-static void hns_roce_enable_eq(struct hns_roce_dev *hr_dev, int eq_num,
- int enable_flag)
-{
- void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num];
- u32 val;
-
- val = readl(eqc);
-
- if (enable_flag)
- roce_set_field(val,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
- HNS_ROCE_EQ_STAT_VALID);
- else
- roce_set_field(val,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
- HNS_ROCE_EQ_STAT_INVALID);
- writel(val, eqc);
-}
-
-static int hns_roce_create_eq(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn];
- struct device *dev = &hr_dev->pdev->dev;
- dma_addr_t tmp_dma_addr;
- u32 eqconsindx_val = 0;
- u32 eqcuridx_val = 0;
- u32 eqshift_val = 0;
- int num_bas = 0;
- int ret;
- int i;
-
- num_bas = (PAGE_ALIGN(eq->entries * eq->eqe_size) +
- HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
-
- if ((eq->entries * eq->eqe_size) > HNS_ROCE_BA_SIZE) {
- dev_err(dev, "[error]eq buf %d gt ba size(%d) need bas=%d\n",
- (eq->entries * eq->eqe_size), HNS_ROCE_BA_SIZE,
- num_bas);
- return -EINVAL;
- }
-
- eq->buf_list = kcalloc(num_bas, sizeof(*eq->buf_list), GFP_KERNEL);
- if (!eq->buf_list)
- return -ENOMEM;
-
- for (i = 0; i < num_bas; ++i) {
- eq->buf_list[i].buf = dma_alloc_coherent(dev, HNS_ROCE_BA_SIZE,
- &tmp_dma_addr,
- GFP_KERNEL);
- if (!eq->buf_list[i].buf) {
- ret = -ENOMEM;
- goto err_out_free_pages;
- }
-
- eq->buf_list[i].map = tmp_dma_addr;
- memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE);
- }
- eq->cons_index = 0;
- roce_set_field(eqshift_val,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
- HNS_ROCE_EQ_STAT_INVALID);
- roce_set_field(eqshift_val,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S,
- eq->log_entries);
- writel(eqshift_val, eqc);
-
- /* Configure eq extended address 12~44bit */
- writel((u32)(eq->buf_list[0].map >> 12), eqc + 4);
-
- /*
- * Configure eq extended address 45~49 bit.
- * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
- * using 4K page, and shift more 32 because of
- * caculating the high 32 bit value evaluated to hardware.
- */
- roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M,
- ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S,
- eq->buf_list[0].map >> 44);
- roce_set_field(eqcuridx_val,
- ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M,
- ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0);
- writel(eqcuridx_val, eqc + 8);
-
- /* Configure eq consumer index */
- roce_set_field(eqconsindx_val,
- ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M,
- ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0);
- writel(eqconsindx_val, eqc + 0xc);
-
- return 0;
-
-err_out_free_pages:
- for (i = i - 1; i >= 0; i--)
- dma_free_coherent(dev, HNS_ROCE_BA_SIZE, eq->buf_list[i].buf,
- eq->buf_list[i].map);
-
- kfree(eq->buf_list);
- return ret;
-}
-
-static void hns_roce_free_eq(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- int i = 0;
- int npages = (PAGE_ALIGN(eq->eqe_size * eq->entries) +
- HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
-
- if (!eq->buf_list)
- return;
-
- for (i = 0; i < npages; ++i)
- dma_free_coherent(&hr_dev->pdev->dev, HNS_ROCE_BA_SIZE,
- eq->buf_list[i].buf, eq->buf_list[i].map);
-
- kfree(eq->buf_list);
-}
-
-static void hns_roce_int_mask_en(struct hns_roce_dev *hr_dev)
-{
- int i = 0;
- u32 aemask_val;
- int masken = 0;
-
- /* AEQ INT */
- aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
- masken);
- roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken);
- roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val);
-
- /* CEQ INT */
- for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
- /* IRQ mask */
- roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET, masken);
- }
-}
-
-static void hns_roce_ce_int_default_cfg(struct hns_roce_dev *hr_dev)
-{
- /* Configure ce int interval */
- roce_write(hr_dev, ROCEE_CAEP_CE_INTERVAL_CFG_REG,
- HNS_ROCE_CEQ_DEFAULT_INTERVAL);
-
- /* Configure ce int burst num */
- roce_write(hr_dev, ROCEE_CAEP_CE_BURST_NUM_CFG_REG,
- HNS_ROCE_CEQ_DEFAULT_BURST_NUM);
-}
-
-int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_eq *eq = NULL;
- int eq_num = 0;
- int ret = 0;
- int i = 0;
- int j = 0;
-
- eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
- eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL);
- if (!eq_table->eq)
- return -ENOMEM;
-
- eq_table->eqc_base = kcalloc(eq_num, sizeof(*eq_table->eqc_base),
- GFP_KERNEL);
- if (!eq_table->eqc_base) {
- ret = -ENOMEM;
- goto err_eqc_base_alloc_fail;
- }
-
- for (i = 0; i < eq_num; i++) {
- eq = &eq_table->eq[i];
- eq->hr_dev = hr_dev;
- eq->eqn = i;
- eq->irq = hr_dev->irq[i];
- eq->log_page_size = PAGE_SHIFT;
-
- if (i < hr_dev->caps.num_comp_vectors) {
- /* CEQ */
- eq_table->eqc_base[i] = hr_dev->reg_base +
- ROCEE_CAEP_CEQC_SHIFT_0_REG +
- HNS_ROCE_CEQC_REG_OFFSET * i;
- eq->type_flag = HNS_ROCE_CEQ;
- eq->doorbell = hr_dev->reg_base +
- ROCEE_CAEP_CEQC_CONS_IDX_0_REG +
- HNS_ROCE_CEQC_REG_OFFSET * i;
- eq->entries = hr_dev->caps.ceqe_depth[i];
- eq->log_entries = ilog2(eq->entries);
- eq->eqe_size = sizeof(struct hns_roce_ceqe);
- } else {
- /* AEQ */
- eq_table->eqc_base[i] = hr_dev->reg_base +
- ROCEE_CAEP_AEQC_AEQE_SHIFT_REG;
- eq->type_flag = HNS_ROCE_AEQ;
- eq->doorbell = hr_dev->reg_base +
- ROCEE_CAEP_AEQE_CONS_IDX_REG;
- eq->entries = hr_dev->caps.aeqe_depth;
- eq->log_entries = ilog2(eq->entries);
- eq->eqe_size = sizeof(struct hns_roce_aeqe);
- }
- }
-
- /* Disable irq */
- hns_roce_int_mask_en(hr_dev);
-
- /* Configure CE irq interval and burst num */
- hns_roce_ce_int_default_cfg(hr_dev);
-
- for (i = 0; i < eq_num; i++) {
- ret = hns_roce_create_eq(hr_dev, &eq_table->eq[i]);
- if (ret) {
- dev_err(dev, "eq create failed\n");
- goto err_create_eq_fail;
- }
- }
-
- for (j = 0; j < eq_num; j++) {
- ret = request_irq(eq_table->eq[j].irq, hns_roce_msi_x_interrupt,
- 0, hr_dev->irq_names[j], eq_table->eq + j);
- if (ret) {
- dev_err(dev, "request irq error!\n");
- goto err_request_irq_fail;
- }
- }
-
- for (i = 0; i < eq_num; i++)
- hns_roce_enable_eq(hr_dev, i, EQ_ENABLE);
-
- return 0;
-
-err_request_irq_fail:
- for (j = j - 1; j >= 0; j--)
- free_irq(eq_table->eq[j].irq, eq_table->eq + j);
-
-err_create_eq_fail:
- for (i = i - 1; i >= 0; i--)
- hns_roce_free_eq(hr_dev, &eq_table->eq[i]);
-
- kfree(eq_table->eqc_base);
-
-err_eqc_base_alloc_fail:
- kfree(eq_table->eq);
-
- return ret;
-}
-
-void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev)
-{
- int i;
- int eq_num;
- struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
-
- eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
- for (i = 0; i < eq_num; i++) {
- /* Disable EQ */
- hns_roce_enable_eq(hr_dev, i, EQ_DISABLE);
-
- free_irq(eq_table->eq[i].irq, eq_table->eq + i);
-
- hns_roce_free_eq(hr_dev, &eq_table->eq[i]);
- }
-
- kfree(eq_table->eqc_base);
- kfree(eq_table->eq);
-}
diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.h b/drivers/infiniband/hw/hns/hns_roce_eq.h
deleted file mode 100644
index c6d212d12e03..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_eq.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2016 Hisilicon Limited.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _HNS_ROCE_EQ_H
-#define _HNS_ROCE_EQ_H
-
-#define HNS_ROCE_CEQ 1
-#define HNS_ROCE_AEQ 2
-
-#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4
-#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10
-#define HNS_ROCE_CEQC_REG_OFFSET 0x18
-
-#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x10
-#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x10
-
-#define HNS_ROCE_INT_MASK_DISABLE 0
-#define HNS_ROCE_INT_MASK_ENABLE 1
-
-#define EQ_ENABLE 1
-#define EQ_DISABLE 0
-#define CONS_INDEX_MASK 0xffff
-
-#define CEQ_REG_OFFSET 0x18
-
-enum {
- HNS_ROCE_EQ_STAT_INVALID = 0,
- HNS_ROCE_EQ_STAT_VALID = 2,
-};
-
-struct hns_roce_aeqe {
- u32 asyn;
- union {
- struct {
- u32 qp;
- u32 rsv0;
- u32 rsv1;
- } qp_event;
-
- struct {
- u32 cq;
- u32 rsv0;
- u32 rsv1;
- } cq_event;
-
- struct {
- u32 port;
- u32 rsv0;
- u32 rsv1;
- } port_event;
-
- struct {
- u32 ceqe;
- u32 rsv0;
- u32 rsv1;
- } ce_event;
-
- struct {
- __le64 out_param;
- __le16 token;
- u8 status;
- u8 rsv0;
- } __packed cmd;
- } event;
-};
-
-#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S 16
-#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M \
- (((1UL << 8) - 1) << HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)
-
-#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S 24
-#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M \
- (((1UL << 7) - 1) << HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)
-
-#define HNS_ROCE_AEQE_U32_4_OWNER_S 31
-
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S 0
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M \
- (((1UL << 24) - 1) << HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S)
-
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S 25
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M \
- (((1UL << 3) - 1) << HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S)
-
-#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S 0
-#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M \
- (((1UL << 16) - 1) << HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)
-
-#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0
-#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M \
- (((1UL << 5) - 1) << HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S)
-
-struct hns_roce_ceqe {
- union {
- int comp;
- } ceqe;
-};
-
-#define HNS_ROCE_CEQE_CEQE_COMP_OWNER_S 0
-
-#define HNS_ROCE_CEQE_CEQE_COMP_CQN_S 16
-#define HNS_ROCE_CEQE_CEQE_COMP_CQN_M \
- (((1UL << 16) - 1) << HNS_ROCE_CEQE_CEQE_COMP_CQN_S)
-
-#endif /* _HNS_ROCE_EQ_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index af27168faf0f..21ca9fa7c9d1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -33,6 +33,7 @@
#include <linux/platform_device.h>
#include <linux/acpi.h>
#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <rdma/ib_umem.h>
@@ -774,7 +775,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
goto create_lp_qp_failed;
}
- ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, IB_QP_DEST_QPN,
IB_QPS_INIT, IB_QPS_RTR);
if (ret) {
dev_err(dev, "modify qp failed(%d)!\n", ret);
@@ -1492,9 +1493,9 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
caps->max_sq_inline = HNS_ROCE_V1_INLINE_SIZE;
caps->num_uars = HNS_ROCE_V1_UAR_NUM;
caps->phy_num_uars = HNS_ROCE_V1_PHY_UAR_NUM;
- caps->num_aeq_vectors = HNS_ROCE_AEQE_VEC_NUM;
- caps->num_comp_vectors = HNS_ROCE_COMP_VEC_NUM;
- caps->num_other_vectors = HNS_ROCE_AEQE_OF_VEC_NUM;
+ caps->num_aeq_vectors = HNS_ROCE_V1_AEQE_VEC_NUM;
+ caps->num_comp_vectors = HNS_ROCE_V1_COMP_VEC_NUM;
+ caps->num_other_vectors = HNS_ROCE_V1_ABNORMAL_VEC_NUM;
caps->num_mtpts = HNS_ROCE_V1_MAX_MTPT_NUM;
caps->num_mtt_segs = HNS_ROCE_V1_MAX_MTT_SEGS;
caps->num_pds = HNS_ROCE_V1_MAX_PD_NUM;
@@ -1529,10 +1530,8 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
caps->num_ports + 1;
}
- for (i = 0; i < caps->num_comp_vectors; i++)
- caps->ceqe_depth[i] = HNS_ROCE_V1_NUM_COMP_EQE;
-
- caps->aeqe_depth = HNS_ROCE_V1_NUM_ASYNC_EQE;
+ caps->ceqe_depth = HNS_ROCE_V1_COMP_EQE_NUM;
+ caps->aeqe_depth = HNS_ROCE_V1_ASYNC_EQE_NUM;
caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev,
ROCEE_ACK_DELAY_REG));
caps->max_mtu = IB_MTU_2048;
@@ -2312,15 +2311,16 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq,
case HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE:
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data = le32_to_cpu(cqe->immediate_data);
+ wc->ex.imm_data =
+ cpu_to_be32(le32_to_cpu(cqe->immediate_data));
break;
case HNS_ROCE_OPCODE_SEND_DATA_RECEIVE:
if (roce_get_bit(cqe->cqe_byte_4,
CQE_BYTE_4_IMM_INDICATOR_S)) {
wc->opcode = IB_WC_RECV;
wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data = le32_to_cpu(
- cqe->immediate_data);
+ wc->ex.imm_data = cpu_to_be32(
+ le32_to_cpu(cqe->immediate_data));
} else {
wc->opcode = IB_WC_RECV;
wc->wc_flags = 0;
@@ -3960,6 +3960,732 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq)
return ret;
}
+static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not)
+{
+ roce_raw_write((eq->cons_index & HNS_ROCE_V1_CONS_IDX_M) |
+ (req_not << eq->log_entries), eq->doorbell);
+}
+
+static void hns_roce_v1_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
+ struct hns_roce_aeqe *aeqe, int qpn)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+
+ dev_warn(dev, "Local Work Queue Catastrophic Error.\n");
+ switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
+ HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
+ case HNS_ROCE_LWQCE_QPC_ERROR:
+ dev_warn(dev, "QP %d, QPC error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_MTU_ERROR:
+ dev_warn(dev, "QP %d, MTU error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
+ dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
+ dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
+ dev_warn(dev, "QP %d, WQE shift error\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_SL_ERROR:
+ dev_warn(dev, "QP %d, SL error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_PORT_ERROR:
+ dev_warn(dev, "QP %d, port error.\n", qpn);
+ break;
+ default:
+ break;
+ }
+}
+
+static void hns_roce_v1_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
+ struct hns_roce_aeqe *aeqe,
+ int qpn)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+
+ dev_warn(dev, "Local Access Violation Work Queue Error.\n");
+ switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
+ HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
+ case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
+ dev_warn(dev, "QP %d, R_key violation.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_LENGTH_ERROR:
+ dev_warn(dev, "QP %d, length error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_VA_ERROR:
+ dev_warn(dev, "QP %d, VA error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_PD_ERROR:
+ dev_err(dev, "QP %d, PD error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
+ dev_warn(dev, "QP %d, rw acc error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
+ dev_warn(dev, "QP %d, key state error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
+ dev_warn(dev, "QP %d, MR operation error.\n", qpn);
+ break;
+ default:
+ break;
+ }
+}
+
+static void hns_roce_v1_qp_err_handle(struct hns_roce_dev *hr_dev,
+ struct hns_roce_aeqe *aeqe,
+ int event_type)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ int phy_port;
+ int qpn;
+
+ qpn = roce_get_field(aeqe->event.qp_event.qp,
+ HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
+ HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S);
+ phy_port = roce_get_field(aeqe->event.qp_event.qp,
+ HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M,
+ HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S);
+ if (qpn <= 1)
+ qpn = HNS_ROCE_MAX_PORTS * qpn + phy_port;
+
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ dev_warn(dev, "Invalid Req Local Work Queue Error.\n"
+ "QP %d, phy_port %d.\n", qpn, phy_port);
+ break;
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ hns_roce_v1_wq_catas_err_handle(hr_dev, aeqe, qpn);
+ break;
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ hns_roce_v1_local_wq_access_err_handle(hr_dev, aeqe, qpn);
+ break;
+ default:
+ break;
+ }
+
+ hns_roce_qp_event(hr_dev, qpn, event_type);
+}
+
+static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev,
+ struct hns_roce_aeqe *aeqe,
+ int event_type)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ u32 cqn;
+
+ cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq,
+ HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
+ HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S));
+
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+ dev_warn(dev, "CQ 0x%x access err.\n", cqn);
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+ dev_warn(dev, "CQ 0x%x overflow\n", cqn);
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
+ dev_warn(dev, "CQ 0x%x ID invalid.\n", cqn);
+ break;
+ default:
+ break;
+ }
+
+ hns_roce_cq_event(hr_dev, cqn, event_type);
+}
+
+static void hns_roce_v1_db_overflow_handle(struct hns_roce_dev *hr_dev,
+ struct hns_roce_aeqe *aeqe)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+
+ switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
+ HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
+ case HNS_ROCE_DB_SUBTYPE_SDB_OVF:
+ dev_warn(dev, "SDB overflow.\n");
+ break;
+ case HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF:
+ dev_warn(dev, "SDB almost overflow.\n");
+ break;
+ case HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP:
+ dev_warn(dev, "SDB almost empty.\n");
+ break;
+ case HNS_ROCE_DB_SUBTYPE_ODB_OVF:
+ dev_warn(dev, "ODB overflow.\n");
+ break;
+ case HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF:
+ dev_warn(dev, "ODB almost overflow.\n");
+ break;
+ case HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP:
+ dev_warn(dev, "SDB almost empty.\n");
+ break;
+ default:
+ break;
+ }
+}
+
+static struct hns_roce_aeqe *get_aeqe_v1(struct hns_roce_eq *eq, u32 entry)
+{
+ unsigned long off = (entry & (eq->entries - 1)) *
+ HNS_ROCE_AEQ_ENTRY_SIZE;
+
+ return (struct hns_roce_aeqe *)((u8 *)
+ (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
+ off % HNS_ROCE_BA_SIZE);
+}
+
+static struct hns_roce_aeqe *next_aeqe_sw_v1(struct hns_roce_eq *eq)
+{
+ struct hns_roce_aeqe *aeqe = get_aeqe_v1(eq, eq->cons_index);
+
+ return (roce_get_bit(aeqe->asyn, HNS_ROCE_AEQE_U32_4_OWNER_S) ^
+ !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
+}
+
+static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_aeqe *aeqe;
+ int aeqes_found = 0;
+ int event_type;
+
+ while ((aeqe = next_aeqe_sw_v1(eq))) {
+
+ /* Make sure we read the AEQ entry after we have checked the
+ * ownership bit
+ */
+ dma_rmb();
+
+ dev_dbg(dev, "aeqe = %p, aeqe->asyn.event_type = 0x%lx\n", aeqe,
+ roce_get_field(aeqe->asyn,
+ HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
+ HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
+ event_type = roce_get_field(aeqe->asyn,
+ HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
+ HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S);
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+ dev_warn(dev, "PATH MIG not supported\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ dev_warn(dev, "COMMUNICATION established\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ dev_warn(dev, "SQ DRAINED not supported\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+ dev_warn(dev, "PATH MIG failed\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ hns_roce_v1_qp_err_handle(hr_dev, aeqe, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+ dev_warn(dev, "SRQ not support!\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+ case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
+ hns_roce_v1_cq_err_handle(hr_dev, aeqe, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_PORT_CHANGE:
+ dev_warn(dev, "port change.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_MB:
+ hns_roce_cmd_event(hr_dev,
+ le16_to_cpu(aeqe->event.cmd.token),
+ aeqe->event.cmd.status,
+ le64_to_cpu(aeqe->event.cmd.out_param
+ ));
+ break;
+ case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
+ hns_roce_v1_db_overflow_handle(hr_dev, aeqe);
+ break;
+ case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
+ dev_warn(dev, "CEQ 0x%lx overflow.\n",
+ roce_get_field(aeqe->event.ce_event.ceqe,
+ HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M,
+ HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S));
+ break;
+ default:
+ dev_warn(dev, "Unhandled event %d on EQ %d at idx %u.\n",
+ event_type, eq->eqn, eq->cons_index);
+ break;
+ }
+
+ eq->cons_index++;
+ aeqes_found = 1;
+
+ if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1) {
+ dev_warn(dev, "cons_index overflow, set back to 0.\n");
+ eq->cons_index = 0;
+ }
+ }
+
+ set_eq_cons_index_v1(eq, 0);
+
+ return aeqes_found;
+}
+
+static struct hns_roce_ceqe *get_ceqe_v1(struct hns_roce_eq *eq, u32 entry)
+{
+ unsigned long off = (entry & (eq->entries - 1)) *
+ HNS_ROCE_CEQ_ENTRY_SIZE;
+
+ return (struct hns_roce_ceqe *)((u8 *)
+ (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
+ off % HNS_ROCE_BA_SIZE);
+}
+
+static struct hns_roce_ceqe *next_ceqe_sw_v1(struct hns_roce_eq *eq)
+{
+ struct hns_roce_ceqe *ceqe = get_ceqe_v1(eq, eq->cons_index);
+
+ return (!!(roce_get_bit(ceqe->comp,
+ HNS_ROCE_CEQE_CEQE_COMP_OWNER_S))) ^
+ (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
+}
+
+static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ struct hns_roce_ceqe *ceqe;
+ int ceqes_found = 0;
+ u32 cqn;
+
+ while ((ceqe = next_ceqe_sw_v1(eq))) {
+
+ /* Make sure we read CEQ entry after we have checked the
+ * ownership bit
+ */
+ dma_rmb();
+
+ cqn = roce_get_field(ceqe->comp,
+ HNS_ROCE_CEQE_CEQE_COMP_CQN_M,
+ HNS_ROCE_CEQE_CEQE_COMP_CQN_S);
+ hns_roce_cq_completion(hr_dev, cqn);
+
+ ++eq->cons_index;
+ ceqes_found = 1;
+
+ if (eq->cons_index > 2 * hr_dev->caps.ceqe_depth - 1) {
+ dev_warn(&eq->hr_dev->pdev->dev,
+ "cons_index overflow, set back to 0.\n");
+ eq->cons_index = 0;
+ }
+ }
+
+ set_eq_cons_index_v1(eq, 0);
+
+ return ceqes_found;
+}
+
+static irqreturn_t hns_roce_v1_msix_interrupt_eq(int irq, void *eq_ptr)
+{
+ struct hns_roce_eq *eq = eq_ptr;
+ struct hns_roce_dev *hr_dev = eq->hr_dev;
+ int int_work = 0;
+
+ if (eq->type_flag == HNS_ROCE_CEQ)
+ /* CEQ irq routine, CEQ is pulse irq, not clear */
+ int_work = hns_roce_v1_ceq_int(hr_dev, eq);
+ else
+ /* AEQ irq routine, AEQ is pulse irq, not clear */
+ int_work = hns_roce_v1_aeq_int(hr_dev, eq);
+
+ return IRQ_RETVAL(int_work);
+}
+
+static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
+{
+ struct hns_roce_dev *hr_dev = dev_id;
+ struct device *dev = &hr_dev->pdev->dev;
+ int int_work = 0;
+ u32 caepaemask_val;
+ u32 cealmovf_val;
+ u32 caepaest_val;
+ u32 aeshift_val;
+ u32 ceshift_val;
+ u32 cemask_val;
+ int i;
+
+ /*
+ * Abnormal interrupt:
+ * AEQ overflow, ECC multi-bit err, CEQ overflow must clear
+ * interrupt, mask irq, clear irq, cancel mask operation
+ */
+ aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
+
+ /* AEQE overflow */
+ if (roce_get_bit(aeshift_val,
+ ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) {
+ dev_warn(dev, "AEQ overflow!\n");
+
+ /* Set mask */
+ caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
+ roce_set_bit(caepaemask_val,
+ ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
+ HNS_ROCE_INT_MASK_ENABLE);
+ roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
+
+ /* Clear int state(INT_WC : write 1 clear) */
+ caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG);
+ roce_set_bit(caepaest_val,
+ ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1);
+ roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val);
+
+ /* Clear mask */
+ caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
+ roce_set_bit(caepaemask_val,
+ ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
+ HNS_ROCE_INT_MASK_DISABLE);
+ roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
+ }
+
+ /* CEQ almost overflow */
+ for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
+ ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG +
+ i * CEQ_REG_OFFSET);
+
+ if (roce_get_bit(ceshift_val,
+ ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) {
+ dev_warn(dev, "CEQ[%d] almost overflow!\n", i);
+ int_work++;
+
+ /* Set mask */
+ cemask_val = roce_read(hr_dev,
+ ROCEE_CAEP_CE_IRQ_MASK_0_REG +
+ i * CEQ_REG_OFFSET);
+ roce_set_bit(cemask_val,
+ ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
+ HNS_ROCE_INT_MASK_ENABLE);
+ roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
+ i * CEQ_REG_OFFSET, cemask_val);
+
+ /* Clear int state(INT_WC : write 1 clear) */
+ cealmovf_val = roce_read(hr_dev,
+ ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
+ i * CEQ_REG_OFFSET);
+ roce_set_bit(cealmovf_val,
+ ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S,
+ 1);
+ roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
+ i * CEQ_REG_OFFSET, cealmovf_val);
+
+ /* Clear mask */
+ cemask_val = roce_read(hr_dev,
+ ROCEE_CAEP_CE_IRQ_MASK_0_REG +
+ i * CEQ_REG_OFFSET);
+ roce_set_bit(cemask_val,
+ ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
+ HNS_ROCE_INT_MASK_DISABLE);
+ roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
+ i * CEQ_REG_OFFSET, cemask_val);
+ }
+ }
+
+ /* ECC multi-bit error alarm */
+ dev_warn(dev, "ECC UCERR ALARM: 0x%x, 0x%x, 0x%x\n",
+ roce_read(hr_dev, ROCEE_ECC_UCERR_ALM0_REG),
+ roce_read(hr_dev, ROCEE_ECC_UCERR_ALM1_REG),
+ roce_read(hr_dev, ROCEE_ECC_UCERR_ALM2_REG));
+
+ dev_warn(dev, "ECC CERR ALARM: 0x%x, 0x%x, 0x%x\n",
+ roce_read(hr_dev, ROCEE_ECC_CERR_ALM0_REG),
+ roce_read(hr_dev, ROCEE_ECC_CERR_ALM1_REG),
+ roce_read(hr_dev, ROCEE_ECC_CERR_ALM2_REG));
+
+ return IRQ_RETVAL(int_work);
+}
+
+static void hns_roce_v1_int_mask_enable(struct hns_roce_dev *hr_dev)
+{
+ u32 aemask_val;
+ int masken = 0;
+ int i;
+
+ /* AEQ INT */
+ aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
+ roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
+ masken);
+ roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken);
+ roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val);
+
+ /* CEQ INT */
+ for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
+ /* IRQ mask */
+ roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
+ i * CEQ_REG_OFFSET, masken);
+ }
+}
+
+static void hns_roce_v1_free_eq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ int npages = (PAGE_ALIGN(eq->eqe_size * eq->entries) +
+ HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
+ int i;
+
+ if (!eq->buf_list)
+ return;
+
+ for (i = 0; i < npages; ++i)
+ dma_free_coherent(&hr_dev->pdev->dev, HNS_ROCE_BA_SIZE,
+ eq->buf_list[i].buf, eq->buf_list[i].map);
+
+ kfree(eq->buf_list);
+}
+
+static void hns_roce_v1_enable_eq(struct hns_roce_dev *hr_dev, int eq_num,
+ int enable_flag)
+{
+ void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num];
+ u32 val;
+
+ val = readl(eqc);
+
+ if (enable_flag)
+ roce_set_field(val,
+ ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
+ ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
+ HNS_ROCE_EQ_STAT_VALID);
+ else
+ roce_set_field(val,
+ ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
+ ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
+ HNS_ROCE_EQ_STAT_INVALID);
+ writel(val, eqc);
+}
+
+static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn];
+ struct device *dev = &hr_dev->pdev->dev;
+ dma_addr_t tmp_dma_addr;
+ u32 eqconsindx_val = 0;
+ u32 eqcuridx_val = 0;
+ u32 eqshift_val = 0;
+ int num_bas;
+ int ret;
+ int i;
+
+ num_bas = (PAGE_ALIGN(eq->entries * eq->eqe_size) +
+ HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
+
+ if ((eq->entries * eq->eqe_size) > HNS_ROCE_BA_SIZE) {
+ dev_err(dev, "[error]eq buf %d gt ba size(%d) need bas=%d\n",
+ (eq->entries * eq->eqe_size), HNS_ROCE_BA_SIZE,
+ num_bas);
+ return -EINVAL;
+ }
+
+ eq->buf_list = kcalloc(num_bas, sizeof(*eq->buf_list), GFP_KERNEL);
+ if (!eq->buf_list)
+ return -ENOMEM;
+
+ for (i = 0; i < num_bas; ++i) {
+ eq->buf_list[i].buf = dma_alloc_coherent(dev, HNS_ROCE_BA_SIZE,
+ &tmp_dma_addr,
+ GFP_KERNEL);
+ if (!eq->buf_list[i].buf) {
+ ret = -ENOMEM;
+ goto err_out_free_pages;
+ }
+
+ eq->buf_list[i].map = tmp_dma_addr;
+ memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE);
+ }
+ eq->cons_index = 0;
+ roce_set_field(eqshift_val,
+ ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
+ ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
+ HNS_ROCE_EQ_STAT_INVALID);
+ roce_set_field(eqshift_val,
+ ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M,
+ ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S,
+ eq->log_entries);
+ writel(eqshift_val, eqc);
+
+ /* Configure eq extended address 12~44bit */
+ writel((u32)(eq->buf_list[0].map >> 12), eqc + 4);
+
+ /*
+ * Configure eq extended address 45~49 bit.
+ * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
+ * using 4K page, and shift more 32 because of
+ * caculating the high 32 bit value evaluated to hardware.
+ */
+ roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M,
+ ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S,
+ eq->buf_list[0].map >> 44);
+ roce_set_field(eqcuridx_val,
+ ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M,
+ ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0);
+ writel(eqcuridx_val, eqc + 8);
+
+ /* Configure eq consumer index */
+ roce_set_field(eqconsindx_val,
+ ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M,
+ ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0);
+ writel(eqconsindx_val, eqc + 0xc);
+
+ return 0;
+
+err_out_free_pages:
+ for (i -= 1; i >= 0; i--)
+ dma_free_coherent(dev, HNS_ROCE_BA_SIZE, eq->buf_list[i].buf,
+ eq->buf_list[i].map);
+
+ kfree(eq->buf_list);
+ return ret;
+}
+
+static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_eq *eq;
+ int irq_num;
+ int eq_num;
+ int ret;
+ int i, j;
+
+ eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
+ irq_num = eq_num + hr_dev->caps.num_other_vectors;
+
+ eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL);
+ if (!eq_table->eq)
+ return -ENOMEM;
+
+ eq_table->eqc_base = kcalloc(eq_num, sizeof(*eq_table->eqc_base),
+ GFP_KERNEL);
+ if (!eq_table->eqc_base) {
+ ret = -ENOMEM;
+ goto err_eqc_base_alloc_fail;
+ }
+
+ for (i = 0; i < eq_num; i++) {
+ eq = &eq_table->eq[i];
+ eq->hr_dev = hr_dev;
+ eq->eqn = i;
+ eq->irq = hr_dev->irq[i];
+ eq->log_page_size = PAGE_SHIFT;
+
+ if (i < hr_dev->caps.num_comp_vectors) {
+ /* CEQ */
+ eq_table->eqc_base[i] = hr_dev->reg_base +
+ ROCEE_CAEP_CEQC_SHIFT_0_REG +
+ CEQ_REG_OFFSET * i;
+ eq->type_flag = HNS_ROCE_CEQ;
+ eq->doorbell = hr_dev->reg_base +
+ ROCEE_CAEP_CEQC_CONS_IDX_0_REG +
+ CEQ_REG_OFFSET * i;
+ eq->entries = hr_dev->caps.ceqe_depth;
+ eq->log_entries = ilog2(eq->entries);
+ eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
+ } else {
+ /* AEQ */
+ eq_table->eqc_base[i] = hr_dev->reg_base +
+ ROCEE_CAEP_AEQC_AEQE_SHIFT_REG;
+ eq->type_flag = HNS_ROCE_AEQ;
+ eq->doorbell = hr_dev->reg_base +
+ ROCEE_CAEP_AEQE_CONS_IDX_REG;
+ eq->entries = hr_dev->caps.aeqe_depth;
+ eq->log_entries = ilog2(eq->entries);
+ eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
+ }
+ }
+
+ /* Disable irq */
+ hns_roce_v1_int_mask_enable(hr_dev);
+
+ /* Configure ce int interval */
+ roce_write(hr_dev, ROCEE_CAEP_CE_INTERVAL_CFG_REG,
+ HNS_ROCE_CEQ_DEFAULT_INTERVAL);
+
+ /* Configure ce int burst num */
+ roce_write(hr_dev, ROCEE_CAEP_CE_BURST_NUM_CFG_REG,
+ HNS_ROCE_CEQ_DEFAULT_BURST_NUM);
+
+ for (i = 0; i < eq_num; i++) {
+ ret = hns_roce_v1_create_eq(hr_dev, &eq_table->eq[i]);
+ if (ret) {
+ dev_err(dev, "eq create failed\n");
+ goto err_create_eq_fail;
+ }
+ }
+
+ for (j = 0; j < irq_num; j++) {
+ if (j < eq_num)
+ ret = request_irq(hr_dev->irq[j],
+ hns_roce_v1_msix_interrupt_eq, 0,
+ hr_dev->irq_names[j],
+ &eq_table->eq[j]);
+ else
+ ret = request_irq(hr_dev->irq[j],
+ hns_roce_v1_msix_interrupt_abn, 0,
+ hr_dev->irq_names[j], hr_dev);
+
+ if (ret) {
+ dev_err(dev, "request irq error!\n");
+ goto err_request_irq_fail;
+ }
+ }
+
+ for (i = 0; i < eq_num; i++)
+ hns_roce_v1_enable_eq(hr_dev, i, EQ_ENABLE);
+
+ return 0;
+
+err_request_irq_fail:
+ for (j -= 1; j >= 0; j--)
+ free_irq(hr_dev->irq[j], &eq_table->eq[j]);
+
+err_create_eq_fail:
+ for (i -= 1; i >= 0; i--)
+ hns_roce_v1_free_eq(hr_dev, &eq_table->eq[i]);
+
+ kfree(eq_table->eqc_base);
+
+err_eqc_base_alloc_fail:
+ kfree(eq_table->eq);
+
+ return ret;
+}
+
+static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ int irq_num;
+ int eq_num;
+ int i;
+
+ eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
+ irq_num = eq_num + hr_dev->caps.num_other_vectors;
+ for (i = 0; i < eq_num; i++) {
+ /* Disable EQ */
+ hns_roce_v1_enable_eq(hr_dev, i, EQ_DISABLE);
+
+ free_irq(hr_dev->irq[i], &eq_table->eq[i]);
+
+ hns_roce_v1_free_eq(hr_dev, &eq_table->eq[i]);
+ }
+ for (i = eq_num; i < irq_num; i++)
+ free_irq(hr_dev->irq[i], hr_dev);
+
+ kfree(eq_table->eqc_base);
+ kfree(eq_table->eq);
+}
+
static const struct hns_roce_hw hns_roce_hw_v1 = {
.reset = hns_roce_v1_reset,
.hw_profile = hns_roce_v1_profile,
@@ -3983,6 +4709,8 @@ static const struct hns_roce_hw hns_roce_hw_v1 = {
.poll_cq = hns_roce_v1_poll_cq,
.dereg_mr = hns_roce_v1_dereg_mr,
.destroy_cq = hns_roce_v1_destroy_cq,
+ .init_eq = hns_roce_v1_init_eq_table,
+ .cleanup_eq = hns_roce_v1_cleanup_eq_table,
};
static const struct of_device_id hns_roce_of_match[] = {
@@ -4060,10 +4788,6 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
/* get the mapped register base address */
res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(dev, "memory resource not found!\n");
- return -EINVAL;
- }
hr_dev->reg_base = devm_ioremap_resource(dev, res);
if (IS_ERR(hr_dev->reg_base))
return PTR_ERR(hr_dev->reg_base);
@@ -4132,14 +4856,14 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
/* read the interrupt names from the DT or ACPI */
ret = device_property_read_string_array(dev, "interrupt-names",
hr_dev->irq_names,
- HNS_ROCE_MAX_IRQ_NUM);
+ HNS_ROCE_V1_MAX_IRQ_NUM);
if (ret < 0) {
dev_err(dev, "couldn't get interrupt names from DT or ACPI!\n");
return ret;
}
/* fetch the interrupt numbers */
- for (i = 0; i < HNS_ROCE_MAX_IRQ_NUM; i++) {
+ for (i = 0; i < HNS_ROCE_V1_MAX_IRQ_NUM; i++) {
hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i);
if (hr_dev->irq[i] <= 0) {
dev_err(dev, "platform get of irq[=%d] failed!\n", i);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 21a07ef0afc9..b44ddd239060 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -60,8 +60,13 @@
#define HNS_ROCE_V1_GID_NUM 16
#define HNS_ROCE_V1_RESV_QP 8
-#define HNS_ROCE_V1_NUM_COMP_EQE 0x8000
-#define HNS_ROCE_V1_NUM_ASYNC_EQE 0x400
+#define HNS_ROCE_V1_MAX_IRQ_NUM 34
+#define HNS_ROCE_V1_COMP_VEC_NUM 32
+#define HNS_ROCE_V1_AEQE_VEC_NUM 1
+#define HNS_ROCE_V1_ABNORMAL_VEC_NUM 1
+
+#define HNS_ROCE_V1_COMP_EQE_NUM 0x8000
+#define HNS_ROCE_V1_ASYNC_EQE_NUM 0x400
#define HNS_ROCE_V1_QPC_ENTRY_SIZE 256
#define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8
@@ -159,6 +164,41 @@
#define SDB_INV_CNT_OFFSET 8
#define SDB_ST_CMP_VAL 8
+#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x10
+#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x10
+
+#define HNS_ROCE_INT_MASK_DISABLE 0
+#define HNS_ROCE_INT_MASK_ENABLE 1
+
+#define CEQ_REG_OFFSET 0x18
+
+#define HNS_ROCE_CEQE_CEQE_COMP_OWNER_S 0
+
+#define HNS_ROCE_V1_CONS_IDX_M GENMASK(15, 0)
+
+#define HNS_ROCE_CEQE_CEQE_COMP_CQN_S 16
+#define HNS_ROCE_CEQE_CEQE_COMP_CQN_M GENMASK(31, 16)
+
+#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S 16
+#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M GENMASK(23, 16)
+
+#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S 24
+#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M GENMASK(30, 24)
+
+#define HNS_ROCE_AEQE_U32_4_OWNER_S 31
+
+#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S 0
+#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M GENMASK(23, 0)
+
+#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S 25
+#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M GENMASK(27, 25)
+
+#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S 0
+#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M GENMASK(15, 0)
+
+#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0
+#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M GENMASK(4, 0)
+
struct hns_roce_cq_context {
u32 cqc_byte_4;
u32 cq_bt_l;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 8e18445714a9..256fe110107a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -34,6 +34,7 @@
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
+#include <net/addrconf.h>
#include <rdma/ib_umem.h>
#include "hnae3.h"
@@ -51,32 +52,106 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
dseg->len = cpu_to_le32(sg->length);
}
+static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ void *wqe, unsigned int *sge_ind,
+ struct ib_send_wr **bad_wr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+ struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ int i;
+
+ if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
+ if (rc_sq_wqe->msg_len > hr_dev->caps.max_sq_inline) {
+ *bad_wr = wr;
+ dev_err(hr_dev->dev, "inline len(1-%d)=%d, illegal",
+ rc_sq_wqe->msg_len, hr_dev->caps.max_sq_inline);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < wr->num_sge; i++) {
+ memcpy(wqe, ((void *)wr->sg_list[i].addr),
+ wr->sg_list[i].length);
+ wqe += wr->sg_list[i].length;
+ }
+
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
+ 1);
+ } else {
+ if (wr->num_sge <= 2) {
+ for (i = 0; i < wr->num_sge; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ set_data_seg_v2(dseg, wr->sg_list + i);
+ dseg++;
+ }
+ }
+ } else {
+ roce_set_field(rc_sq_wqe->byte_20,
+ V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
+ V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
+ (*sge_ind) & (qp->sge.sge_cnt - 1));
+
+ for (i = 0; i < 2; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ set_data_seg_v2(dseg, wr->sg_list + i);
+ dseg++;
+ }
+ }
+
+ dseg = get_send_extend_sge(qp,
+ (*sge_ind) & (qp->sge.sge_cnt - 1));
+
+ for (i = 0; i < wr->num_sge - 2; i++) {
+ if (likely(wr->sg_list[i + 2].length)) {
+ set_data_seg_v2(dseg,
+ wr->sg_list + 2 + i);
+ dseg++;
+ (*sge_ind)++;
+ }
+ }
+ }
+
+ roce_set_field(rc_sq_wqe->byte_16,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, wr->num_sge);
+ }
+
+ return 0;
+}
+
static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
+ struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
struct hns_roce_qp *qp = to_hr_qp(ibqp);
struct hns_roce_v2_wqe_data_seg *dseg;
struct device *dev = hr_dev->dev;
struct hns_roce_v2_db sq_db;
unsigned int sge_ind = 0;
- unsigned int wqe_sz = 0;
unsigned int owner_bit;
unsigned long flags;
unsigned int ind;
void *wqe = NULL;
+ bool loopback;
int ret = 0;
+ u8 *smac;
int nreq;
int i;
- if (unlikely(ibqp->qp_type != IB_QPT_RC)) {
+ if (unlikely(ibqp->qp_type != IB_QPT_RC &&
+ ibqp->qp_type != IB_QPT_GSI &&
+ ibqp->qp_type != IB_QPT_UD)) {
dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type);
*bad_wr = NULL;
return -EOPNOTSUPP;
}
- if (unlikely(qp->state != IB_QPS_RTS && qp->state != IB_QPS_SQD)) {
+ if (unlikely(qp->state == IB_QPS_RESET || qp->state == IB_QPS_INIT ||
+ qp->state == IB_QPS_RTR)) {
dev_err(dev, "Post WQE fail, QP state %d err!\n", qp->state);
*bad_wr = wr;
return -EINVAL;
@@ -106,161 +181,255 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
wr->wr_id;
owner_bit = ~(qp->sq.head >> ilog2(qp->sq.wqe_cnt)) & 0x1;
- rc_sq_wqe = wqe;
- memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
- for (i = 0; i < wr->num_sge; i++)
- rc_sq_wqe->msg_len += wr->sg_list[i].length;
- rc_sq_wqe->inv_key_immtdata = send_ieth(wr);
+ /* Corresponding to the QP type, wqe process separately */
+ if (ibqp->qp_type == IB_QPT_GSI) {
+ ud_sq_wqe = wqe;
+ memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
+
+ roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M,
+ V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]);
+ roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M,
+ V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]);
+ roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M,
+ V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]);
+ roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M,
+ V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]);
+ roce_set_field(ud_sq_wqe->byte_48,
+ V2_UD_SEND_WQE_BYTE_48_DMAC_4_M,
+ V2_UD_SEND_WQE_BYTE_48_DMAC_4_S,
+ ah->av.mac[4]);
+ roce_set_field(ud_sq_wqe->byte_48,
+ V2_UD_SEND_WQE_BYTE_48_DMAC_5_M,
+ V2_UD_SEND_WQE_BYTE_48_DMAC_5_S,
+ ah->av.mac[5]);
+
+ /* MAC loopback */
+ smac = (u8 *)hr_dev->dev_addr[qp->port];
+ loopback = ether_addr_equal_unaligned(ah->av.mac,
+ smac) ? 1 : 0;
+
+ roce_set_bit(ud_sq_wqe->byte_40,
+ V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback);
+
+ roce_set_field(ud_sq_wqe->byte_4,
+ V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
+ HNS_ROCE_V2_WQE_OP_SEND);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S,
- (wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
+ for (i = 0; i < wr->num_sge; i++)
+ ud_sq_wqe->msg_len += wr->sg_list[i].length;
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S,
- (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
+ ud_sq_wqe->immtdata = send_ieth(wr);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S,
- (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
+ /* Set sig attr */
+ roce_set_bit(ud_sq_wqe->byte_4,
+ V2_UD_SEND_WQE_BYTE_4_CQE_S,
+ (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
- owner_bit);
+ /* Set se attr */
+ roce_set_bit(ud_sq_wqe->byte_4,
+ V2_UD_SEND_WQE_BYTE_4_SE_S,
+ (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
- switch (wr->opcode) {
- case IB_WR_RDMA_READ:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_RDMA_READ);
- rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
- rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
- break;
- case IB_WR_RDMA_WRITE:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_RDMA_WRITE);
- rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
- rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
- break;
- case IB_WR_RDMA_WRITE_WITH_IMM:
- roce_set_field(rc_sq_wqe->byte_4,
+ roce_set_bit(ud_sq_wqe->byte_4,
+ V2_UD_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
+
+ roce_set_field(ud_sq_wqe->byte_16,
+ V2_UD_SEND_WQE_BYTE_16_PD_M,
+ V2_UD_SEND_WQE_BYTE_16_PD_S,
+ to_hr_pd(ibqp->pd)->pdn);
+
+ roce_set_field(ud_sq_wqe->byte_16,
+ V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
+ V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S,
+ wr->num_sge);
+
+ roce_set_field(ud_sq_wqe->byte_20,
+ V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
+ V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
+ sge_ind & (qp->sge.sge_cnt - 1));
+
+ roce_set_field(ud_sq_wqe->byte_24,
+ V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
+ V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0);
+ ud_sq_wqe->qkey =
+ cpu_to_be32(ud_wr(wr)->remote_qkey & 0x80000000) ?
+ qp->qkey : ud_wr(wr)->remote_qkey;
+ roce_set_field(ud_sq_wqe->byte_32,
+ V2_UD_SEND_WQE_BYTE_32_DQPN_M,
+ V2_UD_SEND_WQE_BYTE_32_DQPN_S,
+ ud_wr(wr)->remote_qpn);
+
+ roce_set_field(ud_sq_wqe->byte_36,
+ V2_UD_SEND_WQE_BYTE_36_VLAN_M,
+ V2_UD_SEND_WQE_BYTE_36_VLAN_S,
+ ah->av.vlan);
+ roce_set_field(ud_sq_wqe->byte_36,
+ V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
+ V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S,
+ ah->av.hop_limit);
+ roce_set_field(ud_sq_wqe->byte_36,
+ V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
+ V2_UD_SEND_WQE_BYTE_36_TCLASS_S,
+ 0);
+ roce_set_field(ud_sq_wqe->byte_36,
+ V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
+ V2_UD_SEND_WQE_BYTE_36_TCLASS_S,
+ 0);
+ roce_set_field(ud_sq_wqe->byte_40,
+ V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
+ V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, 0);
+ roce_set_field(ud_sq_wqe->byte_40,
+ V2_UD_SEND_WQE_BYTE_40_SL_M,
+ V2_UD_SEND_WQE_BYTE_40_SL_S,
+ ah->av.sl_tclass_flowlabel >>
+ HNS_ROCE_SL_SHIFT);
+ roce_set_field(ud_sq_wqe->byte_40,
+ V2_UD_SEND_WQE_BYTE_40_PORTN_M,
+ V2_UD_SEND_WQE_BYTE_40_PORTN_S,
+ qp->port);
+
+ roce_set_field(ud_sq_wqe->byte_48,
+ V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
+ V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S,
+ hns_get_gid_index(hr_dev, qp->phy_port,
+ ah->av.gid_index));
+
+ memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
+ GID_LEN_V2);
+
+ dseg = get_send_extend_sge(qp,
+ sge_ind & (qp->sge.sge_cnt - 1));
+ for (i = 0; i < wr->num_sge; i++) {
+ set_data_seg_v2(dseg + i, wr->sg_list + i);
+ sge_ind++;
+ }
+
+ ind++;
+ } else if (ibqp->qp_type == IB_QPT_RC) {
+ rc_sq_wqe = wqe;
+ memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
+ for (i = 0; i < wr->num_sge; i++)
+ rc_sq_wqe->msg_len += wr->sg_list[i].length;
+
+ rc_sq_wqe->inv_key_immtdata = send_ieth(wr);
+
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_FENCE_S,
+ (wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
+
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_SE_S,
+ (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
+
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_CQE_S,
+ (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
+
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
+
+ switch (wr->opcode) {
+ case IB_WR_RDMA_READ:
+ roce_set_field(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
+ HNS_ROCE_V2_WQE_OP_RDMA_READ);
+ rc_sq_wqe->rkey =
+ cpu_to_le32(rdma_wr(wr)->rkey);
+ rc_sq_wqe->va =
+ cpu_to_le64(rdma_wr(wr)->remote_addr);
+ break;
+ case IB_WR_RDMA_WRITE:
+ roce_set_field(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
+ HNS_ROCE_V2_WQE_OP_RDMA_WRITE);
+ rc_sq_wqe->rkey =
+ cpu_to_le32(rdma_wr(wr)->rkey);
+ rc_sq_wqe->va =
+ cpu_to_le64(rdma_wr(wr)->remote_addr);
+ break;
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM);
- rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
- rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
- break;
- case IB_WR_SEND:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_SEND);
- break;
- case IB_WR_SEND_WITH_INV:
- roce_set_field(rc_sq_wqe->byte_4,
+ rc_sq_wqe->rkey =
+ cpu_to_le32(rdma_wr(wr)->rkey);
+ rc_sq_wqe->va =
+ cpu_to_le64(rdma_wr(wr)->remote_addr);
+ break;
+ case IB_WR_SEND:
+ roce_set_field(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
+ HNS_ROCE_V2_WQE_OP_SEND);
+ break;
+ case IB_WR_SEND_WITH_INV:
+ roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_SEND_WITH_INV);
- break;
- case IB_WR_SEND_WITH_IMM:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM);
- break;
- case IB_WR_LOCAL_INV:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_LOCAL_INV);
- break;
- case IB_WR_ATOMIC_CMP_AND_SWP:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP);
- break;
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD);
- break;
- case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
- roce_set_field(rc_sq_wqe->byte_4,
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ roce_set_field(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
+ HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM);
+ break;
+ case IB_WR_LOCAL_INV:
+ roce_set_field(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
+ HNS_ROCE_V2_WQE_OP_LOCAL_INV);
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ roce_set_field(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
+ HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP);
+ break;
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ roce_set_field(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
+ HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD);
+ break;
+ case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
+ roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP);
- break;
- case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
- roce_set_field(rc_sq_wqe->byte_4,
+ break;
+ case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
+ roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD);
- break;
- default:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_MASK);
- break;
- }
-
- wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
- dseg = wqe;
- if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
- if (rc_sq_wqe->msg_len >
- hr_dev->caps.max_sq_inline) {
- ret = -EINVAL;
- *bad_wr = wr;
- dev_err(dev, "inline len(1-%d)=%d, illegal",
- rc_sq_wqe->msg_len,
- hr_dev->caps.max_sq_inline);
- goto out;
+ break;
+ default:
+ roce_set_field(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
+ HNS_ROCE_V2_WQE_OP_MASK);
+ break;
}
- for (i = 0; i < wr->num_sge; i++) {
- memcpy(wqe, ((void *)wr->sg_list[i].addr),
- wr->sg_list[i].length);
- wqe += wr->sg_list[i].length;
- wqe_sz += wr->sg_list[i].length;
- }
+ wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
+ dseg = wqe;
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1);
+ ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
+ &sge_ind, bad_wr);
+ if (ret)
+ goto out;
+ ind++;
} else {
- if (wr->num_sge <= 2) {
- for (i = 0; i < wr->num_sge; i++)
- set_data_seg_v2(dseg + i,
- wr->sg_list + i);
- } else {
- roce_set_field(rc_sq_wqe->byte_20,
- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
- sge_ind & (qp->sge.sge_cnt - 1));
-
- for (i = 0; i < 2; i++)
- set_data_seg_v2(dseg + i,
- wr->sg_list + i);
-
- dseg = get_send_extend_sge(qp,
- sge_ind & (qp->sge.sge_cnt - 1));
-
- for (i = 0; i < wr->num_sge - 2; i++) {
- set_data_seg_v2(dseg + i,
- wr->sg_list + 2 + i);
- sge_ind++;
- }
- }
-
- roce_set_field(rc_sq_wqe->byte_16,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
- wr->num_sge);
- wqe_sz += wr->num_sge *
- sizeof(struct hns_roce_v2_wqe_data_seg);
+ dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+ return -EOPNOTSUPP;
}
- ind++;
}
out:
@@ -299,6 +468,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct hns_roce_v2_wqe_data_seg *dseg;
+ struct hns_roce_rinl_sge *sge_list;
struct device *dev = hr_dev->dev;
struct hns_roce_v2_db rq_db;
unsigned long flags;
@@ -347,6 +517,14 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
dseg[i].addr = 0;
}
+ /* rq support inline data */
+ sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
+ hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = (u32)wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++) {
+ sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
+ sge_list[i].len = wr->sg_list[i].length;
+ }
+
hr_qp->rq.wrid[ind] = wr->wr_id;
ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
@@ -908,9 +1086,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
caps->num_uars = HNS_ROCE_V2_UAR_NUM;
caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM;
- caps->num_aeq_vectors = 1;
- caps->num_comp_vectors = 63;
- caps->num_other_vectors = 0;
+ caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM;
+ caps->num_comp_vectors = HNS_ROCE_V2_COMP_VEC_NUM;
+ caps->num_other_vectors = HNS_ROCE_V2_ABNORMAL_VEC_NUM;
caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM;
caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
@@ -955,12 +1133,18 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->cqe_ba_pg_sz = 0;
caps->cqe_buf_pg_sz = 0;
caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM;
+ caps->eqe_ba_pg_sz = 0;
+ caps->eqe_buf_pg_sz = 0;
+ caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM;
caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE;
caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR |
- HNS_ROCE_CAP_FLAG_ROCE_V1_V2;
+ HNS_ROCE_CAP_FLAG_ROCE_V1_V2 |
+ HNS_ROCE_CAP_FLAG_RQ_INLINE;
caps->pkey_table_len[0] = 1;
caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
+ caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
+ caps->aeqe_depth = HNS_ROCE_V2_ASYNC_EQE_NUM;
caps->local_ca_ack_delay = 0;
caps->max_mtu = IB_MTU_4096;
@@ -1382,6 +1566,8 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CQ_ST_M,
V2_CQC_BYTE_4_CQ_ST_S, V2_CQ_STATE_VALID);
+ roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_ARM_ST_M,
+ V2_CQC_BYTE_4_ARM_ST_S, REG_NXT_CEQE);
roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M,
V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent));
roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M,
@@ -1422,6 +1608,15 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
roce_set_field(cq_context->byte_40_cqe_ba, V2_CQC_BYTE_40_CQE_BA_M,
V2_CQC_BYTE_40_CQE_BA_S, (dma_handle >> (32 + 3)));
+
+ roce_set_field(cq_context->byte_56_cqe_period_maxcnt,
+ V2_CQC_BYTE_56_CQ_MAX_CNT_M,
+ V2_CQC_BYTE_56_CQ_MAX_CNT_S,
+ HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM);
+ roce_set_field(cq_context->byte_56_cqe_period_maxcnt,
+ V2_CQC_BYTE_56_CQ_PERIOD_M,
+ V2_CQC_BYTE_56_CQ_PERIOD_S,
+ HNS_ROCE_V2_CQ_DEFAULT_INTERVAL);
}
static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
@@ -1457,6 +1652,40 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
return 0;
}
+static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
+ struct hns_roce_qp **cur_qp,
+ struct ib_wc *wc)
+{
+ struct hns_roce_rinl_sge *sge_list;
+ u32 wr_num, wr_cnt, sge_num;
+ u32 sge_cnt, data_len, size;
+ void *wqe_buf;
+
+ wr_num = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_WQE_INDX_M,
+ V2_CQE_BYTE_4_WQE_INDX_S) & 0xffff;
+ wr_cnt = wr_num & ((*cur_qp)->rq.wqe_cnt - 1);
+
+ sge_list = (*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sg_list;
+ sge_num = (*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sge_cnt;
+ wqe_buf = get_recv_wqe(*cur_qp, wr_cnt);
+ data_len = wc->byte_len;
+
+ for (sge_cnt = 0; (sge_cnt < sge_num) && (data_len); sge_cnt++) {
+ size = min(sge_list[sge_cnt].len, data_len);
+ memcpy((void *)sge_list[sge_cnt].addr, wqe_buf, size);
+
+ data_len -= size;
+ wqe_buf += size;
+ }
+
+ if (data_len) {
+ wc->status = IB_WC_LOC_LEN_ERR;
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
struct hns_roce_qp **cur_qp, struct ib_wc *wc)
{
@@ -1469,6 +1698,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
u32 opcode;
u32 status;
int qpn;
+ int ret;
/* Find cqe according to consumer index */
cqe = next_cqe_sw_v2(hr_cq);
@@ -1636,7 +1866,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM:
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data = le32_to_cpu(cqe->rkey_immtdata);
+ wc->ex.imm_data = cqe->immtdata;
break;
case HNS_ROCE_V2_OPCODE_SEND:
wc->opcode = IB_WC_RECV;
@@ -1645,18 +1875,29 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM:
wc->opcode = IB_WC_RECV;
wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data = le32_to_cpu(cqe->rkey_immtdata);
+ wc->ex.imm_data = cqe->immtdata;
break;
case HNS_ROCE_V2_OPCODE_SEND_WITH_INV:
wc->opcode = IB_WC_RECV;
wc->wc_flags = IB_WC_WITH_INVALIDATE;
- wc->ex.invalidate_rkey = cqe->rkey_immtdata;
+ wc->ex.invalidate_rkey = le32_to_cpu(cqe->rkey);
break;
default:
wc->status = IB_WC_GENERAL_ERR;
break;
}
+ if ((wc->qp->qp_type == IB_QPT_RC ||
+ wc->qp->qp_type == IB_QPT_UC) &&
+ (opcode == HNS_ROCE_V2_OPCODE_SEND ||
+ opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_IMM ||
+ opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_INV) &&
+ (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_RQ_INLINE_S))) {
+ ret = hns_roce_handle_recv_inl_wqe(cqe, cur_qp, wc);
+ if (ret)
+ return -EAGAIN;
+ }
+
/* Update tail pointer, record wr_id */
wq = &(*cur_qp)->rq;
wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
@@ -1670,6 +1911,21 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->wc_flags |= (roce_get_bit(cqe->byte_32,
V2_CQE_BYTE_32_GRH_S) ?
IB_WC_GRH : 0);
+ wc->port_num = roce_get_field(cqe->byte_32,
+ V2_CQE_BYTE_32_PORTN_M, V2_CQE_BYTE_32_PORTN_S);
+ wc->pkey_index = 0;
+ memcpy(wc->smac, cqe->smac, 4);
+ wc->smac[4] = roce_get_field(cqe->byte_28,
+ V2_CQE_BYTE_28_SMAC_4_M,
+ V2_CQE_BYTE_28_SMAC_4_S);
+ wc->smac[5] = roce_get_field(cqe->byte_28,
+ V2_CQE_BYTE_28_SMAC_5_M,
+ V2_CQE_BYTE_28_SMAC_5_S);
+ wc->vlan_id = 0xffff;
+ wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
+ wc->network_hdr_type = roce_get_field(cqe->byte_28,
+ V2_CQE_BYTE_28_PORT_TYPE_M,
+ V2_CQE_BYTE_28_PORT_TYPE_S);
}
return 0;
@@ -1859,8 +2115,39 @@ static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev,
return ret;
}
+static void set_access_flags(struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask,
+ const struct ib_qp_attr *attr, int attr_mask)
+{
+ u8 dest_rd_atomic;
+ u32 access_flags;
+
+ dest_rd_atomic = !!(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) ?
+ attr->max_dest_rd_atomic : hr_qp->resp_depth;
+
+ access_flags = !!(attr_mask & IB_QP_ACCESS_FLAGS) ?
+ attr->qp_access_flags : hr_qp->atomic_rd_en;
+
+ if (!dest_rd_atomic)
+ access_flags &= IB_ACCESS_REMOTE_WRITE;
+
+ roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
+ !!(access_flags & IB_ACCESS_REMOTE_READ));
+ roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, 0);
+
+ roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S,
+ !!(access_flags & IB_ACCESS_REMOTE_WRITE));
+ roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, 0);
+
+ roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
+ !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
+ roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
+}
+
static void modify_qp_reset_to_init(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
+ int attr_mask,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
@@ -1877,9 +2164,18 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
V2_QPC_BYTE_4_TST_S, 0);
- roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S, hr_qp->sq.max_gs > 2 ?
- ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
+ if (ibqp->qp_type == IB_QPT_GSI)
+ roce_set_field(context->byte_4_sqpn_tst,
+ V2_QPC_BYTE_4_SGE_SHIFT_M,
+ V2_QPC_BYTE_4_SGE_SHIFT_S,
+ ilog2((unsigned int)hr_qp->sge.sge_cnt));
+ else
+ roce_set_field(context->byte_4_sqpn_tst,
+ V2_QPC_BYTE_4_SGE_SHIFT_M,
+ V2_QPC_BYTE_4_SGE_SHIFT_S,
+ hr_qp->sq.max_gs > 2 ?
+ ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
+
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
@@ -1944,18 +2240,13 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0);
roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0);
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, 0);
-
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, 0);
-
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
+ if (attr_mask & IB_QP_QKEY) {
+ context->qkey_xrcd = attr->qkey;
+ qpc_mask->qkey_xrcd = 0;
+ hr_qp->qkey = attr->qkey;
+ }
+ roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1);
roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0);
roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
@@ -2176,9 +2467,17 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
V2_QPC_BYTE_4_TST_S, 0);
- roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S, hr_qp->sq.max_gs > 2 ?
- ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
+ if (ibqp->qp_type == IB_QPT_GSI)
+ roce_set_field(context->byte_4_sqpn_tst,
+ V2_QPC_BYTE_4_SGE_SHIFT_M,
+ V2_QPC_BYTE_4_SGE_SHIFT_S,
+ ilog2((unsigned int)hr_qp->sge.sge_cnt));
+ else
+ roce_set_field(context->byte_4_sqpn_tst,
+ V2_QPC_BYTE_4_SGE_SHIFT_M,
+ V2_QPC_BYTE_4_SGE_SHIFT_S, hr_qp->sq.max_gs > 2 ?
+ ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
+
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
@@ -2239,7 +2538,7 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
V2_QPC_BYTE_80_RX_CQN_S, 0);
roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
- V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->recv_cq)->cqn);
+ V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn);
roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
V2_QPC_BYTE_252_TX_CQN_S, 0);
@@ -2255,10 +2554,10 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, 0);
}
- if (attr_mask & IB_QP_PKEY_INDEX)
- context->qkey_xrcd = attr->pkey_index;
- else
- context->qkey_xrcd = hr_qp->pkey_index;
+ if (attr_mask & IB_QP_QKEY) {
+ context->qkey_xrcd = attr->qkey;
+ qpc_mask->qkey_xrcd = 0;
+ }
roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn);
@@ -2354,7 +2653,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_SGE_HOP_NUM_M,
V2_QPC_BYTE_20_SGE_HOP_NUM_S,
- hr_qp->sq.max_gs > 2 ? hr_dev->caps.mtt_hop_num : 0);
+ ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ?
+ hr_dev->caps.mtt_hop_num : 0);
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_SGE_HOP_NUM_M,
V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0);
@@ -2463,11 +2763,14 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_LBI_S, 0);
}
- roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M,
- V2_QPC_BYTE_140_RR_MAX_S,
- ilog2((unsigned int)attr->max_dest_rd_atomic));
- roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M,
- V2_QPC_BYTE_140_RR_MAX_S, 0);
+ if ((attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) &&
+ attr->max_dest_rd_atomic) {
+ roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M,
+ V2_QPC_BYTE_140_RR_MAX_S,
+ fls(attr->max_dest_rd_atomic - 1));
+ roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M,
+ V2_QPC_BYTE_140_RR_MAX_S, 0);
+ }
roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num);
@@ -2511,8 +2814,13 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S, 0);
- roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
- V2_QPC_BYTE_24_MTU_S, attr->path_mtu);
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD)
+ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
+ V2_QPC_BYTE_24_MTU_S, IB_MTU_4096);
+ else
+ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
+ V2_QPC_BYTE_24_MTU_S, attr->path_mtu);
+
roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
V2_QPC_BYTE_24_MTU_S, 0);
@@ -2557,12 +2865,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
V2_QPC_BYTE_168_LP_SGEN_INI_M,
V2_QPC_BYTE_168_LP_SGEN_INI_S, 0);
- roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M,
- V2_QPC_BYTE_208_SR_MAX_S,
- ilog2((unsigned int)attr->max_rd_atomic));
- roce_set_field(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M,
- V2_QPC_BYTE_208_SR_MAX_S, 0);
-
roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr));
roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
@@ -2625,13 +2927,14 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0);
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
- context->sq_cur_sge_blk_addr = hr_qp->sq.max_gs > 2 ?
+ context->sq_cur_sge_blk_addr =
+ ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ?
((u32)(mtts[hr_qp->sge.offset / page_size]
>> PAGE_ADDR_SHIFT)) : 0;
roce_set_field(context->byte_184_irrl_idx,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S,
- hr_qp->sq.max_gs > 2 ?
+ ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ?
(mtts[hr_qp->sge.offset / page_size] >>
(32 + PAGE_ADDR_SHIFT)) : 0);
qpc_mask->sq_cur_sge_blk_addr = 0;
@@ -2766,6 +3069,14 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_SQ_MAX_PSN_M,
V2_QPC_BYTE_196_SQ_MAX_PSN_S, 0);
+ if ((attr_mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) {
+ roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M,
+ V2_QPC_BYTE_208_SR_MAX_S,
+ fls(attr->max_rd_atomic - 1));
+ roce_set_field(qpc_mask->byte_208_irrl,
+ V2_QPC_BYTE_208_SR_MAX_M,
+ V2_QPC_BYTE_208_SR_MAX_S, 0);
+ }
return 0;
}
@@ -2794,7 +3105,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
*/
memset(qpc_mask, 0xff, sizeof(*qpc_mask));
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- modify_qp_reset_to_init(ibqp, attr, context, qpc_mask);
+ modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
+ qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
modify_qp_init_to_init(ibqp, attr, attr_mask, context,
qpc_mask);
@@ -2829,6 +3141,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
goto out;
}
+ if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
+ set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
+
/* Every status migrate must change state */
roce_set_field(context->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M,
V2_QPC_BYTE_60_QP_ST_S, new_state);
@@ -2845,6 +3160,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
hr_qp->state = new_state;
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ hr_qp->atomic_rd_en = attr->qp_access_flags;
+
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
hr_qp->resp_depth = attr->max_dest_rd_atomic;
if (attr_mask & IB_QP_PORT) {
@@ -3098,6 +3416,11 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
}
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+ kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
+ kfree(hr_qp->rq_inl_buf.wqe_list);
+ }
+
return 0;
}
@@ -3162,6 +3485,1146 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
return ret;
}
+static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
+{
+ u32 doorbell[2];
+
+ doorbell[0] = 0;
+ doorbell[1] = 0;
+
+ if (eq->type_flag == HNS_ROCE_AEQ) {
+ roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_CMD_M,
+ HNS_ROCE_V2_EQ_DB_CMD_S,
+ eq->arm_st == HNS_ROCE_V2_EQ_ALWAYS_ARMED ?
+ HNS_ROCE_EQ_DB_CMD_AEQ :
+ HNS_ROCE_EQ_DB_CMD_AEQ_ARMED);
+ } else {
+ roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_TAG_M,
+ HNS_ROCE_V2_EQ_DB_TAG_S, eq->eqn);
+
+ roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_CMD_M,
+ HNS_ROCE_V2_EQ_DB_CMD_S,
+ eq->arm_st == HNS_ROCE_V2_EQ_ALWAYS_ARMED ?
+ HNS_ROCE_EQ_DB_CMD_CEQ :
+ HNS_ROCE_EQ_DB_CMD_CEQ_ARMED);
+ }
+
+ roce_set_field(doorbell[1], HNS_ROCE_V2_EQ_DB_PARA_M,
+ HNS_ROCE_V2_EQ_DB_PARA_S,
+ (eq->cons_index & HNS_ROCE_V2_CONS_IDX_M));
+
+ hns_roce_write64_k(doorbell, eq->doorbell);
+}
+
+static void hns_roce_v2_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
+ struct hns_roce_aeqe *aeqe,
+ u32 qpn)
+{
+ struct device *dev = hr_dev->dev;
+ int sub_type;
+
+ dev_warn(dev, "Local work queue catastrophic error.\n");
+ sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
+ HNS_ROCE_V2_AEQE_SUB_TYPE_S);
+ switch (sub_type) {
+ case HNS_ROCE_LWQCE_QPC_ERROR:
+ dev_warn(dev, "QP %d, QPC error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_MTU_ERROR:
+ dev_warn(dev, "QP %d, MTU error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
+ dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
+ dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
+ dev_warn(dev, "QP %d, WQE shift error.\n", qpn);
+ break;
+ default:
+ dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
+ break;
+ }
+}
+
+static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
+ struct hns_roce_aeqe *aeqe, u32 qpn)
+{
+ struct device *dev = hr_dev->dev;
+ int sub_type;
+
+ dev_warn(dev, "Local access violation work queue error.\n");
+ sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
+ HNS_ROCE_V2_AEQE_SUB_TYPE_S);
+ switch (sub_type) {
+ case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
+ dev_warn(dev, "QP %d, R_key violation.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_LENGTH_ERROR:
+ dev_warn(dev, "QP %d, length error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_VA_ERROR:
+ dev_warn(dev, "QP %d, VA error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_PD_ERROR:
+ dev_err(dev, "QP %d, PD error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
+ dev_warn(dev, "QP %d, rw acc error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
+ dev_warn(dev, "QP %d, key state error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
+ dev_warn(dev, "QP %d, MR operation error.\n", qpn);
+ break;
+ default:
+ dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
+ break;
+ }
+}
+
+static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
+ struct hns_roce_aeqe *aeqe,
+ int event_type)
+{
+ struct device *dev = hr_dev->dev;
+ u32 qpn;
+
+ qpn = roce_get_field(aeqe->event.qp_event.qp,
+ HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
+ HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
+
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ dev_warn(dev, "Communication established.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ dev_warn(dev, "Send queue drained.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ hns_roce_v2_wq_catas_err_handle(hr_dev, aeqe, qpn);
+ break;
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ dev_warn(dev, "Invalid request local work queue error.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ hns_roce_v2_local_wq_access_err_handle(hr_dev, aeqe, qpn);
+ break;
+ default:
+ break;
+ }
+
+ hns_roce_qp_event(hr_dev, qpn, event_type);
+}
+
+static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev,
+ struct hns_roce_aeqe *aeqe,
+ int event_type)
+{
+ struct device *dev = hr_dev->dev;
+ u32 cqn;
+
+ cqn = roce_get_field(aeqe->event.cq_event.cq,
+ HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
+ HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
+
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+ dev_warn(dev, "CQ 0x%x access err.\n", cqn);
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+ dev_warn(dev, "CQ 0x%x overflow\n", cqn);
+ break;
+ default:
+ break;
+ }
+
+ hns_roce_cq_event(hr_dev, cqn, event_type);
+}
+
+static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
+{
+ u32 buf_chk_sz;
+ unsigned long off;
+
+ buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
+ off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
+
+ return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) +
+ off % buf_chk_sz);
+}
+
+static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry)
+{
+ u32 buf_chk_sz;
+ unsigned long off;
+
+ buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
+
+ off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
+
+ if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
+ return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) +
+ off % buf_chk_sz);
+ else
+ return (struct hns_roce_aeqe *)((u8 *)
+ (eq->buf[off / buf_chk_sz]) + off % buf_chk_sz);
+}
+
+static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
+{
+ struct hns_roce_aeqe *aeqe;
+
+ if (!eq->hop_num)
+ aeqe = get_aeqe_v2(eq, eq->cons_index);
+ else
+ aeqe = mhop_get_aeqe(eq, eq->cons_index);
+
+ return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
+ !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
+}
+
+static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_aeqe *aeqe;
+ int aeqe_found = 0;
+ int event_type;
+
+ while ((aeqe = next_aeqe_sw_v2(eq))) {
+
+ /* Make sure we read AEQ entry after we have checked the
+ * ownership bit
+ */
+ dma_rmb();
+
+ event_type = roce_get_field(aeqe->asyn,
+ HNS_ROCE_V2_AEQE_EVENT_TYPE_M,
+ HNS_ROCE_V2_AEQE_EVENT_TYPE_S);
+
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+ dev_warn(dev, "Path migrated succeeded.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+ dev_warn(dev, "Path migration failed.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ dev_warn(dev, "SRQ not support.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+ hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
+ dev_warn(dev, "DB overflow.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_MB:
+ hns_roce_cmd_event(hr_dev,
+ le16_to_cpu(aeqe->event.cmd.token),
+ aeqe->event.cmd.status,
+ le64_to_cpu(aeqe->event.cmd.out_param));
+ break;
+ case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
+ dev_warn(dev, "CEQ overflow.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_FLR:
+ dev_warn(dev, "Function level reset.\n");
+ break;
+ default:
+ dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n",
+ event_type, eq->eqn, eq->cons_index);
+ break;
+ };
+
+ ++eq->cons_index;
+ aeqe_found = 1;
+
+ if (eq->cons_index > (2 * eq->entries - 1)) {
+ dev_warn(dev, "cons_index overflow, set back to 0.\n");
+ eq->cons_index = 0;
+ }
+ }
+
+ set_eq_cons_index_v2(eq);
+ return aeqe_found;
+}
+
+static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry)
+{
+ u32 buf_chk_sz;
+ unsigned long off;
+
+ buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
+ off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
+
+ return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) +
+ off % buf_chk_sz);
+}
+
+static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry)
+{
+ u32 buf_chk_sz;
+ unsigned long off;
+
+ buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
+
+ off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
+
+ if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
+ return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) +
+ off % buf_chk_sz);
+ else
+ return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off /
+ buf_chk_sz]) + off % buf_chk_sz);
+}
+
+static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
+{
+ struct hns_roce_ceqe *ceqe;
+
+ if (!eq->hop_num)
+ ceqe = get_ceqe_v2(eq, eq->cons_index);
+ else
+ ceqe = mhop_get_ceqe(eq, eq->cons_index);
+
+ return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
+ (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
+}
+
+static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_ceqe *ceqe;
+ int ceqe_found = 0;
+ u32 cqn;
+
+ while ((ceqe = next_ceqe_sw_v2(eq))) {
+
+ /* Make sure we read CEQ entry after we have checked the
+ * ownership bit
+ */
+ dma_rmb();
+
+ cqn = roce_get_field(ceqe->comp,
+ HNS_ROCE_V2_CEQE_COMP_CQN_M,
+ HNS_ROCE_V2_CEQE_COMP_CQN_S);
+
+ hns_roce_cq_completion(hr_dev, cqn);
+
+ ++eq->cons_index;
+ ceqe_found = 1;
+
+ if (eq->cons_index > (2 * eq->entries - 1)) {
+ dev_warn(dev, "cons_index overflow, set back to 0.\n");
+ eq->cons_index = 0;
+ }
+ }
+
+ set_eq_cons_index_v2(eq);
+
+ return ceqe_found;
+}
+
+static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
+{
+ struct hns_roce_eq *eq = eq_ptr;
+ struct hns_roce_dev *hr_dev = eq->hr_dev;
+ int int_work = 0;
+
+ if (eq->type_flag == HNS_ROCE_CEQ)
+ /* Completion event interrupt */
+ int_work = hns_roce_v2_ceq_int(hr_dev, eq);
+ else
+ /* Asychronous event interrupt */
+ int_work = hns_roce_v2_aeq_int(hr_dev, eq);
+
+ return IRQ_RETVAL(int_work);
+}
+
+static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
+{
+ struct hns_roce_dev *hr_dev = dev_id;
+ struct device *dev = hr_dev->dev;
+ int int_work = 0;
+ u32 int_st;
+ u32 int_en;
+
+ /* Abnormal interrupt */
+ int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
+ int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG);
+
+ if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
+ dev_err(dev, "AEQ overflow!\n");
+
+ roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S, 1);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
+
+ roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
+
+ int_work = 1;
+ } else if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S)) {
+ dev_err(dev, "BUS ERR!\n");
+
+ roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S, 1);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
+
+ roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
+
+ int_work = 1;
+ } else if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S)) {
+ dev_err(dev, "OTHER ERR!\n");
+
+ roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S, 1);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
+
+ roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
+
+ int_work = 1;
+ } else
+ dev_err(dev, "There is no abnormal irq found!\n");
+
+ return IRQ_RETVAL(int_work);
+}
+
+static void hns_roce_v2_int_mask_enable(struct hns_roce_dev *hr_dev,
+ int eq_num, int enable_flag)
+{
+ int i;
+
+ if (enable_flag == EQ_ENABLE) {
+ for (i = 0; i < eq_num; i++)
+ roce_write(hr_dev, ROCEE_VF_EVENT_INT_EN_REG +
+ i * EQ_REG_OFFSET,
+ HNS_ROCE_V2_VF_EVENT_INT_EN_M);
+
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG,
+ HNS_ROCE_V2_VF_ABN_INT_EN_M);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG,
+ HNS_ROCE_V2_VF_ABN_INT_CFG_M);
+ } else {
+ for (i = 0; i < eq_num; i++)
+ roce_write(hr_dev, ROCEE_VF_EVENT_INT_EN_REG +
+ i * EQ_REG_OFFSET,
+ HNS_ROCE_V2_VF_EVENT_INT_EN_M & 0x0);
+
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG,
+ HNS_ROCE_V2_VF_ABN_INT_EN_M & 0x0);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG,
+ HNS_ROCE_V2_VF_ABN_INT_CFG_M & 0x0);
+ }
+}
+
+static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ if (eqn < hr_dev->caps.num_comp_vectors)
+ ret = hns_roce_cmd_mbox(hr_dev, 0, 0, eqn & HNS_ROCE_V2_EQN_M,
+ 0, HNS_ROCE_CMD_DESTROY_CEQC,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+ else
+ ret = hns_roce_cmd_mbox(hr_dev, 0, 0, eqn & HNS_ROCE_V2_EQN_M,
+ 0, HNS_ROCE_CMD_DESTROY_AEQC,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+ if (ret)
+ dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
+}
+
+static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ struct device *dev = hr_dev->dev;
+ u64 idx;
+ u64 size;
+ u32 buf_chk_sz;
+ u32 bt_chk_sz;
+ u32 mhop_num;
+ int eqe_alloc;
+ int ba_num;
+ int i = 0;
+ int j = 0;
+
+ mhop_num = hr_dev->caps.eqe_hop_num;
+ buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
+ bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
+ ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1) /
+ buf_chk_sz;
+
+ /* hop_num = 0 */
+ if (mhop_num == HNS_ROCE_HOP_NUM_0) {
+ dma_free_coherent(dev, (unsigned int)(eq->entries *
+ eq->eqe_size), eq->bt_l0, eq->l0_dma);
+ return;
+ }
+
+ /* hop_num = 1 or hop = 2 */
+ dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
+ if (mhop_num == 1) {
+ for (i = 0; i < eq->l0_last_num; i++) {
+ if (i == eq->l0_last_num - 1) {
+ eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
+ size = (eq->entries - eqe_alloc) * eq->eqe_size;
+ dma_free_coherent(dev, size, eq->buf[i],
+ eq->buf_dma[i]);
+ break;
+ }
+ dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
+ eq->buf_dma[i]);
+ }
+ } else if (mhop_num == 2) {
+ for (i = 0; i < eq->l0_last_num; i++) {
+ dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
+ eq->l1_dma[i]);
+
+ for (j = 0; j < bt_chk_sz / 8; j++) {
+ idx = i * (bt_chk_sz / 8) + j;
+ if ((i == eq->l0_last_num - 1)
+ && j == eq->l1_last_num - 1) {
+ eqe_alloc = (buf_chk_sz / eq->eqe_size)
+ * idx;
+ size = (eq->entries - eqe_alloc)
+ * eq->eqe_size;
+ dma_free_coherent(dev, size,
+ eq->buf[idx],
+ eq->buf_dma[idx]);
+ break;
+ }
+ dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
+ eq->buf_dma[idx]);
+ }
+ }
+ }
+ kfree(eq->buf_dma);
+ kfree(eq->buf);
+ kfree(eq->l1_dma);
+ kfree(eq->bt_l1);
+ eq->buf_dma = NULL;
+ eq->buf = NULL;
+ eq->l1_dma = NULL;
+ eq->bt_l1 = NULL;
+}
+
+static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ u32 buf_chk_sz;
+
+ buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
+
+ if (hr_dev->caps.eqe_hop_num) {
+ hns_roce_mhop_free_eq(hr_dev, eq);
+ return;
+ }
+
+ if (eq->buf_list)
+ dma_free_coherent(hr_dev->dev, buf_chk_sz,
+ eq->buf_list->buf, eq->buf_list->map);
+}
+
+static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq,
+ void *mb_buf)
+{
+ struct hns_roce_eq_context *eqc;
+
+ eqc = mb_buf;
+ memset(eqc, 0, sizeof(struct hns_roce_eq_context));
+
+ /* init eqc */
+ eq->doorbell = hr_dev->reg_base + ROCEE_VF_EQ_DB_CFG0_REG;
+ eq->hop_num = hr_dev->caps.eqe_hop_num;
+ eq->cons_index = 0;
+ eq->over_ignore = HNS_ROCE_V2_EQ_OVER_IGNORE_0;
+ eq->coalesce = HNS_ROCE_V2_EQ_COALESCE_0;
+ eq->arm_st = HNS_ROCE_V2_EQ_ALWAYS_ARMED;
+ eq->eqe_ba_pg_sz = hr_dev->caps.eqe_ba_pg_sz;
+ eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
+ eq->shift = ilog2((unsigned int)eq->entries);
+
+ if (!eq->hop_num)
+ eq->eqe_ba = eq->buf_list->map;
+ else
+ eq->eqe_ba = eq->l0_dma;
+
+ /* set eqc state */
+ roce_set_field(eqc->byte_4,
+ HNS_ROCE_EQC_EQ_ST_M,
+ HNS_ROCE_EQC_EQ_ST_S,
+ HNS_ROCE_V2_EQ_STATE_VALID);
+
+ /* set eqe hop num */
+ roce_set_field(eqc->byte_4,
+ HNS_ROCE_EQC_HOP_NUM_M,
+ HNS_ROCE_EQC_HOP_NUM_S, eq->hop_num);
+
+ /* set eqc over_ignore */
+ roce_set_field(eqc->byte_4,
+ HNS_ROCE_EQC_OVER_IGNORE_M,
+ HNS_ROCE_EQC_OVER_IGNORE_S, eq->over_ignore);
+
+ /* set eqc coalesce */
+ roce_set_field(eqc->byte_4,
+ HNS_ROCE_EQC_COALESCE_M,
+ HNS_ROCE_EQC_COALESCE_S, eq->coalesce);
+
+ /* set eqc arm_state */
+ roce_set_field(eqc->byte_4,
+ HNS_ROCE_EQC_ARM_ST_M,
+ HNS_ROCE_EQC_ARM_ST_S, eq->arm_st);
+
+ /* set eqn */
+ roce_set_field(eqc->byte_4,
+ HNS_ROCE_EQC_EQN_M,
+ HNS_ROCE_EQC_EQN_S, eq->eqn);
+
+ /* set eqe_cnt */
+ roce_set_field(eqc->byte_4,
+ HNS_ROCE_EQC_EQE_CNT_M,
+ HNS_ROCE_EQC_EQE_CNT_S,
+ HNS_ROCE_EQ_INIT_EQE_CNT);
+
+ /* set eqe_ba_pg_sz */
+ roce_set_field(eqc->byte_8,
+ HNS_ROCE_EQC_BA_PG_SZ_M,
+ HNS_ROCE_EQC_BA_PG_SZ_S, eq->eqe_ba_pg_sz);
+
+ /* set eqe_buf_pg_sz */
+ roce_set_field(eqc->byte_8,
+ HNS_ROCE_EQC_BUF_PG_SZ_M,
+ HNS_ROCE_EQC_BUF_PG_SZ_S, eq->eqe_buf_pg_sz);
+
+ /* set eq_producer_idx */
+ roce_set_field(eqc->byte_8,
+ HNS_ROCE_EQC_PROD_INDX_M,
+ HNS_ROCE_EQC_PROD_INDX_S,
+ HNS_ROCE_EQ_INIT_PROD_IDX);
+
+ /* set eq_max_cnt */
+ roce_set_field(eqc->byte_12,
+ HNS_ROCE_EQC_MAX_CNT_M,
+ HNS_ROCE_EQC_MAX_CNT_S, eq->eq_max_cnt);
+
+ /* set eq_period */
+ roce_set_field(eqc->byte_12,
+ HNS_ROCE_EQC_PERIOD_M,
+ HNS_ROCE_EQC_PERIOD_S, eq->eq_period);
+
+ /* set eqe_report_timer */
+ roce_set_field(eqc->eqe_report_timer,
+ HNS_ROCE_EQC_REPORT_TIMER_M,
+ HNS_ROCE_EQC_REPORT_TIMER_S,
+ HNS_ROCE_EQ_INIT_REPORT_TIMER);
+
+ /* set eqe_ba [34:3] */
+ roce_set_field(eqc->eqe_ba0,
+ HNS_ROCE_EQC_EQE_BA_L_M,
+ HNS_ROCE_EQC_EQE_BA_L_S, eq->eqe_ba >> 3);
+
+ /* set eqe_ba [64:35] */
+ roce_set_field(eqc->eqe_ba1,
+ HNS_ROCE_EQC_EQE_BA_H_M,
+ HNS_ROCE_EQC_EQE_BA_H_S, eq->eqe_ba >> 35);
+
+ /* set eq shift */
+ roce_set_field(eqc->byte_28,
+ HNS_ROCE_EQC_SHIFT_M,
+ HNS_ROCE_EQC_SHIFT_S, eq->shift);
+
+ /* set eq MSI_IDX */
+ roce_set_field(eqc->byte_28,
+ HNS_ROCE_EQC_MSI_INDX_M,
+ HNS_ROCE_EQC_MSI_INDX_S,
+ HNS_ROCE_EQ_INIT_MSI_IDX);
+
+ /* set cur_eqe_ba [27:12] */
+ roce_set_field(eqc->byte_28,
+ HNS_ROCE_EQC_CUR_EQE_BA_L_M,
+ HNS_ROCE_EQC_CUR_EQE_BA_L_S, eq->cur_eqe_ba >> 12);
+
+ /* set cur_eqe_ba [59:28] */
+ roce_set_field(eqc->byte_32,
+ HNS_ROCE_EQC_CUR_EQE_BA_M_M,
+ HNS_ROCE_EQC_CUR_EQE_BA_M_S, eq->cur_eqe_ba >> 28);
+
+ /* set cur_eqe_ba [63:60] */
+ roce_set_field(eqc->byte_36,
+ HNS_ROCE_EQC_CUR_EQE_BA_H_M,
+ HNS_ROCE_EQC_CUR_EQE_BA_H_S, eq->cur_eqe_ba >> 60);
+
+ /* set eq consumer idx */
+ roce_set_field(eqc->byte_36,
+ HNS_ROCE_EQC_CONS_INDX_M,
+ HNS_ROCE_EQC_CONS_INDX_S,
+ HNS_ROCE_EQ_INIT_CONS_IDX);
+
+ /* set nex_eqe_ba[43:12] */
+ roce_set_field(eqc->nxt_eqe_ba0,
+ HNS_ROCE_EQC_NXT_EQE_BA_L_M,
+ HNS_ROCE_EQC_NXT_EQE_BA_L_S, eq->nxt_eqe_ba >> 12);
+
+ /* set nex_eqe_ba[63:44] */
+ roce_set_field(eqc->nxt_eqe_ba1,
+ HNS_ROCE_EQC_NXT_EQE_BA_H_M,
+ HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
+}
+
+static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ struct device *dev = hr_dev->dev;
+ int eq_alloc_done = 0;
+ int eq_buf_cnt = 0;
+ int eqe_alloc;
+ u32 buf_chk_sz;
+ u32 bt_chk_sz;
+ u32 mhop_num;
+ u64 size;
+ u64 idx;
+ int ba_num;
+ int bt_num;
+ int record_i;
+ int record_j;
+ int i = 0;
+ int j = 0;
+
+ mhop_num = hr_dev->caps.eqe_hop_num;
+ buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
+ bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
+
+ ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1)
+ / buf_chk_sz;
+ bt_num = (ba_num + bt_chk_sz / 8 - 1) / (bt_chk_sz / 8);
+
+ /* hop_num = 0 */
+ if (mhop_num == HNS_ROCE_HOP_NUM_0) {
+ if (eq->entries > buf_chk_sz / eq->eqe_size) {
+ dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
+ eq->entries);
+ return -EINVAL;
+ }
+ eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size,
+ &(eq->l0_dma), GFP_KERNEL);
+ if (!eq->bt_l0)
+ return -ENOMEM;
+
+ eq->cur_eqe_ba = eq->l0_dma;
+ eq->nxt_eqe_ba = 0;
+
+ memset(eq->bt_l0, 0, eq->entries * eq->eqe_size);
+
+ return 0;
+ }
+
+ eq->buf_dma = kcalloc(ba_num, sizeof(*eq->buf_dma), GFP_KERNEL);
+ if (!eq->buf_dma)
+ return -ENOMEM;
+ eq->buf = kcalloc(ba_num, sizeof(*eq->buf), GFP_KERNEL);
+ if (!eq->buf)
+ goto err_kcalloc_buf;
+
+ if (mhop_num == 2) {
+ eq->l1_dma = kcalloc(bt_num, sizeof(*eq->l1_dma), GFP_KERNEL);
+ if (!eq->l1_dma)
+ goto err_kcalloc_l1_dma;
+
+ eq->bt_l1 = kcalloc(bt_num, sizeof(*eq->bt_l1), GFP_KERNEL);
+ if (!eq->bt_l1)
+ goto err_kcalloc_bt_l1;
+ }
+
+ /* alloc L0 BT */
+ eq->bt_l0 = dma_alloc_coherent(dev, bt_chk_sz, &eq->l0_dma, GFP_KERNEL);
+ if (!eq->bt_l0)
+ goto err_dma_alloc_l0;
+
+ if (mhop_num == 1) {
+ if (ba_num > (bt_chk_sz / 8))
+ dev_err(dev, "ba_num %d is too large for 1 hop\n",
+ ba_num);
+
+ /* alloc buf */
+ for (i = 0; i < bt_chk_sz / 8; i++) {
+ if (eq_buf_cnt + 1 < ba_num) {
+ size = buf_chk_sz;
+ } else {
+ eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
+ size = (eq->entries - eqe_alloc) * eq->eqe_size;
+ }
+ eq->buf[i] = dma_alloc_coherent(dev, size,
+ &(eq->buf_dma[i]),
+ GFP_KERNEL);
+ if (!eq->buf[i])
+ goto err_dma_alloc_buf;
+
+ memset(eq->buf[i], 0, size);
+ *(eq->bt_l0 + i) = eq->buf_dma[i];
+
+ eq_buf_cnt++;
+ if (eq_buf_cnt >= ba_num)
+ break;
+ }
+ eq->cur_eqe_ba = eq->buf_dma[0];
+ eq->nxt_eqe_ba = eq->buf_dma[1];
+
+ } else if (mhop_num == 2) {
+ /* alloc L1 BT and buf */
+ for (i = 0; i < bt_chk_sz / 8; i++) {
+ eq->bt_l1[i] = dma_alloc_coherent(dev, bt_chk_sz,
+ &(eq->l1_dma[i]),
+ GFP_KERNEL);
+ if (!eq->bt_l1[i])
+ goto err_dma_alloc_l1;
+ *(eq->bt_l0 + i) = eq->l1_dma[i];
+
+ for (j = 0; j < bt_chk_sz / 8; j++) {
+ idx = i * bt_chk_sz / 8 + j;
+ if (eq_buf_cnt + 1 < ba_num) {
+ size = buf_chk_sz;
+ } else {
+ eqe_alloc = (buf_chk_sz / eq->eqe_size)
+ * idx;
+ size = (eq->entries - eqe_alloc)
+ * eq->eqe_size;
+ }
+ eq->buf[idx] = dma_alloc_coherent(dev, size,
+ &(eq->buf_dma[idx]),
+ GFP_KERNEL);
+ if (!eq->buf[idx])
+ goto err_dma_alloc_buf;
+
+ memset(eq->buf[idx], 0, size);
+ *(eq->bt_l1[i] + j) = eq->buf_dma[idx];
+
+ eq_buf_cnt++;
+ if (eq_buf_cnt >= ba_num) {
+ eq_alloc_done = 1;
+ break;
+ }
+ }
+
+ if (eq_alloc_done)
+ break;
+ }
+ eq->cur_eqe_ba = eq->buf_dma[0];
+ eq->nxt_eqe_ba = eq->buf_dma[1];
+ }
+
+ eq->l0_last_num = i + 1;
+ if (mhop_num == 2)
+ eq->l1_last_num = j + 1;
+
+ return 0;
+
+err_dma_alloc_l1:
+ dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
+ eq->bt_l0 = NULL;
+ eq->l0_dma = 0;
+ for (i -= 1; i >= 0; i--) {
+ dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
+ eq->l1_dma[i]);
+
+ for (j = 0; j < bt_chk_sz / 8; j++) {
+ idx = i * bt_chk_sz / 8 + j;
+ dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
+ eq->buf_dma[idx]);
+ }
+ }
+ goto err_dma_alloc_l0;
+
+err_dma_alloc_buf:
+ dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
+ eq->bt_l0 = NULL;
+ eq->l0_dma = 0;
+
+ if (mhop_num == 1)
+ for (i -= i; i >= 0; i--)
+ dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
+ eq->buf_dma[i]);
+ else if (mhop_num == 2) {
+ record_i = i;
+ record_j = j;
+ for (; i >= 0; i--) {
+ dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
+ eq->l1_dma[i]);
+
+ for (j = 0; j < bt_chk_sz / 8; j++) {
+ if (i == record_i && j >= record_j)
+ break;
+
+ idx = i * bt_chk_sz / 8 + j;
+ dma_free_coherent(dev, buf_chk_sz,
+ eq->buf[idx],
+ eq->buf_dma[idx]);
+ }
+ }
+ }
+
+err_dma_alloc_l0:
+ kfree(eq->bt_l1);
+ eq->bt_l1 = NULL;
+
+err_kcalloc_bt_l1:
+ kfree(eq->l1_dma);
+ eq->l1_dma = NULL;
+
+err_kcalloc_l1_dma:
+ kfree(eq->buf);
+ eq->buf = NULL;
+
+err_kcalloc_buf:
+ kfree(eq->buf_dma);
+ eq->buf_dma = NULL;
+
+ return -ENOMEM;
+}
+
+static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq,
+ unsigned int eq_cmd)
+{
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_cmd_mailbox *mailbox;
+ u32 buf_chk_sz = 0;
+ int ret;
+
+ /* Allocate mailbox memory */
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ if (!hr_dev->caps.eqe_hop_num) {
+ buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
+
+ eq->buf_list = kzalloc(sizeof(struct hns_roce_buf_list),
+ GFP_KERNEL);
+ if (!eq->buf_list) {
+ ret = -ENOMEM;
+ goto free_cmd_mbox;
+ }
+
+ eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz,
+ &(eq->buf_list->map),
+ GFP_KERNEL);
+ if (!eq->buf_list->buf) {
+ ret = -ENOMEM;
+ goto err_alloc_buf;
+ }
+
+ memset(eq->buf_list->buf, 0, buf_chk_sz);
+ } else {
+ ret = hns_roce_mhop_alloc_eq(hr_dev, eq);
+ if (ret) {
+ ret = -ENOMEM;
+ goto free_cmd_mbox;
+ }
+ }
+
+ hns_roce_config_eqc(hr_dev, eq, mailbox->buf);
+
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0,
+ eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS);
+ if (ret) {
+ dev_err(dev, "[mailbox cmd] creat eqc failed.\n");
+ goto err_cmd_mbox;
+ }
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return 0;
+
+err_cmd_mbox:
+ if (!hr_dev->caps.eqe_hop_num)
+ dma_free_coherent(dev, buf_chk_sz, eq->buf_list->buf,
+ eq->buf_list->map);
+ else {
+ hns_roce_mhop_free_eq(hr_dev, eq);
+ goto free_cmd_mbox;
+ }
+
+err_alloc_buf:
+ kfree(eq->buf_list);
+
+free_cmd_mbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_eq *eq;
+ unsigned int eq_cmd;
+ int irq_num;
+ int eq_num;
+ int other_num;
+ int comp_num;
+ int aeq_num;
+ int i, j, k;
+ int ret;
+
+ other_num = hr_dev->caps.num_other_vectors;
+ comp_num = hr_dev->caps.num_comp_vectors;
+ aeq_num = hr_dev->caps.num_aeq_vectors;
+
+ eq_num = comp_num + aeq_num;
+ irq_num = eq_num + other_num;
+
+ eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL);
+ if (!eq_table->eq)
+ return -ENOMEM;
+
+ for (i = 0; i < irq_num; i++) {
+ hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN,
+ GFP_KERNEL);
+ if (!hr_dev->irq_names[i]) {
+ ret = -ENOMEM;
+ goto err_failed_kzalloc;
+ }
+ }
+
+ /* create eq */
+ for (j = 0; j < eq_num; j++) {
+ eq = &eq_table->eq[j];
+ eq->hr_dev = hr_dev;
+ eq->eqn = j;
+ if (j < comp_num) {
+ /* CEQ */
+ eq_cmd = HNS_ROCE_CMD_CREATE_CEQC;
+ eq->type_flag = HNS_ROCE_CEQ;
+ eq->entries = hr_dev->caps.ceqe_depth;
+ eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
+ eq->irq = hr_dev->irq[j + other_num + aeq_num];
+ eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM;
+ eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL;
+ } else {
+ /* AEQ */
+ eq_cmd = HNS_ROCE_CMD_CREATE_AEQC;
+ eq->type_flag = HNS_ROCE_AEQ;
+ eq->entries = hr_dev->caps.aeqe_depth;
+ eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
+ eq->irq = hr_dev->irq[j - comp_num + other_num];
+ eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM;
+ eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL;
+ }
+
+ ret = hns_roce_v2_create_eq(hr_dev, eq, eq_cmd);
+ if (ret) {
+ dev_err(dev, "eq create failed.\n");
+ goto err_create_eq_fail;
+ }
+ }
+
+ /* enable irq */
+ hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_ENABLE);
+
+ /* irq contains: abnormal + AEQ + CEQ*/
+ for (k = 0; k < irq_num; k++)
+ if (k < other_num)
+ snprintf((char *)hr_dev->irq_names[k],
+ HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", k);
+ else if (k < (other_num + aeq_num))
+ snprintf((char *)hr_dev->irq_names[k],
+ HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d",
+ k - other_num);
+ else
+ snprintf((char *)hr_dev->irq_names[k],
+ HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d",
+ k - other_num - aeq_num);
+
+ for (k = 0; k < irq_num; k++) {
+ if (k < other_num)
+ ret = request_irq(hr_dev->irq[k],
+ hns_roce_v2_msix_interrupt_abn,
+ 0, hr_dev->irq_names[k], hr_dev);
+
+ else if (k < (other_num + comp_num))
+ ret = request_irq(eq_table->eq[k - other_num].irq,
+ hns_roce_v2_msix_interrupt_eq,
+ 0, hr_dev->irq_names[k + aeq_num],
+ &eq_table->eq[k - other_num]);
+ else
+ ret = request_irq(eq_table->eq[k - other_num].irq,
+ hns_roce_v2_msix_interrupt_eq,
+ 0, hr_dev->irq_names[k - comp_num],
+ &eq_table->eq[k - other_num]);
+ if (ret) {
+ dev_err(dev, "Request irq error!\n");
+ goto err_request_irq_fail;
+ }
+ }
+
+ return 0;
+
+err_request_irq_fail:
+ for (k -= 1; k >= 0; k--)
+ if (k < other_num)
+ free_irq(hr_dev->irq[k], hr_dev);
+ else
+ free_irq(eq_table->eq[k - other_num].irq,
+ &eq_table->eq[k - other_num]);
+
+err_create_eq_fail:
+ for (j -= 1; j >= 0; j--)
+ hns_roce_v2_free_eq(hr_dev, &eq_table->eq[j]);
+
+err_failed_kzalloc:
+ for (i -= 1; i >= 0; i--)
+ kfree(hr_dev->irq_names[i]);
+ kfree(eq_table->eq);
+
+ return ret;
+}
+
+static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ int irq_num;
+ int eq_num;
+ int i;
+
+ eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
+ irq_num = eq_num + hr_dev->caps.num_other_vectors;
+
+ /* Disable irq */
+ hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
+
+ for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
+ free_irq(hr_dev->irq[i], hr_dev);
+
+ for (i = 0; i < eq_num; i++) {
+ hns_roce_v2_destroy_eqc(hr_dev, i);
+
+ free_irq(eq_table->eq[i].irq, &eq_table->eq[i]);
+
+ hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
+ }
+
+ for (i = 0; i < irq_num; i++)
+ kfree(hr_dev->irq_names[i]);
+
+ kfree(eq_table->eq);
+}
+
static const struct hns_roce_hw hns_roce_hw_v2 = {
.cmq_init = hns_roce_v2_cmq_init,
.cmq_exit = hns_roce_v2_cmq_exit,
@@ -3183,6 +4646,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.post_recv = hns_roce_v2_post_recv,
.req_notify_cq = hns_roce_v2_req_notify_cq,
.poll_cq = hns_roce_v2_poll_cq,
+ .init_eq = hns_roce_v2_init_eq_table,
+ .cleanup_eq = hns_roce_v2_cleanup_eq_table,
};
static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
@@ -3197,6 +4662,7 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
struct hnae3_handle *handle)
{
const struct pci_device_id *id;
+ int i;
id = pci_match_id(hns_roce_hw_v2_pci_tbl, hr_dev->pci_dev);
if (!id) {
@@ -3214,8 +4680,15 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
hr_dev->iboe.netdevs[0] = handle->rinfo.netdev;
hr_dev->iboe.phy_port[0] = 0;
+ addrconf_addr_eui48((u8 *)&hr_dev->ib_dev.node_guid,
+ hr_dev->iboe.netdevs[0]->dev_addr);
+
+ for (i = 0; i < HNS_ROCE_V2_MAX_IRQ_NUM; i++)
+ hr_dev->irq[i] = pci_irq_vector(handle->pdev,
+ i + handle->rinfo.base_vector);
+
/* cmd issue mode: 0 is poll, 1 is event */
- hr_dev->cmd_mod = 0;
+ hr_dev->cmd_mod = 1;
hr_dev->loop_idc = 0;
return 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 04b7a51b8efb..960df095392a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -53,6 +53,10 @@
#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
#define HNS_ROCE_V2_UAR_NUM 256
#define HNS_ROCE_V2_PHY_UAR_NUM 1
+#define HNS_ROCE_V2_MAX_IRQ_NUM 65
+#define HNS_ROCE_V2_COMP_VEC_NUM 63
+#define HNS_ROCE_V2_AEQE_VEC_NUM 1
+#define HNS_ROCE_V2_ABNORMAL_VEC_NUM 1
#define HNS_ROCE_V2_MAX_MTPT_NUM 0x8000
#define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000
@@ -78,6 +82,8 @@
#define HNS_ROCE_MTT_HOP_NUM 1
#define HNS_ROCE_CQE_HOP_NUM 1
#define HNS_ROCE_PBL_HOP_NUM 2
+#define HNS_ROCE_EQE_HOP_NUM 2
+
#define HNS_ROCE_V2_GID_INDEX_NUM 256
#define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18)
@@ -105,6 +111,12 @@
(step_idx == 1 && hop_num == 1) || \
(step_idx == 2 && hop_num == 2))
+enum {
+ NO_ARMED = 0x0,
+ REG_NXT_CEQE = 0x2,
+ REG_NXT_SE_CEQE = 0x3
+};
+
#define V2_CQ_DB_REQ_NOT_SOL 0
#define V2_CQ_DB_REQ_NOT 1
@@ -229,6 +241,9 @@ struct hns_roce_v2_cq_context {
u32 cqe_report_timer;
u32 byte_64_se_cqe_idx;
};
+#define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
+#define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
+
#define V2_CQC_BYTE_4_CQ_ST_S 0
#define V2_CQC_BYTE_4_CQ_ST_M GENMASK(1, 0)
@@ -747,11 +762,14 @@ struct hns_roce_v2_qp_context {
struct hns_roce_v2_cqe {
u32 byte_4;
- u32 rkey_immtdata;
+ union {
+ __le32 rkey;
+ __be32 immtdata;
+ };
u32 byte_12;
u32 byte_16;
u32 byte_cnt;
- u32 smac;
+ u8 smac[4];
u32 byte_28;
u32 byte_32;
};
@@ -901,6 +919,90 @@ struct hns_roce_v2_cq_db {
#define V2_CQ_DB_PARAMETER_NOTIFY_S 24
+struct hns_roce_v2_ud_send_wqe {
+ u32 byte_4;
+ u32 msg_len;
+ u32 immtdata;
+ u32 byte_16;
+ u32 byte_20;
+ u32 byte_24;
+ u32 qkey;
+ u32 byte_32;
+ u32 byte_36;
+ u32 byte_40;
+ u32 dmac;
+ u32 byte_48;
+ u8 dgid[GID_LEN_V2];
+
+};
+#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0
+#define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
+
+#define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7
+
+#define V2_UD_SEND_WQE_BYTE_4_CQE_S 8
+
+#define V2_UD_SEND_WQE_BYTE_4_SE_S 11
+
+#define V2_UD_SEND_WQE_BYTE_16_PD_S 0
+#define V2_UD_SEND_WQE_BYTE_16_PD_M GENMASK(23, 0)
+
+#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S 24
+#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24)
+
+#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
+#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
+
+#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_S 16
+#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_M GENMASK(31, 16)
+
+#define V2_UD_SEND_WQE_BYTE_32_DQPN_S 0
+#define V2_UD_SEND_WQE_BYTE_32_DQPN_M GENMASK(23, 0)
+
+#define V2_UD_SEND_WQE_BYTE_36_VLAN_S 0
+#define V2_UD_SEND_WQE_BYTE_36_VLAN_M GENMASK(15, 0)
+
+#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S 16
+#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M GENMASK(23, 16)
+
+#define V2_UD_SEND_WQE_BYTE_36_TCLASS_S 24
+#define V2_UD_SEND_WQE_BYTE_36_TCLASS_M GENMASK(31, 24)
+
+#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S 0
+#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M GENMASK(19, 0)
+
+#define V2_UD_SEND_WQE_BYTE_40_SL_S 20
+#define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20)
+
+#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24
+#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24)
+
+#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
+
+#define V2_UD_SEND_WQE_DMAC_0_S 0
+#define V2_UD_SEND_WQE_DMAC_0_M GENMASK(7, 0)
+
+#define V2_UD_SEND_WQE_DMAC_1_S 8
+#define V2_UD_SEND_WQE_DMAC_1_M GENMASK(15, 8)
+
+#define V2_UD_SEND_WQE_DMAC_2_S 16
+#define V2_UD_SEND_WQE_DMAC_2_M GENMASK(23, 16)
+
+#define V2_UD_SEND_WQE_DMAC_3_S 24
+#define V2_UD_SEND_WQE_DMAC_3_M GENMASK(31, 24)
+
+#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_S 0
+#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_M GENMASK(7, 0)
+
+#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_S 8
+#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_M GENMASK(15, 8)
+
+#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S 16
+#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M GENMASK(23, 16)
+
+#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_S 24
+#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_M GENMASK(31, 24)
+
struct hns_roce_v2_rc_send_wqe {
u32 byte_4;
u32 msg_len;
@@ -1129,9 +1231,6 @@ struct hns_roce_cmq_desc {
u32 data[6];
};
-#define ROCEE_VF_MB_CFG0_REG 0x40
-#define ROCEE_VF_MB_STATUS_REG 0x58
-
#define HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS 10000
#define HNS_ROCE_HW_RUN_BIT_SHIFT 31
@@ -1174,4 +1273,178 @@ struct hns_roce_v2_priv {
struct hns_roce_v2_cmq cmq;
};
+struct hns_roce_eq_context {
+ u32 byte_4;
+ u32 byte_8;
+ u32 byte_12;
+ u32 eqe_report_timer;
+ u32 eqe_ba0;
+ u32 eqe_ba1;
+ u32 byte_28;
+ u32 byte_32;
+ u32 byte_36;
+ u32 nxt_eqe_ba0;
+ u32 nxt_eqe_ba1;
+ u32 rsv[5];
+};
+
+#define HNS_ROCE_AEQ_DEFAULT_BURST_NUM 0x0
+#define HNS_ROCE_AEQ_DEFAULT_INTERVAL 0x0
+#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x0
+#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x0
+
+#define HNS_ROCE_V2_EQ_STATE_INVALID 0
+#define HNS_ROCE_V2_EQ_STATE_VALID 1
+#define HNS_ROCE_V2_EQ_STATE_OVERFLOW 2
+#define HNS_ROCE_V2_EQ_STATE_FAILURE 3
+
+#define HNS_ROCE_V2_EQ_OVER_IGNORE_0 0
+#define HNS_ROCE_V2_EQ_OVER_IGNORE_1 1
+
+#define HNS_ROCE_V2_EQ_COALESCE_0 0
+#define HNS_ROCE_V2_EQ_COALESCE_1 1
+
+#define HNS_ROCE_V2_EQ_FIRED 0
+#define HNS_ROCE_V2_EQ_ARMED 1
+#define HNS_ROCE_V2_EQ_ALWAYS_ARMED 3
+
+#define HNS_ROCE_EQ_INIT_EQE_CNT 0
+#define HNS_ROCE_EQ_INIT_PROD_IDX 0
+#define HNS_ROCE_EQ_INIT_REPORT_TIMER 0
+#define HNS_ROCE_EQ_INIT_MSI_IDX 0
+#define HNS_ROCE_EQ_INIT_CONS_IDX 0
+#define HNS_ROCE_EQ_INIT_NXT_EQE_BA 0
+
+#define HNS_ROCE_V2_CEQ_CEQE_OWNER_S 31
+#define HNS_ROCE_V2_AEQ_AEQE_OWNER_S 31
+
+#define HNS_ROCE_V2_COMP_EQE_NUM 0x1000
+#define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000
+
+#define HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S 0
+#define HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S 1
+#define HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S 2
+
+#define HNS_ROCE_EQ_DB_CMD_AEQ 0x0
+#define HNS_ROCE_EQ_DB_CMD_AEQ_ARMED 0x1
+#define HNS_ROCE_EQ_DB_CMD_CEQ 0x2
+#define HNS_ROCE_EQ_DB_CMD_CEQ_ARMED 0x3
+
+#define EQ_ENABLE 1
+#define EQ_DISABLE 0
+
+#define EQ_REG_OFFSET 0x4
+
+#define HNS_ROCE_INT_NAME_LEN 32
+#define HNS_ROCE_V2_EQN_M GENMASK(23, 0)
+
+#define HNS_ROCE_V2_CONS_IDX_M GENMASK(23, 0)
+
+#define HNS_ROCE_V2_VF_ABN_INT_EN_S 0
+#define HNS_ROCE_V2_VF_ABN_INT_EN_M GENMASK(0, 0)
+#define HNS_ROCE_V2_VF_ABN_INT_ST_M GENMASK(2, 0)
+#define HNS_ROCE_V2_VF_ABN_INT_CFG_M GENMASK(2, 0)
+#define HNS_ROCE_V2_VF_EVENT_INT_EN_M GENMASK(0, 0)
+
+/* WORD0 */
+#define HNS_ROCE_EQC_EQ_ST_S 0
+#define HNS_ROCE_EQC_EQ_ST_M GENMASK(1, 0)
+
+#define HNS_ROCE_EQC_HOP_NUM_S 2
+#define HNS_ROCE_EQC_HOP_NUM_M GENMASK(3, 2)
+
+#define HNS_ROCE_EQC_OVER_IGNORE_S 4
+#define HNS_ROCE_EQC_OVER_IGNORE_M GENMASK(4, 4)
+
+#define HNS_ROCE_EQC_COALESCE_S 5
+#define HNS_ROCE_EQC_COALESCE_M GENMASK(5, 5)
+
+#define HNS_ROCE_EQC_ARM_ST_S 6
+#define HNS_ROCE_EQC_ARM_ST_M GENMASK(7, 6)
+
+#define HNS_ROCE_EQC_EQN_S 8
+#define HNS_ROCE_EQC_EQN_M GENMASK(15, 8)
+
+#define HNS_ROCE_EQC_EQE_CNT_S 16
+#define HNS_ROCE_EQC_EQE_CNT_M GENMASK(31, 16)
+
+/* WORD1 */
+#define HNS_ROCE_EQC_BA_PG_SZ_S 0
+#define HNS_ROCE_EQC_BA_PG_SZ_M GENMASK(3, 0)
+
+#define HNS_ROCE_EQC_BUF_PG_SZ_S 4
+#define HNS_ROCE_EQC_BUF_PG_SZ_M GENMASK(7, 4)
+
+#define HNS_ROCE_EQC_PROD_INDX_S 8
+#define HNS_ROCE_EQC_PROD_INDX_M GENMASK(31, 8)
+
+/* WORD2 */
+#define HNS_ROCE_EQC_MAX_CNT_S 0
+#define HNS_ROCE_EQC_MAX_CNT_M GENMASK(15, 0)
+
+#define HNS_ROCE_EQC_PERIOD_S 16
+#define HNS_ROCE_EQC_PERIOD_M GENMASK(31, 16)
+
+/* WORD3 */
+#define HNS_ROCE_EQC_REPORT_TIMER_S 0
+#define HNS_ROCE_EQC_REPORT_TIMER_M GENMASK(31, 0)
+
+/* WORD4 */
+#define HNS_ROCE_EQC_EQE_BA_L_S 0
+#define HNS_ROCE_EQC_EQE_BA_L_M GENMASK(31, 0)
+
+/* WORD5 */
+#define HNS_ROCE_EQC_EQE_BA_H_S 0
+#define HNS_ROCE_EQC_EQE_BA_H_M GENMASK(28, 0)
+
+/* WORD6 */
+#define HNS_ROCE_EQC_SHIFT_S 0
+#define HNS_ROCE_EQC_SHIFT_M GENMASK(7, 0)
+
+#define HNS_ROCE_EQC_MSI_INDX_S 8
+#define HNS_ROCE_EQC_MSI_INDX_M GENMASK(15, 8)
+
+#define HNS_ROCE_EQC_CUR_EQE_BA_L_S 16
+#define HNS_ROCE_EQC_CUR_EQE_BA_L_M GENMASK(31, 16)
+
+/* WORD7 */
+#define HNS_ROCE_EQC_CUR_EQE_BA_M_S 0
+#define HNS_ROCE_EQC_CUR_EQE_BA_M_M GENMASK(31, 0)
+
+/* WORD8 */
+#define HNS_ROCE_EQC_CUR_EQE_BA_H_S 0
+#define HNS_ROCE_EQC_CUR_EQE_BA_H_M GENMASK(3, 0)
+
+#define HNS_ROCE_EQC_CONS_INDX_S 8
+#define HNS_ROCE_EQC_CONS_INDX_M GENMASK(31, 8)
+
+/* WORD9 */
+#define HNS_ROCE_EQC_NXT_EQE_BA_L_S 0
+#define HNS_ROCE_EQC_NXT_EQE_BA_L_M GENMASK(31, 0)
+
+/* WORD10 */
+#define HNS_ROCE_EQC_NXT_EQE_BA_H_S 0
+#define HNS_ROCE_EQC_NXT_EQE_BA_H_M GENMASK(19, 0)
+
+#define HNS_ROCE_V2_CEQE_COMP_CQN_S 0
+#define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0)
+
+#define HNS_ROCE_V2_AEQE_EVENT_TYPE_S 0
+#define HNS_ROCE_V2_AEQE_EVENT_TYPE_M GENMASK(7, 0)
+
+#define HNS_ROCE_V2_AEQE_SUB_TYPE_S 8
+#define HNS_ROCE_V2_AEQE_SUB_TYPE_M GENMASK(15, 8)
+
+#define HNS_ROCE_V2_EQ_DB_CMD_S 16
+#define HNS_ROCE_V2_EQ_DB_CMD_M GENMASK(17, 16)
+
+#define HNS_ROCE_V2_EQ_DB_TAG_S 0
+#define HNS_ROCE_V2_EQ_DB_TAG_M GENMASK(7, 0)
+
+#define HNS_ROCE_V2_EQ_DB_PARA_S 0
+#define HNS_ROCE_V2_EQ_DB_PARA_M GENMASK(23, 0)
+
+#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
+#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
+
#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index cf02ac2d3596..aa0c242ddc50 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -748,12 +748,10 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
goto error_failed_cmd_init;
}
- if (hr_dev->cmd_mod) {
- ret = hns_roce_init_eq_table(hr_dev);
- if (ret) {
- dev_err(dev, "eq init failed!\n");
- goto error_failed_eq_table;
- }
+ ret = hr_dev->hw->init_eq(hr_dev);
+ if (ret) {
+ dev_err(dev, "eq init failed!\n");
+ goto error_failed_eq_table;
}
if (hr_dev->cmd_mod) {
@@ -805,8 +803,7 @@ error_failed_init_hem:
hns_roce_cmd_use_polling(hr_dev);
error_failed_use_event:
- if (hr_dev->cmd_mod)
- hns_roce_cleanup_eq_table(hr_dev);
+ hr_dev->hw->cleanup_eq(hr_dev);
error_failed_eq_table:
hns_roce_cmd_cleanup(hr_dev);
@@ -837,8 +834,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev)
if (hr_dev->cmd_mod)
hns_roce_cmd_use_polling(hr_dev);
- if (hr_dev->cmd_mod)
- hns_roce_cleanup_eq_table(hr_dev);
+ hr_dev->hw->cleanup_eq(hr_dev);
hns_roce_cmd_cleanup(hr_dev);
if (hr_dev->hw->cmq_exit)
hr_dev->hw->cmq_exit(hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 49586ec8126a..4414cea9ef56 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -65,6 +65,7 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
if (atomic_dec_and_test(&qp->refcount))
complete(&qp->free);
}
+EXPORT_SYMBOL_GPL(hns_roce_qp_event);
static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
enum hns_roce_event type)
@@ -454,6 +455,13 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sge.sge_shift = 4;
}
+ /* ud sqwqe's sge use extend sge */
+ if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) {
+ hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
+ hr_qp->sq.max_gs);
+ hr_qp->sge.sge_shift = 4;
+ }
+
/* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
hr_qp->sq.offset = 0;
@@ -493,6 +501,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
int ret = 0;
u32 page_shift;
u32 npages;
+ int i;
mutex_init(&hr_qp->mutex);
spin_lock_init(&hr_qp->sq.lock);
@@ -500,6 +509,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
hr_qp->state = IB_QPS_RESET;
+ hr_qp->ibqp.qp_type = init_attr->qp_type;
+
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
else
@@ -512,18 +523,48 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
goto err_out;
}
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+ /* allocate recv inline buf */
+ hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt,
+ sizeof(struct hns_roce_rinl_wqe),
+ GFP_KERNEL);
+ if (!hr_qp->rq_inl_buf.wqe_list) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ hr_qp->rq_inl_buf.wqe_cnt = hr_qp->rq.wqe_cnt;
+
+ /* Firstly, allocate a list of sge space buffer */
+ hr_qp->rq_inl_buf.wqe_list[0].sg_list =
+ kcalloc(hr_qp->rq_inl_buf.wqe_cnt,
+ init_attr->cap.max_recv_sge *
+ sizeof(struct hns_roce_rinl_sge),
+ GFP_KERNEL);
+ if (!hr_qp->rq_inl_buf.wqe_list[0].sg_list) {
+ ret = -ENOMEM;
+ goto err_wqe_list;
+ }
+
+ for (i = 1; i < hr_qp->rq_inl_buf.wqe_cnt; i++)
+ /* Secondly, reallocate the buffer */
+ hr_qp->rq_inl_buf.wqe_list[i].sg_list =
+ &hr_qp->rq_inl_buf.wqe_list[0].sg_list[i *
+ init_attr->cap.max_recv_sge];
+ }
+
if (ib_pd->uobject) {
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
dev_err(dev, "ib_copy_from_udata error for create qp\n");
ret = -EFAULT;
- goto err_out;
+ goto err_rq_sge_list;
}
ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp,
&ucmd);
if (ret) {
dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n");
- goto err_out;
+ goto err_rq_sge_list;
}
hr_qp->umem = ib_umem_get(ib_pd->uobject->context,
@@ -532,7 +573,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
if (IS_ERR(hr_qp->umem)) {
dev_err(dev, "ib_umem_get error for create qp\n");
ret = PTR_ERR(hr_qp->umem);
- goto err_out;
+ goto err_rq_sge_list;
}
hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
@@ -566,13 +607,13 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
dev_err(dev, "init_attr->create_flags error!\n");
ret = -EINVAL;
- goto err_out;
+ goto err_rq_sge_list;
}
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) {
dev_err(dev, "init_attr->create_flags error!\n");
ret = -EINVAL;
- goto err_out;
+ goto err_rq_sge_list;
}
/* Set SQ size */
@@ -580,7 +621,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
hr_qp);
if (ret) {
dev_err(dev, "hns_roce_set_kernel_sq_size error!\n");
- goto err_out;
+ goto err_rq_sge_list;
}
/* QP doorbell register address */
@@ -596,7 +637,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
&hr_qp->hr_buf, page_shift)) {
dev_err(dev, "hns_roce_buf_alloc error!\n");
ret = -ENOMEM;
- goto err_out;
+ goto err_rq_sge_list;
}
hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
@@ -678,6 +719,14 @@ err_buf:
else
hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
+err_rq_sge_list:
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
+ kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
+
+err_wqe_list:
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
+ kfree(hr_qp->rq_inl_buf.wqe_list);
+
err_out:
return ret;
}
@@ -724,8 +773,13 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
hr_qp = &hr_sqp->hr_qp;
hr_qp->port = init_attr->port_num - 1;
hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
- hr_qp->ibqp.qp_num = HNS_ROCE_MAX_PORTS +
- hr_dev->iboe.phy_port[hr_qp->port];
+
+ /* when hw version is v1, the sqpn is allocated */
+ if (hr_dev->caps.max_sq_sg <= 2)
+ hr_qp->ibqp.qp_num = HNS_ROCE_MAX_PORTS +
+ hr_dev->iboe.phy_port[hr_qp->port];
+ else
+ hr_qp->ibqp.qp_num = 1;
ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
hr_qp->ibqp.qp_num, hr_qp);
diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig
index f6d20ba88c03..2962979c06e9 100644
--- a/drivers/infiniband/hw/i40iw/Kconfig
+++ b/drivers/infiniband/hw/i40iw/Kconfig
@@ -5,4 +5,3 @@ config INFINIBAND_I40IW
select GENERIC_ALLOCATOR
---help---
Intel(R) Ethernet X722 iWARP Driver
- INET && I40IW && INFINIBAND && I40E
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 4ae9131b6350..bcddd7061fc0 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -587,5 +587,8 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
int i40iw_net_event(struct notifier_block *notifier,
unsigned long event,
void *ptr);
+int i40iw_netdevice_event(struct notifier_block *notifier,
+ unsigned long event,
+ void *ptr);
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 77870f9e1736..abf4cd897849 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -92,14 +92,9 @@ void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp)
static u8 i40iw_derive_hw_ird_setting(u16 cm_ird)
{
u8 encoded_ird_size;
- u8 pof2_cm_ird = 1;
-
- /* round-off to next powerof2 */
- while (pof2_cm_ird < cm_ird)
- pof2_cm_ird *= 2;
/* ird_size field is encoded in qp_ctx */
- switch (pof2_cm_ird) {
+ switch (cm_ird ? roundup_pow_of_two(cm_ird) : 0) {
case I40IW_HW_IRD_SETTING_64:
encoded_ird_size = 3;
break;
@@ -125,13 +120,16 @@ static u8 i40iw_derive_hw_ird_setting(u16 cm_ird)
* @conn_ird: connection IRD
* @conn_ord: connection ORD
*/
-static void i40iw_record_ird_ord(struct i40iw_cm_node *cm_node, u16 conn_ird, u16 conn_ord)
+static void i40iw_record_ird_ord(struct i40iw_cm_node *cm_node, u32 conn_ird,
+ u32 conn_ord)
{
if (conn_ird > I40IW_MAX_IRD_SIZE)
conn_ird = I40IW_MAX_IRD_SIZE;
if (conn_ord > I40IW_MAX_ORD_SIZE)
conn_ord = I40IW_MAX_ORD_SIZE;
+ else if (!conn_ord && cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO)
+ conn_ord = 1;
cm_node->ird_size = conn_ird;
cm_node->ord_size = conn_ord;
@@ -2878,15 +2876,13 @@ static struct i40iw_cm_listener *i40iw_make_listen_node(
* i40iw_create_cm_node - make a connection node with params
* @cm_core: cm's core
* @iwdev: iwarp device structure
- * @private_data_len: len to provate data for mpa request
- * @private_data: pointer to private data for connection
+ * @conn_param: upper layer connection parameters
* @cm_info: quad info for connection
*/
static struct i40iw_cm_node *i40iw_create_cm_node(
struct i40iw_cm_core *cm_core,
struct i40iw_device *iwdev,
- u16 private_data_len,
- void *private_data,
+ struct iw_cm_conn_param *conn_param,
struct i40iw_cm_info *cm_info)
{
struct i40iw_cm_node *cm_node;
@@ -2894,6 +2890,9 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
struct i40iw_cm_node *loopback_remotenode;
struct i40iw_cm_info loopback_cm_info;
+ u16 private_data_len = conn_param->private_data_len;
+ const void *private_data = conn_param->private_data;
+
/* create a CM connection node */
cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL);
if (!cm_node)
@@ -2902,6 +2901,8 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
cm_node->tcp_cntxt.client = 1;
cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
+ i40iw_record_ird_ord(cm_node, conn_param->ird, conn_param->ord);
+
if (!memcmp(cm_info->loc_addr, cm_info->rem_addr, sizeof(cm_info->loc_addr))) {
loopback_remotelistener = i40iw_find_listener(
cm_core,
@@ -2935,6 +2936,10 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
private_data_len);
loopback_remotenode->pdata.size = private_data_len;
+ if (loopback_remotenode->ord_size > cm_node->ird_size)
+ loopback_remotenode->ord_size =
+ cm_node->ird_size;
+
cm_node->state = I40IW_CM_STATE_OFFLOADED;
cm_node->tcp_cntxt.rcv_nxt =
loopback_remotenode->tcp_cntxt.loc_seq_num;
@@ -3691,7 +3696,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
cm_node->qhash_set = false;
i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
- cm_node->accelerated = 1;
+ cm_node->accelerated = true;
status =
i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
if (status)
@@ -3815,9 +3820,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
__func__, cm_id->tos, cm_info.user_pri);
cm_id->add_ref(cm_id);
cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev,
- conn_param->private_data_len,
- (void *)conn_param->private_data,
- &cm_info);
+ conn_param, &cm_info);
if (IS_ERR(cm_node)) {
ret = PTR_ERR(cm_node);
@@ -3849,11 +3852,6 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
cm_node->apbvt_set = true;
- i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
- if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
- !cm_node->ord_size)
- cm_node->ord_size = 1;
-
iwqp->cm_node = cm_node;
cm_node->iwqp = iwqp;
iwqp->cm_id = cm_id;
@@ -4058,7 +4056,7 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
cm_node->qhash_set = false;
i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
- cm_node->accelerated = 1;
+ cm_node->accelerated = true;
status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY,
0);
if (status)
@@ -4242,10 +4240,16 @@ set_qhash:
}
/**
- * i40iw_cm_disconnect_all - disconnect all connected qp's
+ * i40iw_cm_teardown_connections - teardown QPs
* @iwdev: device pointer
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @disconnect_all: flag indicating disconnect all QPs
+ * teardown QPs where source or destination addr matches ip addr
*/
-void i40iw_cm_disconnect_all(struct i40iw_device *iwdev)
+void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr,
+ struct i40iw_cm_info *nfo,
+ bool disconnect_all)
{
struct i40iw_cm_core *cm_core = &iwdev->cm_core;
struct list_head *list_core_temp;
@@ -4259,8 +4263,13 @@ void i40iw_cm_disconnect_all(struct i40iw_device *iwdev)
spin_lock_irqsave(&cm_core->ht_lock, flags);
list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
cm_node = container_of(list_node, struct i40iw_cm_node, list);
- atomic_inc(&cm_node->ref_count);
- list_add(&cm_node->connected_entry, &connected_list);
+ if (disconnect_all ||
+ (nfo->vlan_id == cm_node->vlan_id &&
+ (!memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16) ||
+ !memcmp(cm_node->rem_addr, ipaddr, nfo->ipv4 ? 4 : 16)))) {
+ atomic_inc(&cm_node->ref_count);
+ list_add(&cm_node->connected_entry, &connected_list);
+ }
}
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
@@ -4294,6 +4303,9 @@ void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
enum i40iw_quad_hash_manage_type op =
ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE;
+ nfo.vlan_id = vlan_id;
+ nfo.ipv4 = ipv4;
+
/* Disable or enable qhash for listeners */
spin_lock_irqsave(&cm_core->listen_list_lock, flags);
list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
@@ -4303,8 +4315,6 @@ void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
memcpy(nfo.loc_addr, listen_node->loc_addr,
sizeof(nfo.loc_addr));
nfo.loc_port = listen_node->loc_port;
- nfo.ipv4 = listen_node->ipv4;
- nfo.vlan_id = listen_node->vlan_id;
nfo.user_pri = listen_node->user_pri;
if (!list_empty(&listen_node->child_listen_list)) {
i40iw_qhash_ctrl(iwdev,
@@ -4326,7 +4336,7 @@ void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
}
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- /* disconnect any connected qp's on ifdown */
+ /* teardown connected qp's on ifdown */
if (!ifup)
- i40iw_cm_disconnect_all(iwdev);
+ i40iw_cm_teardown_connections(iwdev, ipaddr, &nfo, false);
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index 0d5840d2c4fc..cf60c451e071 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -276,8 +276,6 @@ struct i40iw_cm_tcp_context {
u32 mss;
u8 snd_wscale;
u8 rcv_wscale;
-
- struct timeval sent_ts;
};
enum i40iw_cm_listener_state {
@@ -337,7 +335,7 @@ struct i40iw_cm_node {
u16 mpav2_ird_ord;
struct iw_cm_id *cm_id;
struct list_head list;
- int accelerated;
+ bool accelerated;
struct i40iw_cm_listener *listener;
int apbvt_set;
int accept_pend;
@@ -455,5 +453,7 @@ int i40iw_arp_table(struct i40iw_device *iwdev,
void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
u32 *ipaddr, bool ipv4, bool ifup);
-void i40iw_cm_disconnect_all(struct i40iw_device *iwdev);
+void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr,
+ struct i40iw_cm_info *nfo,
+ bool disconnect_all);
#endif /* I40IW_CM_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index da9821a10e0d..c74fd3309b93 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -1893,8 +1893,6 @@ static enum i40iw_status_code i40iw_sc_get_next_aeqe(struct i40iw_sc_aeq *aeq,
static enum i40iw_status_code i40iw_sc_repost_aeq_entries(struct i40iw_sc_dev *dev,
u32 count)
{
- if (count > I40IW_MAX_AEQ_ALLOCATE_COUNT)
- return I40IW_ERR_INVALID_SIZE;
if (dev->is_pf)
i40iw_wr32(dev->hw, I40E_PFPE_AEQALLOC, count);
@@ -3872,7 +3870,6 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
struct i40iw_virt_mem virt_mem;
u32 i, mem_size;
u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted;
- u32 powerof2;
u64 sd_needed;
u32 loop_count = 0;
@@ -3928,8 +3925,10 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].cnt = 1;
hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt = mrwanted;
- hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt = I40IW_MAX_WQ_ENTRIES * qpwanted;
- hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt = 4 * I40IW_MAX_IRD_SIZE * qpwanted;
+ hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt =
+ roundup_pow_of_two(I40IW_MAX_WQ_ENTRIES * qpwanted);
+ hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt =
+ roundup_pow_of_two(2 * I40IW_MAX_IRD_SIZE * qpwanted);
hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].cnt =
hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt / hmc_fpm_misc->xf_block_size;
hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].cnt =
@@ -3945,16 +3944,10 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
if ((loop_count > 1000) ||
((!(loop_count % 10)) &&
(qpwanted > qpwantedoriginal * 2 / 3))) {
- if (qpwanted > FPM_MULTIPLIER) {
- qpwanted -= FPM_MULTIPLIER;
- powerof2 = 1;
- while (powerof2 < qpwanted)
- powerof2 *= 2;
- powerof2 /= 2;
- qpwanted = powerof2;
- } else {
- qpwanted /= 2;
- }
+ if (qpwanted > FPM_MULTIPLIER)
+ qpwanted = roundup_pow_of_two(qpwanted -
+ FPM_MULTIPLIER);
+ qpwanted >>= 1;
}
if (mrwanted > FPM_MULTIPLIER * 10)
mrwanted -= FPM_MULTIPLIER * 10;
@@ -3962,8 +3955,6 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
pblewanted -= FPM_MULTIPLIER * 1000;
} while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000);
- sd_needed = i40iw_est_sd(dev, hmc_info);
-
i40iw_debug(dev, I40IW_DEBUG_HMC,
"loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n",
loop_count, sd_needed,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index 029083cb81d5..4b65e4140bd7 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -97,6 +97,7 @@
#define RDMA_OPCODE_MASK 0x0f
#define RDMA_READ_REQ_OPCODE 1
#define Q2_BAD_FRAME_OFFSET 72
+#define Q2_FPSN_OFFSET 64
#define CQE_MAJOR_DRV 0x8000
#define I40IW_TERM_SENT 0x01
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index e96bdafbcbb3..61540e14e4b9 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -385,6 +385,8 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
iwcq->ibcq.event_handler(&ibevent, iwcq->ibcq.cq_context);
}
break;
+ case I40IW_AE_LLP_DOUBT_REACHABILITY:
+ break;
case I40IW_AE_PRIV_OPERATION_DENIED:
case I40IW_AE_STAG_ZERO_INVALID:
case I40IW_AE_IB_RREQ_AND_Q1_FULL:
@@ -403,7 +405,6 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
case I40IW_AE_LLP_SYN_RECEIVED:
case I40IW_AE_LLP_TOO_MANY_RETRIES:
- case I40IW_AE_LLP_DOUBT_REACHABILITY:
case I40IW_AE_LCE_QP_CATASTROPHIC:
case I40IW_AE_LCE_FUNCTION_CATASTROPHIC:
case I40IW_AE_LCE_CQ_CATASTROPHIC:
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index e824296713e2..b08862978de8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -99,6 +99,10 @@ static struct notifier_block i40iw_net_notifier = {
.notifier_call = i40iw_net_event
};
+static struct notifier_block i40iw_netdevice_notifier = {
+ .notifier_call = i40iw_netdevice_event
+};
+
/**
* i40iw_find_i40e_handler - find a handler given a client info
* @ldev: pointer to a client info
@@ -483,6 +487,7 @@ static enum i40iw_status_code i40iw_create_hmc_objs(struct i40iw_device *iwdev,
for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
info.rsrc_type = iw_hmc_obj_types[i];
info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt;
+ info.add_sd_cnt = 0;
status = i40iw_create_hmc_obj_type(dev, &info);
if (status) {
i40iw_pr_err("create obj type %d status = %d\n",
@@ -607,7 +612,7 @@ static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev)
INIT_LIST_HEAD(&cqp->cqp_avail_reqs);
INIT_LIST_HEAD(&cqp->cqp_pending_reqs);
/* init the waitq of the cqp_requests and add them to the list */
- for (i = 0; i < I40IW_CQP_SW_SQSIZE_2048; i++) {
+ for (i = 0; i < sqsize; i++) {
init_waitqueue_head(&cqp->cqp_requests[i].waitq);
list_add_tail(&cqp->cqp_requests[i].list, &cqp->cqp_avail_reqs);
}
@@ -1285,7 +1290,7 @@ static void i40iw_wait_pe_ready(struct i40iw_hw *hw)
__LINE__, statuscpu2);
if ((statuscpu0 == 0x80) && (statuscpu1 == 0x80) && (statuscpu2 == 0x80))
break; /* SUCCESS */
- mdelay(1000);
+ msleep(1000);
retrycount++;
} while (retrycount < 14);
i40iw_wr32(hw, 0xb4040, 0x4C104C5);
@@ -1393,6 +1398,7 @@ static void i40iw_register_notifiers(void)
register_inetaddr_notifier(&i40iw_inetaddr_notifier);
register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
register_netevent_notifier(&i40iw_net_notifier);
+ register_netdevice_notifier(&i40iw_netdevice_notifier);
}
/**
@@ -1404,6 +1410,7 @@ static void i40iw_unregister_notifiers(void)
unregister_netevent_notifier(&i40iw_net_notifier);
unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
+ unregister_netdevice_notifier(&i40iw_netdevice_notifier);
}
/**
@@ -1793,7 +1800,7 @@ static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool
if (reset)
iwdev->reset = true;
- i40iw_cm_disconnect_all(iwdev);
+ i40iw_cm_teardown_connections(iwdev, NULL, NULL, true);
destroy_workqueue(iwdev->virtchnl_wq);
i40iw_deinit_device(iwdev);
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index 796a815b53fd..4c21197830b3 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -48,7 +48,6 @@ static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid);
static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx);
static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc
*rsrc, bool initial);
-static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp);
/**
* i40iw_puda_get_listbuf - get buffer from puda list
* @list: list to use for buffers (ILQ or IEQ)
@@ -1378,7 +1377,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
u32 *hw_host_ctx = (u32 *)qp->hw_host_ctx;
u32 rcv_wnd = hw_host_ctx[23];
/* first partial seq # in q2 */
- u32 fps = qp->q2_buf[16];
+ u32 fps = *(u32 *)(qp->q2_buf + Q2_FPSN_OFFSET);
struct list_head *rxlist = &pfpdu->rxlist;
struct list_head *plist;
@@ -1483,7 +1482,7 @@ static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid)
* @ieq: ieq resource
* @qp: all pending fpdu buffers
*/
-static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp)
+void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp)
{
struct i40iw_puda_buf *buf;
struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h
index 660aa3edae56..53a7d58c84b5 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.h
@@ -184,4 +184,5 @@ enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev, struct
enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
+void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp);
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index 3ec5389a81a1..8afa5a67a86b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -894,20 +894,6 @@ exit:
}
/**
- * i40iw_qp_roundup - return round up QP WQ depth
- * @wqdepth: WQ depth in quantas to round up
- */
-static int i40iw_qp_round_up(u32 wqdepth)
-{
- int scount = 1;
-
- for (wqdepth--; scount <= 16; scount *= 2)
- wqdepth |= wqdepth >> scount;
-
- return ++wqdepth;
-}
-
-/**
* i40iw_get_wqe_shift - get shift count for maximum wqe size
* @sge: Maximum Scatter Gather Elements wqe
* @inline_data: Maximum inline data size
@@ -934,7 +920,7 @@ void i40iw_get_wqe_shift(u32 sge, u32 inline_data, u8 *shift)
*/
enum i40iw_status_code i40iw_get_sqdepth(u32 sq_size, u8 shift, u32 *sqdepth)
{
- *sqdepth = i40iw_qp_round_up((sq_size << shift) + I40IW_SQ_RSVD);
+ *sqdepth = roundup_pow_of_two((sq_size << shift) + I40IW_SQ_RSVD);
if (*sqdepth < (I40IW_QP_SW_MIN_WQSIZE << shift))
*sqdepth = I40IW_QP_SW_MIN_WQSIZE << shift;
@@ -953,7 +939,7 @@ enum i40iw_status_code i40iw_get_sqdepth(u32 sq_size, u8 shift, u32 *sqdepth)
*/
enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth)
{
- *rqdepth = i40iw_qp_round_up((rq_size << shift) + I40IW_RQ_RSVD);
+ *rqdepth = roundup_pow_of_two((rq_size << shift) + I40IW_RQ_RSVD);
if (*rqdepth < (I40IW_QP_SW_MIN_WQSIZE << shift))
*rqdepth = I40IW_QP_SW_MIN_WQSIZE << shift;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
index e73efc59a0ab..b125925641e0 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -59,7 +59,6 @@ enum i40iw_device_capabilities_const {
I40IW_MAX_CEQ_ENTRIES = 131071,
I40IW_MIN_CQ_SIZE = 1,
I40IW_MAX_CQ_SIZE = 1048575,
- I40IW_MAX_AEQ_ALLOCATE_COUNT = 255,
I40IW_DB_ID_ZERO = 0,
I40IW_MAX_WQ_FRAGMENT_COUNT = 3,
I40IW_MAX_SGE_RD = 1,
@@ -72,7 +71,7 @@ enum i40iw_device_capabilities_const {
I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496,
I40IW_MAX_INLINE_DATA_SIZE = 48,
I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48,
- I40IW_MAX_IRD_SIZE = 63,
+ I40IW_MAX_IRD_SIZE = 64,
I40IW_MAX_ORD_SIZE = 127,
I40IW_MAX_WQ_ENTRIES = 2048,
I40IW_Q2_BUFFER_SIZE = (248 + 100),
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 8845dba7c438..ddc1056b0b4e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -137,7 +137,7 @@ inline u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg)
}
/**
- * i40iw_inetaddr_event - system notifier for netdev events
+ * i40iw_inetaddr_event - system notifier for ipv4 addr events
* @notfier: not used
* @event: event for notifier
* @ptr: if address
@@ -200,7 +200,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
}
/**
- * i40iw_inet6addr_event - system notifier for ipv6 netdev events
+ * i40iw_inet6addr_event - system notifier for ipv6 addr events
* @notfier: not used
* @event: event for notifier
* @ptr: if address
@@ -252,7 +252,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
}
/**
- * i40iw_net_event - system notifier for net events
+ * i40iw_net_event - system notifier for netevents
* @notfier: not used
* @event: event for notifier
* @ptr: neighbor
@@ -297,6 +297,50 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *
}
/**
+ * i40iw_netdevice_event - system notifier for netdev events
+ * @notfier: not used
+ * @event: event for notifier
+ * @ptr: netdev
+ */
+int i40iw_netdevice_event(struct notifier_block *notifier,
+ unsigned long event,
+ void *ptr)
+{
+ struct net_device *event_netdev;
+ struct net_device *netdev;
+ struct i40iw_device *iwdev;
+ struct i40iw_handler *hdl;
+
+ event_netdev = netdev_notifier_info_to_dev(ptr);
+
+ hdl = i40iw_find_netdev(event_netdev);
+ if (!hdl)
+ return NOTIFY_DONE;
+
+ iwdev = &hdl->device;
+ if (iwdev->init_state < RDMA_DEV_REGISTERED || iwdev->closing)
+ return NOTIFY_DONE;
+
+ netdev = iwdev->ldev->netdev;
+ if (netdev != event_netdev)
+ return NOTIFY_DONE;
+
+ iwdev->iw_status = 1;
+
+ switch (event) {
+ case NETDEV_DOWN:
+ iwdev->iw_status = 0;
+ /* Fall through */
+ case NETDEV_UP:
+ i40iw_port_ibevent(iwdev);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+/**
* i40iw_get_cqp_request - get cqp struct
* @cqp: device cqp ptr
* @wait: cqp to be used in wait mode
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 3c6f3ce88f89..70024e8e2692 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -412,6 +412,7 @@ void i40iw_free_qp_resources(struct i40iw_device *iwdev,
{
struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
+ i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
if (qp_num)
i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num);
@@ -1637,6 +1638,7 @@ static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd,
err_code = -EOVERFLOW;
goto err;
}
+ stag &= ~I40IW_CQPSQ_STAG_KEY_MASK;
iwmr->stag = stag;
iwmr->ibmr.rkey = stag;
iwmr->ibmr.lkey = stag;
@@ -2242,14 +2244,12 @@ static int i40iw_post_send(struct ib_qp *ibqp,
info.op.inline_rdma_write.len = ib_wr->sg_list[0].length;
info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
- info.op.inline_rdma_write.rem_addr.len = ib_wr->sg_list->length;
ret = ukqp->ops.iw_inline_rdma_write(ukqp, &info, false);
} else {
info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
- info.op.rdma_write.rem_addr.len = ib_wr->sg_list->length;
ret = ukqp->ops.iw_rdma_write(ukqp, &info, false);
}
@@ -2271,7 +2271,6 @@ static int i40iw_post_send(struct ib_qp *ibqp,
info.op_type = I40IW_OP_TYPE_RDMA_READ;
info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey;
- info.op.rdma_read.rem_addr.len = ib_wr->sg_list->length;
info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr;
info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index bf4f14a1b4fc..9a566ee3ceff 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -170,7 +170,7 @@ err_buf:
return err;
}
-#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_FLAGS_TIMESTAMP_COMPLETION
+#define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -246,7 +246,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
cq->db.dma, &cq->mcq, vector, 0,
- !!(cq->create_flags & IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
+ !!(cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION));
if (err)
goto err_dbmap;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 8c8a16791a3f..8d2ee9322f2e 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -589,6 +589,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
if (props->rss_caps.supported_qpts) {
resp.rss_caps.rx_hash_function =
MLX4_IB_RX_HASH_FUNC_TOEPLITZ;
+
resp.rss_caps.rx_hash_fields_mask =
MLX4_IB_RX_HASH_SRC_IPV4 |
MLX4_IB_RX_HASH_DST_IPV4 |
@@ -598,6 +599,11 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
MLX4_IB_RX_HASH_DST_PORT_TCP |
MLX4_IB_RX_HASH_SRC_PORT_UDP |
MLX4_IB_RX_HASH_DST_PORT_UDP;
+
+ if (dev->dev->caps.tunnel_offload_mode ==
+ MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
+ resp.rss_caps.rx_hash_fields_mask |=
+ MLX4_IB_RX_HASH_INNER;
}
}
@@ -2995,9 +3001,8 @@ err_steer_free_bitmap:
kfree(ibdev->ib_uc_qpns_bitmap);
err_steer_qp_release:
- if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
- mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
- ibdev->steer_qpn_count);
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
err_counter:
for (i = 0; i < ibdev->num_ports; ++i)
mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
@@ -3102,11 +3107,9 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
ibdev->iboe.nb.notifier_call = NULL;
}
- if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
- mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
- ibdev->steer_qpn_count);
- kfree(ibdev->ib_uc_qpns_bitmap);
- }
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
+ kfree(ibdev->ib_uc_qpns_bitmap);
iounmap(ibdev->uar_map);
for (p = 0; p < ibdev->num_ports; ++p)
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index caf490ab24c8..f045491f2c14 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -734,10 +734,24 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
return (-EOPNOTSUPP);
}
+ if (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_INNER) {
+ if (dev->dev->caps.tunnel_offload_mode ==
+ MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+ /*
+ * Hash according to inner headers if exist, otherwise
+ * according to outer headers.
+ */
+ rss_ctx->flags |= MLX4_RSS_BY_INNER_HEADERS_IPONLY;
+ } else {
+ pr_debug("RSS Hash for inner headers isn't supported\n");
+ return (-EOPNOTSUPP);
+ }
+ }
+
return 0;
}
-static int create_qp_rss(struct mlx4_ib_dev *dev, struct ib_pd *ibpd,
+static int create_qp_rss(struct mlx4_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx4_ib_create_qp_rss *ucmd,
struct mlx4_ib_qp *qp)
@@ -860,7 +874,7 @@ static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd,
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
- err = create_qp_rss(to_mdev(pd->device), pd, init_attr, &ucmd, qp);
+ err = create_qp_rss(to_mdev(pd->device), init_attr, &ucmd, qp);
if (err) {
kfree(qp);
return ERR_PTR(err);
@@ -1836,6 +1850,8 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev,
mlx4_ib_gid_index_to_real_index(dev, port,
grh->sgid_index);
+ if (real_sgid_index < 0)
+ return real_sgid_index;
if (real_sgid_index >= dev->dev->caps.gid_table_len[port]) {
pr_err("sgid_index (%u) too large. max is %d\n",
real_sgid_index, dev->dev->caps.gid_table_len[port] - 1);
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index 2d32b519bb61..985fa2637390 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -247,21 +247,30 @@ static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
}
}
-static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var)
+static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
+ int offset, u32 *var)
{
int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
void *out;
void *field;
int err;
enum mlx5_ib_cong_node_type node;
+ struct mlx5_core_dev *mdev;
+
+ /* Takes a 1-based port number */
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
+ if (!mdev)
+ return -ENODEV;
out = kvzalloc(outlen, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
+ if (!out) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
node = mlx5_ib_param_to_node(offset);
- err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen);
+ err = mlx5_cmd_query_cong_params(mdev, node, out, outlen);
if (err)
goto free;
@@ -270,21 +279,32 @@ static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var)
free:
kvfree(out);
+alloc_err:
+ mlx5_ib_put_native_port_mdev(dev, port_num + 1);
return err;
}
-static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var)
+static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
+ int offset, u32 var)
{
int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
void *in;
void *field;
enum mlx5_ib_cong_node_type node;
+ struct mlx5_core_dev *mdev;
u32 attr_mask = 0;
int err;
+ /* Takes a 1-based port number */
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
+ if (!mdev)
+ return -ENODEV;
+
in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ if (!in) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
MLX5_SET(modify_cong_params_in, in, opcode,
MLX5_CMD_OP_MODIFY_CONG_PARAMS);
@@ -299,8 +319,10 @@ static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var)
MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
attr_mask);
- err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen);
+ err = mlx5_cmd_modify_cong_params(mdev, in, inlen);
kvfree(in);
+alloc_err:
+ mlx5_ib_put_native_port_mdev(dev, port_num + 1);
return err;
}
@@ -324,7 +346,7 @@ static ssize_t set_param(struct file *filp, const char __user *buf,
if (kstrtou32(lbuf, 0, &var))
return -EINVAL;
- ret = mlx5_ib_set_cc_params(param->dev, offset, var);
+ ret = mlx5_ib_set_cc_params(param->dev, param->port_num, offset, var);
return ret ? ret : count;
}
@@ -340,7 +362,7 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
if (*pos)
return 0;
- ret = mlx5_ib_get_cc_params(param->dev, offset, &var);
+ ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var);
if (ret)
return ret;
@@ -362,44 +384,51 @@ static const struct file_operations dbg_cc_fops = {
.read = get_param,
};
-void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev)
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
{
if (!mlx5_debugfs_root ||
- !dev->dbg_cc_params ||
- !dev->dbg_cc_params->root)
+ !dev->port[port_num].dbg_cc_params ||
+ !dev->port[port_num].dbg_cc_params->root)
return;
- debugfs_remove_recursive(dev->dbg_cc_params->root);
- kfree(dev->dbg_cc_params);
- dev->dbg_cc_params = NULL;
+ debugfs_remove_recursive(dev->port[port_num].dbg_cc_params->root);
+ kfree(dev->port[port_num].dbg_cc_params);
+ dev->port[port_num].dbg_cc_params = NULL;
}
-int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev)
+int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
{
struct mlx5_ib_dbg_cc_params *dbg_cc_params;
+ struct mlx5_core_dev *mdev;
int i;
if (!mlx5_debugfs_root)
goto out;
- if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed) ||
- !MLX5_CAP_GEN(dev->mdev, cc_modify_allowed))
+ /* Takes a 1-based port number */
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
+ if (!mdev)
goto out;
+ if (!MLX5_CAP_GEN(mdev, cc_query_allowed) ||
+ !MLX5_CAP_GEN(mdev, cc_modify_allowed))
+ goto put_mdev;
+
dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL);
if (!dbg_cc_params)
- goto out;
+ goto err;
- dev->dbg_cc_params = dbg_cc_params;
+ dev->port[port_num].dbg_cc_params = dbg_cc_params;
dbg_cc_params->root = debugfs_create_dir("cc_params",
- dev->mdev->priv.dbg_root);
+ mdev->priv.dbg_root);
if (!dbg_cc_params->root)
goto err;
for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
dbg_cc_params->params[i].offset = i;
dbg_cc_params->params[i].dev = dev;
+ dbg_cc_params->params[i].port_num = port_num;
dbg_cc_params->params[i].dentry =
debugfs_create_file(mlx5_ib_dbg_cc_name[i],
0600, dbg_cc_params->root,
@@ -408,11 +437,17 @@ int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev)
if (!dbg_cc_params->params[i].dentry)
goto err;
}
-out: return 0;
+
+put_mdev:
+ mlx5_ib_put_native_port_mdev(dev, port_num + 1);
+out:
+ return 0;
err:
mlx5_ib_warn(dev, "cong debugfs failure\n");
- mlx5_ib_cleanup_cong_debugfs(dev);
+ mlx5_ib_cleanup_cong_debugfs(dev, port_num);
+ mlx5_ib_put_native_port_mdev(dev, port_num + 1);
+
/*
* We don't want to fail driver if debugfs failed to initialize,
* so we are not forwarding error to the user.
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 18705cbcdc8c..5b974fb97611 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -1010,7 +1010,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
MLX5_SET(cqc, cqc, uar_page, index);
MLX5_SET(cqc, cqc, c_eqn, eqn);
MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
- if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+ if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
MLX5_SET(cqc, cqc, oi, 1);
err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 1003b0133a49..32a9e9228b13 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -197,10 +197,9 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
vl_15_dropped);
}
-static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
+static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num,
const struct ib_mad *in_mad, struct ib_mad *out_mad)
{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
int err;
void *out_cnt;
@@ -222,7 +221,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
if (!out_cnt)
return IB_MAD_RESULT_FAILURE;
- err = mlx5_core_query_vport_counter(dev->mdev, 0, 0,
+ err = mlx5_core_query_vport_counter(mdev, 0, 0,
port_num, out_cnt, sz);
if (!err)
pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
@@ -235,7 +234,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
if (!out_cnt)
return IB_MAD_RESULT_FAILURE;
- err = mlx5_core_query_ib_ppcnt(dev->mdev, port_num,
+ err = mlx5_core_query_ib_ppcnt(mdev, port_num,
out_cnt, sz);
if (!err)
pma_cnt_assign(pma_cnt, out_cnt);
@@ -255,9 +254,11 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
u16 *out_mad_pkey_index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_core_dev *mdev = dev->mdev;
const struct ib_mad *in_mad = (const struct ib_mad *)in;
struct ib_mad *out_mad = (struct ib_mad *)out;
+ struct mlx5_core_dev *mdev;
+ u8 mdev_port_num;
+ int ret;
if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
*out_mad_size != sizeof(*out_mad)))
@@ -265,14 +266,20 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
memset(out_mad->data, 0, sizeof(out_mad->data));
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
+ if (!mdev)
+ return IB_MAD_RESULT_FAILURE;
+
if (MLX5_CAP_GEN(mdev, vport_counters) &&
in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) {
- return process_pma_cmd(ibdev, port_num, in_mad, out_mad);
+ ret = process_pma_cmd(mdev, mdev_port_num, in_mad, out_mad);
} else {
- return process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
+ ret = process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
in_mad, out_mad);
}
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+ return ret;
}
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
@@ -519,7 +526,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
int ext_active_speed;
int err = -ENOMEM;
- if (port < 1 || port > MLX5_CAP_GEN(mdev, num_ports)) {
+ if (port < 1 || port > dev->num_ports) {
mlx5_ib_warn(dev, "invalid port number %d\n", port);
return -EINVAL;
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 262c1aa2e028..4236c8086820 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -50,16 +50,14 @@
#include <rdma/ib_cache.h>
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
-#include <linux/mlx5/fs.h>
-#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
#include "cmd.h"
-#include <linux/mlx5/vport.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"
@@ -72,10 +70,36 @@ static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION "\n";
+struct mlx5_ib_event_work {
+ struct work_struct work;
+ struct mlx5_core_dev *dev;
+ void *context;
+ enum mlx5_dev_event event;
+ unsigned long param;
+};
+
enum {
MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
};
+static struct workqueue_struct *mlx5_ib_event_wq;
+static LIST_HEAD(mlx5_ib_unaffiliated_port_list);
+static LIST_HEAD(mlx5_ib_dev_list);
+/*
+ * This mutex should be held when accessing either of the above lists
+ */
+static DEFINE_MUTEX(mlx5_ib_multiport_mutex);
+
+struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi)
+{
+ struct mlx5_ib_dev *dev;
+
+ mutex_lock(&mlx5_ib_multiport_mutex);
+ dev = mpi->ibdev;
+ mutex_unlock(&mlx5_ib_multiport_mutex);
+ return dev;
+}
+
static enum rdma_link_layer
mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
{
@@ -115,24 +139,32 @@ static int get_port_state(struct ib_device *ibdev,
static int mlx5_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
+ struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb);
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
- struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
- roce.nb);
+ u8 port_num = roce->native_port_num;
+ struct mlx5_core_dev *mdev;
+ struct mlx5_ib_dev *ibdev;
+
+ ibdev = roce->dev;
+ mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
+ if (!mdev)
+ return NOTIFY_DONE;
switch (event) {
case NETDEV_REGISTER:
case NETDEV_UNREGISTER:
- write_lock(&ibdev->roce.netdev_lock);
- if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
- ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ?
- NULL : ndev;
- write_unlock(&ibdev->roce.netdev_lock);
+ write_lock(&roce->netdev_lock);
+
+ if (ndev->dev.parent == &mdev->pdev->dev)
+ roce->netdev = (event == NETDEV_UNREGISTER) ?
+ NULL : ndev;
+ write_unlock(&roce->netdev_lock);
break;
case NETDEV_CHANGE:
case NETDEV_UP:
case NETDEV_DOWN: {
- struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
+ struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(mdev);
struct net_device *upper = NULL;
if (lag_ndev) {
@@ -140,27 +172,28 @@ static int mlx5_netdev_event(struct notifier_block *this,
dev_put(lag_ndev);
}
- if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
+ if ((upper == ndev || (!upper && ndev == roce->netdev))
&& ibdev->ib_active) {
struct ib_event ibev = { };
enum ib_port_state port_state;
- if (get_port_state(&ibdev->ib_dev, 1, &port_state))
- return NOTIFY_DONE;
+ if (get_port_state(&ibdev->ib_dev, port_num,
+ &port_state))
+ goto done;
- if (ibdev->roce.last_port_state == port_state)
- return NOTIFY_DONE;
+ if (roce->last_port_state == port_state)
+ goto done;
- ibdev->roce.last_port_state = port_state;
+ roce->last_port_state = port_state;
ibev.device = &ibdev->ib_dev;
if (port_state == IB_PORT_DOWN)
ibev.event = IB_EVENT_PORT_ERR;
else if (port_state == IB_PORT_ACTIVE)
ibev.event = IB_EVENT_PORT_ACTIVE;
else
- return NOTIFY_DONE;
+ goto done;
- ibev.element.port_num = 1;
+ ibev.element.port_num = port_num;
ib_dispatch_event(&ibev);
}
break;
@@ -169,7 +202,8 @@ static int mlx5_netdev_event(struct notifier_block *this,
default:
break;
}
-
+done:
+ mlx5_ib_put_native_port_mdev(ibdev, port_num);
return NOTIFY_DONE;
}
@@ -178,22 +212,88 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
{
struct mlx5_ib_dev *ibdev = to_mdev(device);
struct net_device *ndev;
+ struct mlx5_core_dev *mdev;
- ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
+ mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
+ if (!mdev)
+ return NULL;
+
+ ndev = mlx5_lag_get_roce_netdev(mdev);
if (ndev)
- return ndev;
+ goto out;
/* Ensure ndev does not disappear before we invoke dev_hold()
*/
- read_lock(&ibdev->roce.netdev_lock);
- ndev = ibdev->roce.netdev;
+ read_lock(&ibdev->roce[port_num - 1].netdev_lock);
+ ndev = ibdev->roce[port_num - 1].netdev;
if (ndev)
dev_hold(ndev);
- read_unlock(&ibdev->roce.netdev_lock);
+ read_unlock(&ibdev->roce[port_num - 1].netdev_lock);
+out:
+ mlx5_ib_put_native_port_mdev(ibdev, port_num);
return ndev;
}
+struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
+ u8 ib_port_num,
+ u8 *native_port_num)
+{
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
+ ib_port_num);
+ struct mlx5_core_dev *mdev = NULL;
+ struct mlx5_ib_multiport_info *mpi;
+ struct mlx5_ib_port *port;
+
+ if (native_port_num)
+ *native_port_num = 1;
+
+ if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
+ return ibdev->mdev;
+
+ port = &ibdev->port[ib_port_num - 1];
+ if (!port)
+ return NULL;
+
+ spin_lock(&port->mp.mpi_lock);
+ mpi = ibdev->port[ib_port_num - 1].mp.mpi;
+ if (mpi && !mpi->unaffiliate) {
+ mdev = mpi->mdev;
+ /* If it's the master no need to refcount, it'll exist
+ * as long as the ib_dev exists.
+ */
+ if (!mpi->is_master)
+ mpi->mdev_refcnt++;
+ }
+ spin_unlock(&port->mp.mpi_lock);
+
+ return mdev;
+}
+
+void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u8 port_num)
+{
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
+ port_num);
+ struct mlx5_ib_multiport_info *mpi;
+ struct mlx5_ib_port *port;
+
+ if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
+ return;
+
+ port = &ibdev->port[port_num - 1];
+
+ spin_lock(&port->mp.mpi_lock);
+ mpi = ibdev->port[port_num - 1].mp.mpi;
+ if (mpi->is_master)
+ goto out;
+
+ mpi->mdev_refcnt--;
+ if (mpi->unaffiliate)
+ complete(&mpi->unref_comp);
+out:
+ spin_unlock(&port->mp.mpi_lock);
+}
+
static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
u8 *active_width)
{
@@ -256,19 +356,33 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(device);
- struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_core_dev *mdev;
struct net_device *ndev, *upper;
enum ib_mtu ndev_ib_mtu;
+ bool put_mdev = true;
u16 qkey_viol_cntr;
u32 eth_prot_oper;
+ u8 mdev_port_num;
int err;
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
+ if (!mdev) {
+ /* This means the port isn't affiliated yet. Get the
+ * info for the master port instead.
+ */
+ put_mdev = false;
+ mdev = dev->mdev;
+ mdev_port_num = 1;
+ port_num = 1;
+ }
+
/* Possible bad flows are checked before filling out props so in case
* of an error it will still be zeroed out.
*/
- err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper, port_num);
+ err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper,
+ mdev_port_num);
if (err)
- return err;
+ goto out;
translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
&props->active_width);
@@ -284,12 +398,16 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
props->state = IB_PORT_DOWN;
props->phys_state = 3;
- mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
+ mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
props->qkey_viol_cntr = qkey_viol_cntr;
+ /* If this is a stub query for an unaffiliated port stop here */
+ if (!put_mdev)
+ goto out;
+
ndev = mlx5_ib_get_netdev(device, port_num);
if (!ndev)
- return 0;
+ goto out;
if (mlx5_lag_is_active(dev->mdev)) {
rcu_read_lock();
@@ -312,7 +430,10 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
dev_put(ndev);
props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
- return 0;
+out:
+ if (put_mdev)
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+ return err;
}
static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
@@ -354,7 +475,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
roce_l3_type, gid->raw, mac, vlan,
- vlan_id);
+ vlan_id, port_num);
}
static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
@@ -438,11 +559,11 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev)
}
static void get_atomic_caps(struct mlx5_ib_dev *dev,
+ u8 atomic_size_qp,
struct ib_device_attr *props)
{
u8 tmp;
u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
- u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
u8 atomic_req_8B_endianness_mode =
MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianness_mode);
@@ -459,6 +580,29 @@ static void get_atomic_caps(struct mlx5_ib_dev *dev,
}
}
+static void get_atomic_caps_qp(struct mlx5_ib_dev *dev,
+ struct ib_device_attr *props)
+{
+ u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+
+ get_atomic_caps(dev, atomic_size_qp, props);
+}
+
+static void get_atomic_caps_dc(struct mlx5_ib_dev *dev,
+ struct ib_device_attr *props)
+{
+ u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
+
+ get_atomic_caps(dev, atomic_size_qp, props);
+}
+
+bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev)
+{
+ struct ib_device_attr props = {};
+
+ get_atomic_caps_dc(dev, &props);
+ return (props.atomic_cap == IB_ATOMIC_HCA) ? true : false;
+}
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
@@ -587,6 +731,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
int max_rq_sg;
int max_sq_sg;
u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
+ bool raw_support = !mlx5_core_mp_enabled(mdev);
struct mlx5_ib_query_device_resp resp = {};
size_t resp_len;
u64 max_tso;
@@ -650,7 +795,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
- if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
+ if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) {
if (MLX5_CAP_ETH(mdev, csum_cap)) {
/* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
@@ -682,7 +827,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_RX_HASH_SRC_PORT_TCP |
MLX5_RX_HASH_DST_PORT_TCP |
MLX5_RX_HASH_SRC_PORT_UDP |
- MLX5_RX_HASH_DST_PORT_UDP;
+ MLX5_RX_HASH_DST_PORT_UDP |
+ MLX5_RX_HASH_INNER;
resp.response_length += sizeof(resp.rss_caps);
}
} else {
@@ -698,7 +844,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
- MLX5_CAP_GEN(dev->mdev, general_notification_event))
+ MLX5_CAP_GEN(dev->mdev, general_notification_event) &&
+ raw_support)
props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP;
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
@@ -706,7 +853,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
- MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
+ MLX5_CAP_ETH(dev->mdev, scatter_fcs) &&
+ raw_support) {
/* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
@@ -746,7 +894,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len =
1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
- get_atomic_caps(dev, props);
+ get_atomic_caps_qp(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
@@ -770,7 +918,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
if (mlx5_ib_port_link_layer(ibdev, 1) ==
- IB_LINK_LAYER_ETHERNET) {
+ IB_LINK_LAYER_ETHERNET && raw_support) {
props->rss_caps.max_rwq_indirection_tables =
1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt);
props->rss_caps.max_rwq_indirection_table_size =
@@ -807,7 +955,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
resp.response_length += sizeof(resp.cqe_comp_caps);
}
- if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen)) {
+ if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) &&
+ raw_support) {
if (MLX5_CAP_QOS(mdev, packet_pacing) &&
MLX5_CAP_GEN(mdev, qos)) {
resp.packet_pacing_caps.qp_rate_limit_max =
@@ -866,7 +1015,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen)) {
+ if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen) &&
+ raw_support) {
resp.response_length += sizeof(resp.striding_rq_caps);
if (MLX5_CAP_GEN(mdev, striding_rq)) {
resp.striding_rq_caps.min_single_stride_log_num_of_bytes =
@@ -1097,7 +1247,22 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
}
if (!ret && props) {
- count = mlx5_core_reserved_gids_count(to_mdev(ibdev)->mdev);
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_core_dev *mdev;
+ bool put_mdev = true;
+
+ mdev = mlx5_ib_get_native_port_mdev(dev, port, NULL);
+ if (!mdev) {
+ /* If the port isn't affiliated yet query the master.
+ * The master and slave will have the same values.
+ */
+ mdev = dev->mdev;
+ port = 1;
+ put_mdev = false;
+ }
+ count = mlx5_core_reserved_gids_count(mdev);
+ if (put_mdev)
+ mlx5_ib_put_native_port_mdev(dev, port);
props->gid_tbl_len -= count;
}
return ret;
@@ -1122,20 +1287,43 @@ static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
}
-static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
+static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u8 port,
+ u16 index, u16 *pkey)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_core_dev *mdev;
+ bool put_mdev = true;
+ u8 mdev_port_num;
+ int err;
+
+ mdev = mlx5_ib_get_native_port_mdev(dev, port, &mdev_port_num);
+ if (!mdev) {
+ /* The port isn't affiliated yet, get the PKey from the master
+ * port. For RoCE the PKey tables will be the same.
+ */
+ put_mdev = false;
+ mdev = dev->mdev;
+ mdev_port_num = 1;
+ }
+
+ err = mlx5_query_hca_vport_pkey(mdev, 0, mdev_port_num, 0,
+ index, pkey);
+ if (put_mdev)
+ mlx5_ib_put_native_port_mdev(dev, port);
+ return err;
+}
+
+static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey);
case MLX5_VPORT_ACCESS_METHOD_HCA:
case MLX5_VPORT_ACCESS_METHOD_NIC:
- return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
- pkey);
+ return mlx5_query_hca_nic_pkey(ibdev, port, index, pkey);
default:
return -EINVAL;
}
@@ -1174,23 +1362,32 @@ static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
u32 value)
{
struct mlx5_hca_vport_context ctx = {};
+ struct mlx5_core_dev *mdev;
+ u8 mdev_port_num;
int err;
- err = mlx5_query_hca_vport_context(dev->mdev, 0,
- port_num, 0, &ctx);
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
+ if (!mdev)
+ return -ENODEV;
+
+ err = mlx5_query_hca_vport_context(mdev, 0, mdev_port_num, 0, &ctx);
if (err)
- return err;
+ goto out;
if (~ctx.cap_mask1_perm & mask) {
mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
mask, ctx.cap_mask1_perm);
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
ctx.cap_mask1 = value;
ctx.cap_mask1_perm = mask;
- err = mlx5_core_modify_hca_vport_context(dev->mdev, 0,
- port_num, 0, &ctx);
+ err = mlx5_core_modify_hca_vport_context(mdev, 0, mdev_port_num,
+ 0, &ctx);
+
+out:
+ mlx5_ib_put_native_port_mdev(dev, port_num);
return err;
}
@@ -1241,9 +1438,18 @@ static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
}
+static u16 calc_dynamic_bfregs(int uars_per_sys_page)
+{
+ /* Large page with non 4k uar support might limit the dynamic size */
+ if (uars_per_sys_page == 1 && PAGE_SIZE > 4096)
+ return MLX5_MIN_DYN_BFREGS;
+
+ return MLX5_MAX_DYN_BFREGS;
+}
+
static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
struct mlx5_ib_alloc_ucontext_req_v2 *req,
- u32 *num_sys_pages)
+ struct mlx5_bfreg_info *bfregi)
{
int uars_per_sys_page;
int bfregs_per_sys_page;
@@ -1260,16 +1466,21 @@ static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
+ /* This holds the required static allocation asked by the user */
req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
- *num_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
-
if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
return -EINVAL;
- mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, using %d sys pages\n",
+ bfregi->num_static_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
+ bfregi->num_dyn_bfregs = ALIGN(calc_dynamic_bfregs(uars_per_sys_page), bfregs_per_sys_page);
+ bfregi->total_num_bfregs = req->total_num_bfregs + bfregi->num_dyn_bfregs;
+ bfregi->num_sys_pages = bfregi->total_num_bfregs / bfregs_per_sys_page;
+
+ mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, total bfregs %d, using %d sys pages\n",
MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
lib_uar_4k ? "yes" : "no", ref_bfregs,
- req->total_num_bfregs, *num_sys_pages);
+ req->total_num_bfregs, bfregi->total_num_bfregs,
+ bfregi->num_sys_pages);
return 0;
}
@@ -1281,13 +1492,17 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte
int i;
bfregi = &context->bfregi;
- for (i = 0; i < bfregi->num_sys_pages; i++) {
+ for (i = 0; i < bfregi->num_static_sys_pages; i++) {
err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
if (err)
goto error;
mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]);
}
+
+ for (i = bfregi->num_static_sys_pages; i < bfregi->num_sys_pages; i++)
+ bfregi->sys_pages[i] = MLX5_IB_INVALID_UAR_INDEX;
+
return 0;
error:
@@ -1306,12 +1521,16 @@ static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *con
bfregi = &context->bfregi;
for (i = 0; i < bfregi->num_sys_pages; i++) {
- err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
- if (err) {
- mlx5_ib_warn(dev, "failed to free uar %d\n", i);
- return err;
+ if (i < bfregi->num_static_sys_pages ||
+ bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) {
+ err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
+ if (err) {
+ mlx5_ib_warn(dev, "failed to free uar %d, err=%d\n", i, err);
+ return err;
+ }
}
}
+
return 0;
}
@@ -1362,6 +1581,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_alloc_ucontext_req_v2 req = {};
struct mlx5_ib_alloc_ucontext_resp resp = {};
+ struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_ucontext *context;
struct mlx5_bfreg_info *bfregi;
int ver;
@@ -1422,13 +1642,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
bfregi = &context->bfregi;
/* updates req->total_num_bfregs */
- err = calc_total_bfregs(dev, lib_uar_4k, &req, &bfregi->num_sys_pages);
+ err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
if (err)
goto out_ctx;
mutex_init(&bfregi->lock);
bfregi->lib_uar_4k = lib_uar_4k;
- bfregi->count = kcalloc(req.total_num_bfregs, sizeof(*bfregi->count),
+ bfregi->count = kcalloc(bfregi->total_num_bfregs, sizeof(*bfregi->count),
GFP_KERNEL);
if (!bfregi->count) {
err = -ENOMEM;
@@ -1470,7 +1690,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
mutex_init(&context->db_page_mutex);
resp.tot_bfregs = req.total_num_bfregs;
- resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
+ resp.num_ports = dev->num_ports;
if (field_avail(typeof(resp), cqe_version, udata->outlen))
resp.response_length += sizeof(resp.cqe_version);
@@ -1489,6 +1709,12 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.response_length += sizeof(resp.eth_min_inline);
}
+ if (field_avail(typeof(resp), clock_info_versions, udata->outlen)) {
+ if (mdev->clock_info)
+ resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
+ resp.response_length += sizeof(resp.clock_info_versions);
+ }
+
/*
* We don't want to expose information from the PCI bar that is located
* after 4096 bytes, so if the arch only supports larger pages, let's
@@ -1502,8 +1728,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.hca_core_clock_offset =
offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
}
- resp.response_length += sizeof(resp.hca_core_clock_offset) +
- sizeof(resp.reserved2);
+ resp.response_length += sizeof(resp.hca_core_clock_offset);
}
if (field_avail(typeof(resp), log_uar_size, udata->outlen))
@@ -1512,6 +1737,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (field_avail(typeof(resp), num_uars_per_page, udata->outlen))
resp.response_length += sizeof(resp.num_uars_per_page);
+ if (field_avail(typeof(resp), num_dyn_bfregs, udata->outlen)) {
+ resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
+ resp.response_length += sizeof(resp.num_dyn_bfregs);
+ }
+
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto out_td;
@@ -1566,15 +1796,13 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
}
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
- struct mlx5_bfreg_info *bfregi,
- int idx)
+ int uar_idx)
{
int fw_uars_per_page;
fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
- return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) +
- bfregi->sys_pages[idx] / fw_uars_per_page;
+ return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
}
static int get_command(unsigned long offset)
@@ -1592,6 +1820,12 @@ static int get_index(unsigned long offset)
return get_arg(offset);
}
+/* Index resides in an extra byte to enable larger values than 255 */
+static int get_extended_index(unsigned long offset)
+{
+ return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
+}
+
static void mlx5_ib_vma_open(struct vm_area_struct *area)
{
/* vma_open is called when a new VMA is created on top of our VMA. This
@@ -1733,6 +1967,38 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
}
}
+static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
+ struct vm_area_struct *vma,
+ struct mlx5_ib_ucontext *context)
+{
+ phys_addr_t pfn;
+ int err;
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ if (get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1)
+ return -EOPNOTSUPP;
+
+ if (vma->vm_flags & VM_WRITE)
+ return -EPERM;
+
+ if (!dev->mdev->clock_info_page)
+ return -EOPNOTSUPP;
+
+ pfn = page_to_pfn(dev->mdev->clock_info_page);
+ err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
+ vma->vm_page_prot);
+ if (err)
+ return err;
+
+ mlx5_ib_dbg(dev, "mapped clock info at 0x%lx, PA 0x%llx\n",
+ vma->vm_start,
+ (unsigned long long)pfn << PAGE_SHIFT);
+
+ return mlx5_ib_set_vma_data(vma, context);
+}
+
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
@@ -1742,21 +2008,29 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
unsigned long idx;
phys_addr_t pfn, pa;
pgprot_t prot;
- int uars_per_page;
+ u32 bfreg_dyn_idx = 0;
+ u32 uar_index;
+ int dyn_uar = (cmd == MLX5_IB_MMAP_ALLOC_WC);
+ int max_valid_idx = dyn_uar ? bfregi->num_sys_pages :
+ bfregi->num_static_sys_pages;
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
- uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k);
- idx = get_index(vma->vm_pgoff);
- if (idx % uars_per_page ||
- idx * uars_per_page >= bfregi->num_sys_pages) {
- mlx5_ib_warn(dev, "invalid uar index %lu\n", idx);
+ if (dyn_uar)
+ idx = get_extended_index(vma->vm_pgoff) + bfregi->num_static_sys_pages;
+ else
+ idx = get_index(vma->vm_pgoff);
+
+ if (idx >= max_valid_idx) {
+ mlx5_ib_warn(dev, "invalid uar index %lu, max=%d\n",
+ idx, max_valid_idx);
return -EINVAL;
}
switch (cmd) {
case MLX5_IB_MMAP_WC_PAGE:
+ case MLX5_IB_MMAP_ALLOC_WC:
/* Some architectures don't support WC memory */
#if defined(CONFIG_X86)
if (!pat_enabled())
@@ -1776,7 +2050,40 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
return -EINVAL;
}
- pfn = uar_index2pfn(dev, bfregi, idx);
+ if (dyn_uar) {
+ int uars_per_page;
+
+ uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k);
+ bfreg_dyn_idx = idx * (uars_per_page * MLX5_NON_FP_BFREGS_PER_UAR);
+ if (bfreg_dyn_idx >= bfregi->total_num_bfregs) {
+ mlx5_ib_warn(dev, "invalid bfreg_dyn_idx %u, max=%u\n",
+ bfreg_dyn_idx, bfregi->total_num_bfregs);
+ return -EINVAL;
+ }
+
+ mutex_lock(&bfregi->lock);
+ /* Fail if uar already allocated, first bfreg index of each
+ * page holds its count.
+ */
+ if (bfregi->count[bfreg_dyn_idx]) {
+ mlx5_ib_warn(dev, "wrong offset, idx %lu is busy, bfregn=%u\n", idx, bfreg_dyn_idx);
+ mutex_unlock(&bfregi->lock);
+ return -EINVAL;
+ }
+
+ bfregi->count[bfreg_dyn_idx]++;
+ mutex_unlock(&bfregi->lock);
+
+ err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
+ if (err) {
+ mlx5_ib_warn(dev, "UAR alloc failed\n");
+ goto free_bfreg;
+ }
+ } else {
+ uar_index = bfregi->sys_pages[idx];
+ }
+
+ pfn = uar_index2pfn(dev, uar_index);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
vma->vm_page_prot = prot;
@@ -1785,14 +2092,32 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
if (err) {
mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n",
err, vma->vm_start, &pfn, mmap_cmd2str(cmd));
- return -EAGAIN;
+ err = -EAGAIN;
+ goto err;
}
pa = pfn << PAGE_SHIFT;
mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
vma->vm_start, &pa);
- return mlx5_ib_set_vma_data(vma, context);
+ err = mlx5_ib_set_vma_data(vma, context);
+ if (err)
+ goto err;
+
+ if (dyn_uar)
+ bfregi->sys_pages[idx] = uar_index;
+ return 0;
+
+err:
+ if (!dyn_uar)
+ return err;
+
+ mlx5_cmd_free_uar(dev->mdev, idx);
+
+free_bfreg:
+ mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx);
+
+ return err;
}
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -1807,6 +2132,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
case MLX5_IB_MMAP_WC_PAGE:
case MLX5_IB_MMAP_NC_PAGE:
case MLX5_IB_MMAP_REGULAR_PAGE:
+ case MLX5_IB_MMAP_ALLOC_WC:
return uar_mmap(dev, command, vma, context);
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
@@ -1835,6 +2161,8 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
vma->vm_start,
(unsigned long long)pfn << PAGE_SHIFT);
break;
+ case MLX5_IB_MMAP_CLOCK_INFO:
+ return mlx5_ib_mmap_clock_info_page(dev, vma, context);
default:
return -EINVAL;
@@ -2663,7 +2991,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
return ERR_PTR(-ENOMEM);
if (domain != IB_FLOW_DOMAIN_USER ||
- flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
+ flow_attr->port > dev->num_ports ||
(flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
return ERR_PTR(-EINVAL);
@@ -2928,15 +3256,24 @@ static void delay_drop_handler(struct work_struct *work)
mutex_unlock(&delay_drop->lock);
}
-static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
- enum mlx5_dev_event event, unsigned long param)
+static void mlx5_ib_handle_event(struct work_struct *_work)
{
- struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
+ struct mlx5_ib_event_work *work =
+ container_of(_work, struct mlx5_ib_event_work, work);
+ struct mlx5_ib_dev *ibdev;
struct ib_event ibev;
bool fatal = false;
u8 port = 0;
- switch (event) {
+ if (mlx5_core_is_mp_slave(work->dev)) {
+ ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
+ if (!ibdev)
+ goto out;
+ } else {
+ ibdev = work->context;
+ }
+
+ switch (work->event) {
case MLX5_DEV_EVENT_SYS_ERROR:
ibev.event = IB_EVENT_DEVICE_FATAL;
mlx5_ib_handle_internal_error(ibdev);
@@ -2946,39 +3283,39 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
case MLX5_DEV_EVENT_PORT_UP:
case MLX5_DEV_EVENT_PORT_DOWN:
case MLX5_DEV_EVENT_PORT_INITIALIZED:
- port = (u8)param;
+ port = (u8)work->param;
/* In RoCE, port up/down events are handled in
* mlx5_netdev_event().
*/
if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
IB_LINK_LAYER_ETHERNET)
- return;
+ goto out;
- ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ?
+ ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ?
IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
break;
case MLX5_DEV_EVENT_LID_CHANGE:
ibev.event = IB_EVENT_LID_CHANGE;
- port = (u8)param;
+ port = (u8)work->param;
break;
case MLX5_DEV_EVENT_PKEY_CHANGE:
ibev.event = IB_EVENT_PKEY_CHANGE;
- port = (u8)param;
+ port = (u8)work->param;
schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
break;
case MLX5_DEV_EVENT_GUID_CHANGE:
ibev.event = IB_EVENT_GID_CHANGE;
- port = (u8)param;
+ port = (u8)work->param;
break;
case MLX5_DEV_EVENT_CLIENT_REREG:
ibev.event = IB_EVENT_CLIENT_REREGISTER;
- port = (u8)param;
+ port = (u8)work->param;
break;
case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
schedule_work(&ibdev->delay_drop.delay_drop_work);
@@ -3000,9 +3337,26 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
if (fatal)
ibdev->ib_active = false;
-
out:
- return;
+ kfree(work);
+}
+
+static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
+ enum mlx5_dev_event event, unsigned long param)
+{
+ struct mlx5_ib_event_work *work;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ INIT_WORK(&work->work, mlx5_ib_handle_event);
+ work->dev = dev;
+ work->param = param;
+ work->context = context;
+ work->event = event;
+
+ queue_work(mlx5_ib_event_wq, &work->work);
}
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
@@ -3011,7 +3365,7 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev)
int err;
int port;
- for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
+ for (port = 1; port <= dev->num_ports; port++) {
dev->mdev->port_caps[port - 1].has_smi = false;
if (MLX5_CAP_GEN(dev->mdev, port_type) ==
MLX5_CAP_PORT_TYPE_IB) {
@@ -3038,16 +3392,15 @@ static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
int port;
- for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
+ for (port = 1; port <= dev->num_ports; port++)
mlx5_query_ext_port_caps(dev, port);
}
-static int get_port_caps(struct mlx5_ib_dev *dev)
+static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
{
struct ib_device_attr *dprops = NULL;
struct ib_port_attr *pprops = NULL;
int err = -ENOMEM;
- int port;
struct ib_udata uhw = {.inlen = 0, .outlen = 0};
pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
@@ -3068,22 +3421,21 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
goto out;
}
- for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
- memset(pprops, 0, sizeof(*pprops));
- err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
- if (err) {
- mlx5_ib_warn(dev, "query_port %d failed %d\n",
- port, err);
- break;
- }
- dev->mdev->port_caps[port - 1].pkey_table_len =
- dprops->max_pkeys;
- dev->mdev->port_caps[port - 1].gid_table_len =
- pprops->gid_tbl_len;
- mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
- dprops->max_pkeys, pprops->gid_tbl_len);
+ memset(pprops, 0, sizeof(*pprops));
+ err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
+ if (err) {
+ mlx5_ib_warn(dev, "query_port %d failed %d\n",
+ port, err);
+ goto out;
}
+ dev->mdev->port_caps[port - 1].pkey_table_len =
+ dprops->max_pkeys;
+ dev->mdev->port_caps[port - 1].gid_table_len =
+ pprops->gid_tbl_len;
+ mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n",
+ port, dprops->max_pkeys, pprops->gid_tbl_len);
+
out:
kfree(pprops);
kfree(dprops);
@@ -3373,12 +3725,14 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
+ bool raw_support = !mlx5_core_mp_enabled(dev->mdev);
u32 ret = 0;
if (ll == IB_LINK_LAYER_INFINIBAND)
return RDMA_CORE_PORT_IBA_IB;
- ret = RDMA_CORE_PORT_RAW_PACKET;
+ if (raw_support)
+ ret = RDMA_CORE_PORT_RAW_PACKET;
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
return ret;
@@ -3468,33 +3822,33 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
}
}
-static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev)
+static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
{
int err;
- dev->roce.nb.notifier_call = mlx5_netdev_event;
- err = register_netdevice_notifier(&dev->roce.nb);
+ dev->roce[port_num].nb.notifier_call = mlx5_netdev_event;
+ err = register_netdevice_notifier(&dev->roce[port_num].nb);
if (err) {
- dev->roce.nb.notifier_call = NULL;
+ dev->roce[port_num].nb.notifier_call = NULL;
return err;
}
return 0;
}
-static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev)
+static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
{
- if (dev->roce.nb.notifier_call) {
- unregister_netdevice_notifier(&dev->roce.nb);
- dev->roce.nb.notifier_call = NULL;
+ if (dev->roce[port_num].nb.notifier_call) {
+ unregister_netdevice_notifier(&dev->roce[port_num].nb);
+ dev->roce[port_num].nb.notifier_call = NULL;
}
}
-static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
+static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
{
int err;
- err = mlx5_add_netdev_notifier(dev);
+ err = mlx5_add_netdev_notifier(dev, port_num);
if (err)
return err;
@@ -3515,7 +3869,7 @@ err_disable_roce:
mlx5_nic_vport_disable_roce(dev->mdev);
err_unregister_netdevice_notifier:
- mlx5_remove_netdev_notifier(dev);
+ mlx5_remove_netdev_notifier(dev, port_num);
return err;
}
@@ -3577,11 +3931,12 @@ static const struct mlx5_ib_counter extended_err_cnts[] = {
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
- unsigned int i;
+ int i;
for (i = 0; i < dev->num_ports; i++) {
- mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].cnts.set_id);
+ if (dev->port[i].cnts.set_id)
+ mlx5_core_dealloc_q_counter(dev->mdev,
+ dev->port[i].cnts.set_id);
kfree(dev->port[i].cnts.names);
kfree(dev->port[i].cnts.offsets);
}
@@ -3623,6 +3978,7 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
err_names:
kfree(cnts->names);
+ cnts->names = NULL;
return -ENOMEM;
}
@@ -3669,37 +4025,33 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
+ int err = 0;
int i;
- int ret;
for (i = 0; i < dev->num_ports; i++) {
- struct mlx5_ib_port *port = &dev->port[i];
+ err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
+ if (err)
+ goto err_alloc;
+
+ mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
+ dev->port[i].cnts.offsets);
- ret = mlx5_core_alloc_q_counter(dev->mdev,
- &port->cnts.set_id);
- if (ret) {
+ err = mlx5_core_alloc_q_counter(dev->mdev,
+ &dev->port[i].cnts.set_id);
+ if (err) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d, err %d\n",
- i + 1, ret);
- goto dealloc_counters;
+ i + 1, err);
+ goto err_alloc;
}
-
- ret = __mlx5_ib_alloc_counters(dev, &port->cnts);
- if (ret)
- goto dealloc_counters;
-
- mlx5_ib_fill_counters(dev, port->cnts.names,
- port->cnts.offsets);
+ dev->port[i].cnts.set_id_valid = true;
}
return 0;
-dealloc_counters:
- while (--i >= 0)
- mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].cnts.set_id);
-
- return ret;
+err_alloc:
+ mlx5_ib_dealloc_counters(dev);
+ return err;
}
static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
@@ -3718,7 +4070,7 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
-static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev,
+static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
struct mlx5_ib_port *port,
struct rdma_hw_stats *stats)
{
@@ -3731,7 +4083,7 @@ static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev,
if (!out)
return -ENOMEM;
- ret = mlx5_core_query_q_counter(dev->mdev,
+ ret = mlx5_core_query_q_counter(mdev,
port->cnts.set_id, 0,
out, outlen);
if (ret)
@@ -3753,28 +4105,43 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_port *port = &dev->port[port_num - 1];
+ struct mlx5_core_dev *mdev;
int ret, num_counters;
+ u8 mdev_port_num;
if (!stats)
return -EINVAL;
- ret = mlx5_ib_query_q_counters(dev, port, stats);
+ num_counters = port->cnts.num_q_counters + port->cnts.num_cong_counters;
+
+ /* q_counters are per IB device, query the master mdev */
+ ret = mlx5_ib_query_q_counters(dev->mdev, port, stats);
if (ret)
return ret;
- num_counters = port->cnts.num_q_counters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
+ &mdev_port_num);
+ if (!mdev) {
+ /* If port is not affiliated yet, its in down state
+ * which doesn't have any counters yet, so it would be
+ * zero. So no need to read from the HCA.
+ */
+ goto done;
+ }
ret = mlx5_lag_query_cong_counters(dev->mdev,
stats->value +
port->cnts.num_q_counters,
port->cnts.num_cong_counters,
port->cnts.offsets +
port->cnts.num_q_counters);
+
+ mlx5_ib_put_native_port_mdev(dev, port_num);
if (ret)
return ret;
- num_counters += port->cnts.num_cong_counters;
}
+done:
return num_counters;
}
@@ -3936,36 +4303,250 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
return mlx5_get_vector_affinity(dev->mdev, comp_vector);
}
-static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
+/* The mlx5_ib_multiport_mutex should be held when calling this function */
+static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
+ struct mlx5_ib_multiport_info *mpi)
{
- struct mlx5_ib_dev *dev;
- enum rdma_link_layer ll;
- int port_type_cap;
- const char *name;
+ u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
+ struct mlx5_ib_port *port = &ibdev->port[port_num];
+ int comps;
int err;
int i;
- port_type_cap = MLX5_CAP_GEN(mdev, port_type);
- ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+ mlx5_ib_cleanup_cong_debugfs(ibdev, port_num);
- printk_once(KERN_INFO "%s", mlx5_version);
+ spin_lock(&port->mp.mpi_lock);
+ if (!mpi->ibdev) {
+ spin_unlock(&port->mp.mpi_lock);
+ return;
+ }
+ mpi->ibdev = NULL;
- dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
- if (!dev)
- return NULL;
+ spin_unlock(&port->mp.mpi_lock);
+ mlx5_remove_netdev_notifier(ibdev, port_num);
+ spin_lock(&port->mp.mpi_lock);
- dev->mdev = mdev;
+ comps = mpi->mdev_refcnt;
+ if (comps) {
+ mpi->unaffiliate = true;
+ init_completion(&mpi->unref_comp);
+ spin_unlock(&port->mp.mpi_lock);
+
+ for (i = 0; i < comps; i++)
+ wait_for_completion(&mpi->unref_comp);
+
+ spin_lock(&port->mp.mpi_lock);
+ mpi->unaffiliate = false;
+ }
+
+ port->mp.mpi = NULL;
+
+ list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list);
+
+ spin_unlock(&port->mp.mpi_lock);
+
+ err = mlx5_nic_vport_unaffiliate_multiport(mpi->mdev);
+
+ mlx5_ib_dbg(ibdev, "unaffiliated port %d\n", port_num + 1);
+ /* Log an error, still needed to cleanup the pointers and add
+ * it back to the list.
+ */
+ if (err)
+ mlx5_ib_err(ibdev, "Failed to unaffiliate port %u\n",
+ port_num + 1);
+
+ ibdev->roce[port_num].last_port_state = IB_PORT_DOWN;
+}
+
+/* The mlx5_ib_multiport_mutex should be held when calling this function */
+static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
+ struct mlx5_ib_multiport_info *mpi)
+{
+ u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
+ int err;
+
+ spin_lock(&ibdev->port[port_num].mp.mpi_lock);
+ if (ibdev->port[port_num].mp.mpi) {
+ mlx5_ib_warn(ibdev, "port %d already affiliated.\n",
+ port_num + 1);
+ spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
+ return false;
+ }
+
+ ibdev->port[port_num].mp.mpi = mpi;
+ mpi->ibdev = ibdev;
+ spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
+
+ err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev);
+ if (err)
+ goto unbind;
+
+ err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev));
+ if (err)
+ goto unbind;
+
+ err = mlx5_add_netdev_notifier(ibdev, port_num);
+ if (err) {
+ mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n",
+ port_num + 1);
+ goto unbind;
+ }
+
+ err = mlx5_ib_init_cong_debugfs(ibdev, port_num);
+ if (err)
+ goto unbind;
+
+ return true;
+
+unbind:
+ mlx5_ib_unbind_slave_port(ibdev, mpi);
+ return false;
+}
+
+static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
+{
+ int port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
+ port_num + 1);
+ struct mlx5_ib_multiport_info *mpi;
+ int err;
+ int i;
+
+ if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
+ return 0;
+
+ err = mlx5_query_nic_vport_system_image_guid(dev->mdev,
+ &dev->sys_image_guid);
+ if (err)
+ return err;
- dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
+ err = mlx5_nic_vport_enable_roce(dev->mdev);
+ if (err)
+ return err;
+
+ mutex_lock(&mlx5_ib_multiport_mutex);
+ for (i = 0; i < dev->num_ports; i++) {
+ bool bound = false;
+
+ /* build a stub multiport info struct for the native port. */
+ if (i == port_num) {
+ mpi = kzalloc(sizeof(*mpi), GFP_KERNEL);
+ if (!mpi) {
+ mutex_unlock(&mlx5_ib_multiport_mutex);
+ mlx5_nic_vport_disable_roce(dev->mdev);
+ return -ENOMEM;
+ }
+
+ mpi->is_master = true;
+ mpi->mdev = dev->mdev;
+ mpi->sys_image_guid = dev->sys_image_guid;
+ dev->port[i].mp.mpi = mpi;
+ mpi->ibdev = dev;
+ mpi = NULL;
+ continue;
+ }
+
+ list_for_each_entry(mpi, &mlx5_ib_unaffiliated_port_list,
+ list) {
+ if (dev->sys_image_guid == mpi->sys_image_guid &&
+ (mlx5_core_native_port_num(mpi->mdev) - 1) == i) {
+ bound = mlx5_ib_bind_slave_port(dev, mpi);
+ }
+
+ if (bound) {
+ dev_dbg(&mpi->mdev->pdev->dev, "removing port from unaffiliated list.\n");
+ mlx5_ib_dbg(dev, "port %d bound\n", i + 1);
+ list_del(&mpi->list);
+ break;
+ }
+ }
+ if (!bound) {
+ get_port_caps(dev, i + 1);
+ mlx5_ib_dbg(dev, "no free port found for port %d\n",
+ i + 1);
+ }
+ }
+
+ list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list);
+ mutex_unlock(&mlx5_ib_multiport_mutex);
+ return err;
+}
+
+static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
+{
+ int port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
+ port_num + 1);
+ int i;
+
+ if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
+ return;
+
+ mutex_lock(&mlx5_ib_multiport_mutex);
+ for (i = 0; i < dev->num_ports; i++) {
+ if (dev->port[i].mp.mpi) {
+ /* Destroy the native port stub */
+ if (i == port_num) {
+ kfree(dev->port[i].mp.mpi);
+ dev->port[i].mp.mpi = NULL;
+ } else {
+ mlx5_ib_dbg(dev, "unbinding port_num: %d\n", i + 1);
+ mlx5_ib_unbind_slave_port(dev, dev->port[i].mp.mpi);
+ }
+ }
+ }
+
+ mlx5_ib_dbg(dev, "removing from devlist\n");
+ list_del(&dev->ib_dev_list);
+ mutex_unlock(&mlx5_ib_multiport_mutex);
+
+ mlx5_nic_vport_disable_roce(dev->mdev);
+}
+
+static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_cleanup_multiport_master(dev);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ cleanup_srcu_struct(&dev->mr_srcu);
+#endif
+ kfree(dev->port);
+}
+
+static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ const char *name;
+ int err;
+ int i;
+
+ dev->port = kcalloc(dev->num_ports, sizeof(*dev->port),
GFP_KERNEL);
if (!dev->port)
- goto err_dealloc;
+ return -ENOMEM;
+
+ for (i = 0; i < dev->num_ports; i++) {
+ spin_lock_init(&dev->port[i].mp.mpi_lock);
+ rwlock_init(&dev->roce[i].netdev_lock);
+ }
- rwlock_init(&dev->roce.netdev_lock);
- err = get_port_caps(dev);
+ err = mlx5_ib_init_multiport_master(dev);
if (err)
goto err_free_port;
+ if (!mlx5_core_mp_enabled(mdev)) {
+ int i;
+
+ for (i = 1; i <= dev->num_ports; i++) {
+ err = get_port_caps(dev, i);
+ if (err)
+ break;
+ }
+ } else {
+ err = get_port_caps(dev, mlx5_core_native_port_num(mdev));
+ }
+ if (err)
+ goto err_mp;
+
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
@@ -3978,12 +4559,37 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
- dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
- dev->ib_dev.phys_port_cnt = dev->num_ports;
+ dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors =
dev->mdev->priv.eq_table.num_comp_vectors;
dev->ib_dev.dev.parent = &mdev->pdev->dev;
+ mutex_init(&dev->flow_db.lock);
+ mutex_init(&dev->cap_mask_mutex);
+ INIT_LIST_HEAD(&dev->qp_list);
+ spin_lock_init(&dev->reset_flow_resource_lock);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ err = init_srcu_struct(&dev->mr_srcu);
+ if (err)
+ goto err_free_port;
+#endif
+
+ return 0;
+err_mp:
+ mlx5_ib_cleanup_multiport_master(dev);
+
+err_free_port:
+ kfree(dev->port);
+
+ return -ENOMEM;
+}
+
+static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ int err;
+
dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -4022,8 +4628,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.query_device = mlx5_ib_query_device;
dev->ib_dev.query_port = mlx5_ib_query_port;
dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
- if (ll == IB_LINK_LAYER_ETHERNET)
- dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
dev->ib_dev.add_gid = mlx5_ib_add_gid;
dev->ib_dev.del_gid = mlx5_ib_del_gid;
@@ -4080,8 +4684,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
- mlx5_ib_internal_fill_odp_caps(dev);
-
dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
if (MLX5_CAP_GEN(mdev, imaicl)) {
@@ -4092,11 +4694,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
}
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
- dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
- dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
- }
-
if (MLX5_CAP_GEN(mdev, xrc)) {
dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
@@ -4111,8 +4708,39 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
- if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
- IB_LINK_LAYER_ETHERNET) {
+ err = init_node_data(dev);
+ if (err)
+ return err;
+
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
+ (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) ||
+ MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
+ mutex_init(&dev->lb_mutex);
+
+ return 0;
+}
+
+static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
+ u8 port_num;
+ int err;
+ int i;
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ for (i = 0; i < dev->num_ports; i++) {
+ dev->roce[i].dev = dev;
+ dev->roce[i].native_port_num = i + 1;
+ dev->roce[i].last_port_state = IB_PORT_DOWN;
+ }
+
+ dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
dev->ib_dev.create_wq = mlx5_ib_create_wq;
dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
@@ -4124,143 +4752,329 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ err = mlx5_enable_eth(dev, port_num);
+ if (err)
+ return err;
}
- err = init_node_data(dev);
- if (err)
- goto err_free_port;
- mutex_init(&dev->flow_db.lock);
- mutex_init(&dev->cap_mask_mutex);
- INIT_LIST_HEAD(&dev->qp_list);
- spin_lock_init(&dev->reset_flow_resource_lock);
+ return 0;
+}
+
+static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
+ u8 port_num;
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- err = mlx5_enable_eth(dev);
- if (err)
- goto err_free_port;
- dev->roce.last_port_state = IB_PORT_DOWN;
+ mlx5_disable_eth(dev);
+ mlx5_remove_netdev_notifier(dev, port_num);
}
+}
- err = create_dev_resources(&dev->devr);
- if (err)
- goto err_disable_eth;
+static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
+{
+ return create_dev_resources(&dev->devr);
+}
- err = mlx5_ib_odp_init_one(dev);
- if (err)
- goto err_rsrc;
+static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
+{
+ destroy_dev_resources(&dev->devr);
+}
+static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_internal_fill_odp_caps(dev);
+
+ return mlx5_ib_odp_init_one(dev);
+}
+
+static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
+{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
- err = mlx5_ib_alloc_counters(dev);
- if (err)
- goto err_odp;
+ dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
+ dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
+
+ return mlx5_ib_alloc_counters(dev);
}
- err = mlx5_ib_init_cong_debugfs(dev);
- if (err)
- goto err_cnt;
+ return 0;
+}
+
+static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ mlx5_ib_dealloc_counters(dev);
+}
+static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev)
+{
+ return mlx5_ib_init_cong_debugfs(dev,
+ mlx5_core_native_port_num(dev->mdev) - 1);
+}
+
+static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_cleanup_cong_debugfs(dev,
+ mlx5_core_native_port_num(dev->mdev) - 1);
+}
+
+static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev)
+{
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
- if (IS_ERR(dev->mdev->priv.uar))
- goto err_cong;
+ if (!dev->mdev->priv.uar)
+ return -ENOMEM;
+ return 0;
+}
+
+static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
+}
+
+static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
+{
+ int err;
err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
if (err)
- goto err_uar_page;
+ return err;
err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
if (err)
- goto err_bfreg;
+ mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
- err = ib_register_device(&dev->ib_dev, NULL);
- if (err)
- goto err_fp_bfreg;
+ return err;
+}
- err = create_umr_res(dev);
- if (err)
- goto err_dev;
+static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
+ mlx5_free_bfreg(dev->mdev, &dev->bfreg);
+}
+
+static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
+{
+ return ib_register_device(&dev->ib_dev, NULL);
+}
+
+static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
+{
+ ib_unregister_device(&dev->ib_dev);
+}
+static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
+{
+ return create_umr_res(dev);
+}
+
+static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
+{
+ destroy_umrc_res(dev);
+}
+
+static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
+{
init_delay_drop(dev);
+ return 0;
+}
+
+static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
+{
+ cancel_delay_drop(dev);
+}
+
+static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
+{
+ int err;
+ int i;
+
for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
err = device_create_file(&dev->ib_dev.dev,
mlx5_class_attributes[i]);
if (err)
- goto err_delay_drop;
+ return err;
}
- if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
- (MLX5_CAP_GEN(mdev, disable_local_lb_uc) ||
- MLX5_CAP_GEN(mdev, disable_local_lb_mc)))
- mutex_init(&dev->lb_mutex);
+ return 0;
+}
+
+static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage)
+{
+ /* Number of stages to cleanup */
+ while (stage) {
+ stage--;
+ if (profile->stage[stage].cleanup)
+ profile->stage[stage].cleanup(dev);
+ }
+
+ ib_dealloc_device((struct ib_device *)dev);
+}
+
+static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
+
+static void *__mlx5_ib_add(struct mlx5_core_dev *mdev,
+ const struct mlx5_ib_profile *profile)
+{
+ struct mlx5_ib_dev *dev;
+ int err;
+ int i;
+
+ printk_once(KERN_INFO "%s", mlx5_version);
+ dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+ if (!dev)
+ return NULL;
+
+ dev->mdev = mdev;
+ dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+ MLX5_CAP_GEN(mdev, num_vhca_ports));
+
+ for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
+ if (profile->stage[i].init) {
+ err = profile->stage[i].init(dev);
+ if (err)
+ goto err_out;
+ }
+ }
+
+ dev->profile = profile;
dev->ib_active = true;
return dev;
-err_delay_drop:
- cancel_delay_drop(dev);
- destroy_umrc_res(dev);
+err_out:
+ __mlx5_ib_remove(dev, profile, i);
-err_dev:
- ib_unregister_device(&dev->ib_dev);
+ return NULL;
+}
-err_fp_bfreg:
- mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
+static const struct mlx5_ib_profile pf_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_roce_init,
+ mlx5_ib_stage_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_ODP,
+ mlx5_ib_stage_odp_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
+ mlx5_ib_stage_cong_debugfs_init,
+ mlx5_ib_stage_cong_debugfs_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UAR,
+ mlx5_ib_stage_uar_init,
+ mlx5_ib_stage_uar_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
+ mlx5_ib_stage_umr_res_init,
+ mlx5_ib_stage_umr_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
+ mlx5_ib_stage_delay_drop_init,
+ mlx5_ib_stage_delay_drop_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+};
-err_bfreg:
- mlx5_free_bfreg(dev->mdev, &dev->bfreg);
+static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
+{
+ struct mlx5_ib_multiport_info *mpi;
+ struct mlx5_ib_dev *dev;
+ bool bound = false;
+ int err;
-err_uar_page:
- mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
+ mpi = kzalloc(sizeof(*mpi), GFP_KERNEL);
+ if (!mpi)
+ return NULL;
-err_cong:
- mlx5_ib_cleanup_cong_debugfs(dev);
-err_cnt:
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
- mlx5_ib_dealloc_counters(dev);
+ mpi->mdev = mdev;
-err_odp:
- mlx5_ib_odp_remove_one(dev);
+ err = mlx5_query_nic_vport_system_image_guid(mdev,
+ &mpi->sys_image_guid);
+ if (err) {
+ kfree(mpi);
+ return NULL;
+ }
-err_rsrc:
- destroy_dev_resources(&dev->devr);
+ mutex_lock(&mlx5_ib_multiport_mutex);
+ list_for_each_entry(dev, &mlx5_ib_dev_list, ib_dev_list) {
+ if (dev->sys_image_guid == mpi->sys_image_guid)
+ bound = mlx5_ib_bind_slave_port(dev, mpi);
-err_disable_eth:
- if (ll == IB_LINK_LAYER_ETHERNET) {
- mlx5_disable_eth(dev);
- mlx5_remove_netdev_notifier(dev);
+ if (bound) {
+ rdma_roce_rescan_device(&dev->ib_dev);
+ break;
+ }
}
-err_free_port:
- kfree(dev->port);
+ if (!bound) {
+ list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list);
+ dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n");
+ } else {
+ mlx5_ib_dbg(dev, "bound port %u\n", port_num + 1);
+ }
+ mutex_unlock(&mlx5_ib_multiport_mutex);
-err_dealloc:
- ib_dealloc_device((struct ib_device *)dev);
+ return mpi;
+}
- return NULL;
+static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
+{
+ enum rdma_link_layer ll;
+ int port_type_cap;
+
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) {
+ u8 port_num = mlx5_core_native_port_num(mdev) - 1;
+
+ return mlx5_ib_add_slave_port(mdev, port_num);
+ }
+
+ return __mlx5_ib_add(mdev, &pf_profile);
}
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
- struct mlx5_ib_dev *dev = context;
- enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
+ struct mlx5_ib_multiport_info *mpi;
+ struct mlx5_ib_dev *dev;
- cancel_delay_drop(dev);
- mlx5_remove_netdev_notifier(dev);
- ib_unregister_device(&dev->ib_dev);
- mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
- mlx5_free_bfreg(dev->mdev, &dev->bfreg);
- mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
- mlx5_ib_cleanup_cong_debugfs(dev);
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
- mlx5_ib_dealloc_counters(dev);
- destroy_umrc_res(dev);
- mlx5_ib_odp_remove_one(dev);
- destroy_dev_resources(&dev->devr);
- if (ll == IB_LINK_LAYER_ETHERNET)
- mlx5_disable_eth(dev);
- kfree(dev->port);
- ib_dealloc_device(&dev->ib_dev);
+ if (mlx5_core_is_mp_slave(mdev)) {
+ mpi = context;
+ mutex_lock(&mlx5_ib_multiport_mutex);
+ if (mpi->ibdev)
+ mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
+ list_del(&mpi->list);
+ mutex_unlock(&mlx5_ib_multiport_mutex);
+ return;
+ }
+
+ dev = context;
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}
static struct mlx5_interface mlx5_ib_interface = {
@@ -4277,6 +5091,10 @@ static int __init mlx5_ib_init(void)
{
int err;
+ mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0);
+ if (!mlx5_ib_event_wq)
+ return -ENOMEM;
+
mlx5_ib_odp_init();
err = mlx5_register_interface(&mlx5_ib_interface);
@@ -4287,6 +5105,7 @@ static int __init mlx5_ib_init(void)
static void __exit mlx5_ib_cleanup(void)
{
mlx5_unregister_interface(&mlx5_ib_interface);
+ destroy_workqueue(mlx5_ib_event_wq);
}
module_init(mlx5_ib_init);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 2c5f3533bbc9..139385129973 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -70,15 +70,6 @@ enum {
MLX5_IB_MMAP_CMD_MASK = 0xff,
};
-enum mlx5_ib_mmap_cmd {
- MLX5_IB_MMAP_REGULAR_PAGE = 0,
- MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
- MLX5_IB_MMAP_WC_PAGE = 2,
- MLX5_IB_MMAP_NC_PAGE = 3,
- /* 5 is chosen in order to be compatible with old versions of libmlx5 */
- MLX5_IB_MMAP_CORE_CLOCK = 5,
-};
-
enum {
MLX5_RES_SCAT_DATA32_CQE = 0x1,
MLX5_RES_SCAT_DATA64_CQE = 0x2,
@@ -112,6 +103,11 @@ enum {
MLX5_TM_MAX_SGE = 1,
};
+enum {
+ MLX5_IB_INVALID_UAR_INDEX = BIT(31),
+ MLX5_IB_INVALID_BFREG = BIT(31),
+};
+
struct mlx5_ib_vma_private_data {
struct list_head list;
struct vm_area_struct *vma;
@@ -200,6 +196,8 @@ struct mlx5_ib_flow_db {
* creates the actual hardware QP.
*/
#define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2
+#define MLX5_IB_QPT_DCI IB_QPT_RESERVED3
+#define MLX5_IB_QPT_DCT IB_QPT_RESERVED4
#define MLX5_IB_WR_UMR IB_WR_RESERVED1
#define MLX5_IB_UMR_OCTOWORD 16
@@ -360,12 +358,18 @@ struct mlx5_bf {
struct mlx5_sq_bfreg *bfreg;
};
+struct mlx5_ib_dct {
+ struct mlx5_core_dct mdct;
+ u32 *in;
+};
+
struct mlx5_ib_qp {
struct ib_qp ibqp;
union {
struct mlx5_ib_qp_trans trans_qp;
struct mlx5_ib_raw_packet_qp raw_packet_qp;
struct mlx5_ib_rss_qp rss_qp;
+ struct mlx5_ib_dct dct;
};
struct mlx5_buf buf;
@@ -404,6 +408,8 @@ struct mlx5_ib_qp {
u32 rate_limit;
u32 underlay_qpn;
bool tunnel_offload_en;
+ /* storage for qp sub type when core qp type is IB_QPT_DRIVER */
+ enum ib_qp_type qp_sub_type;
};
struct mlx5_ib_cq_buf {
@@ -636,10 +642,21 @@ struct mlx5_ib_counters {
u32 num_q_counters;
u32 num_cong_counters;
u16 set_id;
+ bool set_id_valid;
+};
+
+struct mlx5_ib_multiport_info;
+
+struct mlx5_ib_multiport {
+ struct mlx5_ib_multiport_info *mpi;
+ /* To be held when accessing the multiport info */
+ spinlock_t mpi_lock;
};
struct mlx5_ib_port {
struct mlx5_ib_counters cnts;
+ struct mlx5_ib_multiport mp;
+ struct mlx5_ib_dbg_cc_params *dbg_cc_params;
};
struct mlx5_roce {
@@ -651,12 +668,15 @@ struct mlx5_roce {
struct notifier_block nb;
atomic_t next_port;
enum ib_port_state last_port_state;
+ struct mlx5_ib_dev *dev;
+ u8 native_port_num;
};
struct mlx5_ib_dbg_param {
int offset;
struct mlx5_ib_dev *dev;
struct dentry *dentry;
+ u8 port_num;
};
enum mlx5_ib_dbg_cc_types {
@@ -709,10 +729,50 @@ struct mlx5_ib_delay_drop {
struct mlx5_ib_dbg_delay_drop *dbg;
};
+enum mlx5_ib_stages {
+ MLX5_IB_STAGE_INIT,
+ MLX5_IB_STAGE_CAPS,
+ MLX5_IB_STAGE_ROCE,
+ MLX5_IB_STAGE_DEVICE_RESOURCES,
+ MLX5_IB_STAGE_ODP,
+ MLX5_IB_STAGE_COUNTERS,
+ MLX5_IB_STAGE_CONG_DEBUGFS,
+ MLX5_IB_STAGE_UAR,
+ MLX5_IB_STAGE_BFREG,
+ MLX5_IB_STAGE_IB_REG,
+ MLX5_IB_STAGE_UMR_RESOURCES,
+ MLX5_IB_STAGE_DELAY_DROP,
+ MLX5_IB_STAGE_CLASS_ATTR,
+ MLX5_IB_STAGE_MAX,
+};
+
+struct mlx5_ib_stage {
+ int (*init)(struct mlx5_ib_dev *dev);
+ void (*cleanup)(struct mlx5_ib_dev *dev);
+};
+
+#define STAGE_CREATE(_stage, _init, _cleanup) \
+ .stage[_stage] = {.init = _init, .cleanup = _cleanup}
+
+struct mlx5_ib_profile {
+ struct mlx5_ib_stage stage[MLX5_IB_STAGE_MAX];
+};
+
+struct mlx5_ib_multiport_info {
+ struct list_head list;
+ struct mlx5_ib_dev *ibdev;
+ struct mlx5_core_dev *mdev;
+ struct completion unref_comp;
+ u64 sys_image_guid;
+ u32 mdev_refcnt;
+ bool is_master;
+ bool unaffiliate;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
- struct mlx5_roce roce;
+ struct mlx5_roce roce[MLX5_MAX_PORTS];
int num_ports;
/* serialize update of capability mask
*/
@@ -746,12 +806,14 @@ struct mlx5_ib_dev {
struct mlx5_sq_bfreg bfreg;
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
- struct mlx5_ib_dbg_cc_params *dbg_cc_params;
+ const struct mlx5_ib_profile *profile;
/* protect the user_td */
struct mutex lb_mutex;
u32 user_td;
u8 umr_fence;
+ struct list_head ib_dev_list;
+ u64 sys_image_guid;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -956,13 +1018,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
struct ib_rwq_ind_table_init_attr *init_attr,
struct ib_udata *udata);
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
+bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev);
+
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
struct mlx5_pagefault *pfault);
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
-void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
@@ -977,7 +1040,6 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
}
static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
-static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {}
static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
@@ -1001,8 +1063,8 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
int index, enum ib_gid_type *gid_type);
-void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev);
-int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev);
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
+int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
/* GSI QP helper functions */
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
@@ -1021,6 +1083,15 @@ void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi);
int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc);
+void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi,
+ int bfregn);
+struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi);
+struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
+ u8 ib_port_num,
+ u8 *native_port_num);
+void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
+ u8 port_num);
+
static inline void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
@@ -1052,8 +1123,8 @@ static inline u32 check_cq_create_flags(u32 flags)
* It returns non-zero value for unsupported CQ
* create flags, otherwise it returns zero.
*/
- return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
- IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
+ return (flags & ~(IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN |
+ IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION));
}
static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
@@ -1113,10 +1184,10 @@ static inline int get_uars_per_sys_page(struct mlx5_ib_dev *dev, bool lib_suppor
MLX5_UARS_IN_PAGE : 1;
}
-static inline int get_num_uars(struct mlx5_ib_dev *dev,
- struct mlx5_bfreg_info *bfregi)
+static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
- return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_sys_pages;
+ return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages;
}
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index d109fe8290a7..556e015678de 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1206,6 +1206,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int err;
bool use_umr = true;
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
+ return ERR_PTR(-EINVAL);
+
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index e2197bdda89c..f1a87a690a4c 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1207,10 +1207,6 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
int ret;
- ret = init_srcu_struct(&dev->mr_srcu);
- if (ret)
- return ret;
-
if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
if (ret) {
@@ -1222,11 +1218,6 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
return 0;
}
-void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *dev)
-{
- cleanup_srcu_struct(&dev->mr_srcu);
-}
-
int mlx5_ib_odp_init(void)
{
mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index cffe5966aef9..39d24bf694a8 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -493,7 +493,7 @@ enum {
static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi)
{
- return get_num_uars(dev, bfregi) * MLX5_NON_FP_BFREGS_PER_UAR;
+ return get_num_static_uars(dev, bfregi) * MLX5_NON_FP_BFREGS_PER_UAR;
}
static int num_med_bfreg(struct mlx5_ib_dev *dev,
@@ -581,7 +581,7 @@ static int alloc_bfreg(struct mlx5_ib_dev *dev,
return bfregn;
}
-static void free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn)
+void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn)
{
mutex_lock(&bfregi->lock);
bfregi->count[bfregn]--;
@@ -613,6 +613,7 @@ static int to_mlx5_st(enum ib_qp_type type)
case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC;
case IB_QPT_SMI: return MLX5_QP_ST_QP0;
case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1;
+ case MLX5_IB_QPT_DCI: return MLX5_QP_ST_DCI;
case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
case IB_QPT_RAW_PACKET:
case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
@@ -627,7 +628,8 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
struct mlx5_ib_cq *recv_cq);
static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
- struct mlx5_bfreg_info *bfregi, int bfregn)
+ struct mlx5_bfreg_info *bfregi, int bfregn,
+ bool dyn_bfreg)
{
int bfregs_per_sys_page;
int index_of_sys_page;
@@ -637,8 +639,16 @@ static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
MLX5_NON_FP_BFREGS_PER_UAR;
index_of_sys_page = bfregn / bfregs_per_sys_page;
- offset = bfregn % bfregs_per_sys_page / MLX5_NON_FP_BFREGS_PER_UAR;
+ if (dyn_bfreg) {
+ index_of_sys_page += bfregi->num_static_sys_pages;
+ if (bfregn > bfregi->num_dyn_bfregs ||
+ bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) {
+ mlx5_ib_dbg(dev, "Invalid dynamic uar index\n");
+ return -EINVAL;
+ }
+ }
+ offset = bfregn % bfregs_per_sys_page / MLX5_NON_FP_BFREGS_PER_UAR;
return bfregi->sys_pages[index_of_sys_page] + offset;
}
@@ -764,7 +774,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_create_qp ucmd;
struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
int page_shift = 0;
- int uar_index;
+ int uar_index = 0;
int npages;
u32 offset = 0;
int bfregn;
@@ -780,12 +790,20 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
}
context = to_mucontext(pd->uobject->context);
- /*
- * TBD: should come from the verbs when we have the API
- */
- if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ if (ucmd.flags & MLX5_QP_FLAG_BFREG_INDEX) {
+ uar_index = bfregn_to_uar_index(dev, &context->bfregi,
+ ucmd.bfreg_index, true);
+ if (uar_index < 0)
+ return uar_index;
+
+ bfregn = MLX5_IB_INVALID_BFREG;
+ } else if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) {
+ /*
+ * TBD: should come from the verbs when we have the API
+ */
/* In CROSS_CHANNEL CQ and QP must use the same UAR */
bfregn = MLX5_CROSS_CHANNEL_BFREG;
+ }
else {
bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_HIGH);
if (bfregn < 0) {
@@ -804,8 +822,10 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
}
}
- uar_index = bfregn_to_uar_index(dev, &context->bfregi, bfregn);
mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
+ if (bfregn != MLX5_IB_INVALID_BFREG)
+ uar_index = bfregn_to_uar_index(dev, &context->bfregi, bfregn,
+ false);
qp->rq.offset = 0;
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
@@ -845,7 +865,10 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, page_offset, offset);
MLX5_SET(qpc, qpc, uar_page, uar_index);
- resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn);
+ if (bfregn != MLX5_IB_INVALID_BFREG)
+ resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn);
+ else
+ resp->bfreg_index = MLX5_IB_INVALID_BFREG;
qp->bfregn = bfregn;
err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
@@ -874,7 +897,8 @@ err_umem:
ib_umem_release(ubuffer->umem);
err_bfreg:
- free_bfreg(dev, &context->bfregi, bfregn);
+ if (bfregn != MLX5_IB_INVALID_BFREG)
+ mlx5_ib_free_bfreg(dev, &context->bfregi, bfregn);
return err;
}
@@ -887,7 +911,13 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd,
mlx5_ib_db_unmap_user(context, &qp->db);
if (base->ubuffer.umem)
ib_umem_release(base->ubuffer.umem);
- free_bfreg(dev, &context->bfregi, qp->bfregn);
+
+ /*
+ * Free only the BFREGs which are handled by the kernel.
+ * BFREGs of UARs allocated dynamically are handled by user.
+ */
+ if (qp->bfregn != MLX5_IB_INVALID_BFREG)
+ mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
}
static int create_kernel_qp(struct mlx5_ib_dev *dev,
@@ -1015,6 +1045,7 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
{
if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
+ (attr->qp_type == MLX5_IB_QPT_DCI) ||
(attr->qp_type == IB_QPT_XRC_INI))
return MLX5_SRQ_RQ;
else if (!qp->has_rq)
@@ -2086,20 +2117,108 @@ static const char *ib_qp_type_str(enum ib_qp_type type)
return "IB_QPT_RAW_PACKET";
case MLX5_IB_QPT_REG_UMR:
return "MLX5_IB_QPT_REG_UMR";
+ case IB_QPT_DRIVER:
+ return "IB_QPT_DRIVER";
case IB_QPT_MAX:
default:
return "Invalid QP type";
}
}
+static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct mlx5_ib_create_qp *ucmd)
+{
+ struct mlx5_ib_dev *dev;
+ struct mlx5_ib_qp *qp;
+ int err = 0;
+ u32 uidx = MLX5_IB_DEFAULT_UIDX;
+ void *dctc;
+
+ if (!attr->srq || !attr->recv_cq)
+ return ERR_PTR(-EINVAL);
+
+ dev = to_mdev(pd->device);
+
+ err = get_qp_user_index(to_mucontext(pd->uobject->context),
+ ucmd, sizeof(*ucmd), &uidx);
+ if (err)
+ return ERR_PTR(err);
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL);
+ if (!qp->dct.in) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
+ qp->qp_sub_type = MLX5_IB_QPT_DCT;
+ MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(dctc, dctc, srqn_xrqn, to_msrq(attr->srq)->msrq.srqn);
+ MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn);
+ MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key);
+ MLX5_SET(dctc, dctc, user_index, uidx);
+
+ qp->state = IB_QPS_RESET;
+
+ return &qp->ibqp;
+err_free:
+ kfree(qp);
+ return ERR_PTR(err);
+}
+
+static int set_mlx_qp_type(struct mlx5_ib_dev *dev,
+ struct ib_qp_init_attr *init_attr,
+ struct mlx5_ib_create_qp *ucmd,
+ struct ib_udata *udata)
+{
+ enum { MLX_QP_FLAGS = MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI };
+ int err;
+
+ if (!udata)
+ return -EINVAL;
+
+ if (udata->inlen < sizeof(*ucmd)) {
+ mlx5_ib_dbg(dev, "create_qp user command is smaller than expected\n");
+ return -EINVAL;
+ }
+ err = ib_copy_from_udata(ucmd, udata, sizeof(*ucmd));
+ if (err)
+ return err;
+
+ if ((ucmd->flags & MLX_QP_FLAGS) == MLX5_QP_FLAG_TYPE_DCI) {
+ init_attr->qp_type = MLX5_IB_QPT_DCI;
+ } else {
+ if ((ucmd->flags & MLX_QP_FLAGS) == MLX5_QP_FLAG_TYPE_DCT) {
+ init_attr->qp_type = MLX5_IB_QPT_DCT;
+ } else {
+ mlx5_ib_dbg(dev, "Invalid QP flags\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!MLX5_CAP_GEN(dev->mdev, dct)) {
+ mlx5_ib_dbg(dev, "DC transport is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
+ struct ib_qp_init_attr *verbs_init_attr,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev;
struct mlx5_ib_qp *qp;
u16 xrcdn = 0;
int err;
+ struct ib_qp_init_attr mlx_init_attr;
+ struct ib_qp_init_attr *init_attr = verbs_init_attr;
if (pd) {
dev = to_mdev(pd->device);
@@ -2124,6 +2243,26 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
}
+ if (init_attr->qp_type == IB_QPT_DRIVER) {
+ struct mlx5_ib_create_qp ucmd;
+
+ init_attr = &mlx_init_attr;
+ memcpy(init_attr, verbs_init_attr, sizeof(*verbs_init_attr));
+ err = set_mlx_qp_type(dev, init_attr, &ucmd, udata);
+ if (err)
+ return ERR_PTR(err);
+
+ if (init_attr->qp_type == MLX5_IB_QPT_DCI) {
+ if (init_attr->cap.max_recv_wr ||
+ init_attr->cap.max_recv_sge) {
+ mlx5_ib_dbg(dev, "DCI QP requires zero size receive queue\n");
+ return ERR_PTR(-EINVAL);
+ }
+ } else {
+ return mlx5_ib_create_dct(pd, init_attr, &ucmd);
+ }
+ }
+
switch (init_attr->qp_type) {
case IB_QPT_XRC_TGT:
case IB_QPT_XRC_INI:
@@ -2145,6 +2284,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
case IB_QPT_SMI:
case MLX5_IB_QPT_HW_GSI:
case MLX5_IB_QPT_REG_UMR:
+ case MLX5_IB_QPT_DCI:
qp = kzalloc(sizeof(*qp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
@@ -2185,9 +2325,31 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
}
+ if (verbs_init_attr->qp_type == IB_QPT_DRIVER)
+ qp->qp_sub_type = init_attr->qp_type;
+
return &qp->ibqp;
}
+static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device);
+
+ if (mqp->state == IB_QPS_RTR) {
+ int err;
+
+ err = mlx5_core_destroy_dct(dev->mdev, &mqp->dct.mdct);
+ if (err) {
+ mlx5_ib_warn(dev, "failed to destroy DCT %d\n", err);
+ return err;
+ }
+ }
+
+ kfree(mqp->dct.in);
+ kfree(mqp);
+ return 0;
+}
+
int mlx5_ib_destroy_qp(struct ib_qp *qp)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
@@ -2196,6 +2358,9 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp)
if (unlikely(qp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_destroy_qp(qp);
+ if (mqp->qp_sub_type == MLX5_IB_QPT_DCT)
+ return mlx5_ib_destroy_dct(mqp);
+
destroy_qp_common(dev, mqp);
kfree(mqp);
@@ -2763,7 +2928,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (!context)
return -ENOMEM;
- err = to_mlx5_st(ibqp->qp_type);
+ err = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ?
+ qp->qp_sub_type : ibqp->qp_type);
if (err < 0) {
mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
goto out;
@@ -2796,8 +2962,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
(ibqp->qp_type == IB_QPT_XRC_INI) ||
(ibqp->qp_type == IB_QPT_XRC_TGT)) {
if (mlx5_lag_is_active(dev->mdev)) {
+ u8 p = mlx5_core_native_port_num(dev->mdev);
tx_affinity = (unsigned int)atomic_add_return(1,
- &dev->roce.next_port) %
+ &dev->roce[p].next_port) %
MLX5_MAX_PORTS + 1;
context->flags |= cpu_to_be32(tx_affinity << 24);
}
@@ -2922,7 +3089,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
mlx5_cur = to_mlx5_state(cur_state);
mlx5_new = to_mlx5_state(new_state);
- mlx5_st = to_mlx5_st(ibqp->qp_type);
+ mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ?
+ qp->qp_sub_type : ibqp->qp_type);
if (mlx5_st < 0)
goto out;
@@ -2994,6 +3162,139 @@ out:
return err;
}
+static inline bool is_valid_mask(int mask, int req, int opt)
+{
+ if ((mask & req) != req)
+ return false;
+
+ if (mask & ~(req | opt))
+ return false;
+
+ return true;
+}
+
+/* check valid transition for driver QP types
+ * for now the only QP type that this function supports is DCI
+ */
+static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new_state,
+ enum ib_qp_attr_mask attr_mask)
+{
+ int req = IB_QP_STATE;
+ int opt = 0;
+
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ req |= IB_QP_PKEY_INDEX | IB_QP_PORT;
+ return is_valid_mask(attr_mask, req, opt);
+ } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
+ opt = IB_QP_PKEY_INDEX | IB_QP_PORT;
+ return is_valid_mask(attr_mask, req, opt);
+ } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ req |= IB_QP_PATH_MTU;
+ opt = IB_QP_PKEY_INDEX;
+ return is_valid_mask(attr_mask, req, opt);
+ } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
+ req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
+ IB_QP_MAX_QP_RD_ATOMIC | IB_QP_SQ_PSN;
+ opt = IB_QP_MIN_RNR_TIMER;
+ return is_valid_mask(attr_mask, req, opt);
+ } else if (cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) {
+ opt = IB_QP_MIN_RNR_TIMER;
+ return is_valid_mask(attr_mask, req, opt);
+ } else if (cur_state != IB_QPS_RESET && new_state == IB_QPS_ERR) {
+ return is_valid_mask(attr_mask, req, opt);
+ }
+ return false;
+}
+
+/* mlx5_ib_modify_dct: modify a DCT QP
+ * valid transitions are:
+ * RESET to INIT: must set access_flags, pkey_index and port
+ * INIT to RTR : must set min_rnr_timer, tclass, flow_label,
+ * mtu, gid_index and hop_limit
+ * Other transitions and attributes are illegal
+ */
+static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ enum ib_qp_state cur_state, new_state;
+ int err = 0;
+ int required = IB_QP_STATE;
+ void *dctc;
+
+ if (!(attr_mask & IB_QP_STATE))
+ return -EINVAL;
+
+ cur_state = qp->state;
+ new_state = attr->qp_state;
+
+ dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ required |= IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
+ if (!is_valid_mask(attr_mask, required, 0))
+ return -EINVAL;
+
+ if (attr->port_num == 0 ||
+ attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports)) {
+ mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
+ attr->port_num, dev->num_ports);
+ return -EINVAL;
+ }
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
+ MLX5_SET(dctc, dctc, rre, 1);
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+ MLX5_SET(dctc, dctc, rwe, 1);
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
+ if (!mlx5_ib_dc_atomic_is_supported(dev))
+ return -EOPNOTSUPP;
+ MLX5_SET(dctc, dctc, rae, 1);
+ MLX5_SET(dctc, dctc, atomic_mode, MLX5_ATOMIC_MODE_DCT_CX);
+ }
+ MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
+ MLX5_SET(dctc, dctc, port, attr->port_num);
+ MLX5_SET(dctc, dctc, counter_set_id, dev->port[attr->port_num - 1].cnts.set_id);
+
+ } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ struct mlx5_ib_modify_qp_resp resp = {};
+ u32 min_resp_len = offsetof(typeof(resp), dctn) +
+ sizeof(resp.dctn);
+
+ if (udata->outlen < min_resp_len)
+ return -EINVAL;
+ resp.response_length = min_resp_len;
+
+ required |= IB_QP_MIN_RNR_TIMER | IB_QP_AV | IB_QP_PATH_MTU;
+ if (!is_valid_mask(attr_mask, required, 0))
+ return -EINVAL;
+ MLX5_SET(dctc, dctc, min_rnr_nak, attr->min_rnr_timer);
+ MLX5_SET(dctc, dctc, tclass, attr->ah_attr.grh.traffic_class);
+ MLX5_SET(dctc, dctc, flow_label, attr->ah_attr.grh.flow_label);
+ MLX5_SET(dctc, dctc, mtu, attr->path_mtu);
+ MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index);
+ MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit);
+
+ err = mlx5_core_create_dct(dev->mdev, &qp->dct.mdct, qp->dct.in,
+ MLX5_ST_SZ_BYTES(create_dct_in));
+ if (err)
+ return err;
+ resp.dctn = qp->dct.mdct.mqp.qpn;
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
+ if (err) {
+ mlx5_core_destroy_dct(dev->mdev, &qp->dct.mdct);
+ return err;
+ }
+ } else {
+ mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state);
+ return -EINVAL;
+ }
+ if (err)
+ qp->state = IB_QPS_ERR;
+ else
+ qp->state = new_state;
+ return err;
+}
+
int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
@@ -3011,8 +3312,14 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
- qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ?
- IB_QPT_GSI : ibqp->qp_type;
+ if (ibqp->qp_type == IB_QPT_DRIVER)
+ qp_type = qp->qp_sub_type;
+ else
+ qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ?
+ IB_QPT_GSI : ibqp->qp_type;
+
+ if (qp_type == MLX5_IB_QPT_DCT)
+ return mlx5_ib_modify_dct(ibqp, attr, attr_mask, udata);
mutex_lock(&qp->mutex);
@@ -3031,15 +3338,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
} else if (qp_type != MLX5_IB_QPT_REG_UMR &&
- !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
+ qp_type != MLX5_IB_QPT_DCI &&
+ !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
cur_state, new_state, ibqp->qp_type, attr_mask);
goto out;
+ } else if (qp_type == MLX5_IB_QPT_DCI &&
+ !modify_dci_qp_is_ok(cur_state, new_state, attr_mask)) {
+ mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
+ cur_state, new_state, qp_type, attr_mask);
+ goto out;
}
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 ||
- attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) {
+ attr->port_num > dev->num_ports)) {
mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
attr->port_num, dev->num_ports);
goto out;
@@ -4358,11 +4671,10 @@ static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
struct rdma_ah_attr *ah_attr,
struct mlx5_qp_path *path)
{
- struct mlx5_core_dev *dev = ibdev->mdev;
memset(ah_attr, 0, sizeof(*ah_attr));
- if (!path->port || path->port > MLX5_CAP_GEN(dev, num_ports))
+ if (!path->port || path->port > ibdev->num_ports)
return;
ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, path->port);
@@ -4577,6 +4889,71 @@ out:
return err;
}
+static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp,
+ struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx5_core_dct *dct = &mqp->dct.mdct;
+ u32 *out;
+ u32 access_flags = 0;
+ int outlen = MLX5_ST_SZ_BYTES(query_dct_out);
+ void *dctc;
+ int err;
+ int supported_mask = IB_QP_STATE |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PORT |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_AV |
+ IB_QP_PATH_MTU |
+ IB_QP_PKEY_INDEX;
+
+ if (qp_attr_mask & ~supported_mask)
+ return -EINVAL;
+ if (mqp->state != IB_QPS_RTR)
+ return -EINVAL;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_dct_query(dev->mdev, dct, out, outlen);
+ if (err)
+ goto out;
+
+ dctc = MLX5_ADDR_OF(query_dct_out, out, dct_context_entry);
+
+ if (qp_attr_mask & IB_QP_STATE)
+ qp_attr->qp_state = IB_QPS_RTR;
+
+ if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
+ if (MLX5_GET(dctc, dctc, rre))
+ access_flags |= IB_ACCESS_REMOTE_READ;
+ if (MLX5_GET(dctc, dctc, rwe))
+ access_flags |= IB_ACCESS_REMOTE_WRITE;
+ if (MLX5_GET(dctc, dctc, rae))
+ access_flags |= IB_ACCESS_REMOTE_ATOMIC;
+ qp_attr->qp_access_flags = access_flags;
+ }
+
+ if (qp_attr_mask & IB_QP_PORT)
+ qp_attr->port_num = MLX5_GET(dctc, dctc, port);
+ if (qp_attr_mask & IB_QP_MIN_RNR_TIMER)
+ qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak);
+ if (qp_attr_mask & IB_QP_AV) {
+ qp_attr->ah_attr.grh.traffic_class = MLX5_GET(dctc, dctc, tclass);
+ qp_attr->ah_attr.grh.flow_label = MLX5_GET(dctc, dctc, flow_label);
+ qp_attr->ah_attr.grh.sgid_index = MLX5_GET(dctc, dctc, my_addr_index);
+ qp_attr->ah_attr.grh.hop_limit = MLX5_GET(dctc, dctc, hop_limit);
+ }
+ if (qp_attr_mask & IB_QP_PATH_MTU)
+ qp_attr->path_mtu = MLX5_GET(dctc, dctc, mtu);
+ if (qp_attr_mask & IB_QP_PKEY_INDEX)
+ qp_attr->pkey_index = MLX5_GET(dctc, dctc, pkey_index);
+out:
+ kfree(out);
+ return err;
+}
+
int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
@@ -4596,6 +4973,10 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
memset(qp_init_attr, 0, sizeof(*qp_init_attr));
memset(qp_attr, 0, sizeof(*qp_attr));
+ if (unlikely(qp->qp_sub_type == MLX5_IB_QPT_DCT))
+ return mlx5_ib_dct_query_qp(dev, qp, qp_attr,
+ qp_attr_mask, qp_init_attr);
+
mutex_lock(&qp->mutex);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
@@ -4685,13 +5066,10 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
int err;
err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn);
- if (err) {
+ if (err)
mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
- return err;
- }
kfree(xrcd);
-
return 0;
}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index c6fe89d79248..2fe503e86c1d 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -472,7 +472,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
goto out;
}
- ret = get_user_pages(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages, NULL);
+ ret = get_user_pages_fast(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages);
if (ret < 0)
goto out;
@@ -623,13 +623,12 @@ int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
page = dev->db_tab->page + end;
alloc:
- page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
- &page->mapping, GFP_KERNEL);
+ page->db_rec = dma_zalloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
+ &page->mapping, GFP_KERNEL);
if (!page->db_rec) {
ret = -ENOMEM;
goto out;
}
- memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE);
ret = mthca_MAP_ICM_page(dev, page->mapping,
mthca_uarc_virt(dev, &dev->driver_uar, i));
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
deleted file mode 100644
index 5fe56e810739..000000000000
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef MTHCA_USER_H
-#define MTHCA_USER_H
-
-#include <linux/types.h>
-
-/*
- * Increment this value if any changes that break userspace ABI
- * compatibility are made.
- */
-#define MTHCA_UVERBS_ABI_VERSION 1
-
-/*
- * Make sure that all structs defined in this file remain laid out so
- * that they pack the same way on 32-bit and 64-bit architectures (to
- * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
- */
-
-struct mthca_alloc_ucontext_resp {
- __u32 qp_tab_size;
- __u32 uarc_size;
-};
-
-struct mthca_alloc_pd_resp {
- __u32 pdn;
- __u32 reserved;
-};
-
-struct mthca_reg_mr {
-/*
- * Mark the memory region with a DMA attribute that causes
- * in-flight DMA to be flushed when the region is written to:
- */
-#define MTHCA_MR_DMASYNC 0x1
- __u32 mr_attrs;
- __u32 reserved;
-};
-
-struct mthca_create_cq {
- __u32 lkey;
- __u32 pdn;
- __u64 arm_db_page;
- __u64 set_db_page;
- __u32 arm_db_index;
- __u32 set_db_index;
-};
-
-struct mthca_create_cq_resp {
- __u32 cqn;
- __u32 reserved;
-};
-
-struct mthca_resize_cq {
- __u32 lkey;
- __u32 reserved;
-};
-
-struct mthca_create_srq {
- __u32 lkey;
- __u32 db_index;
- __u64 db_page;
-};
-
-struct mthca_create_srq_resp {
- __u32 srqn;
- __u32 reserved;
-};
-
-struct mthca_create_qp {
- __u32 lkey;
- __u32 reserved;
- __u64 sq_db_page;
- __u64 rq_db_page;
- __u32 sq_db_index;
- __u32 rq_db_index;
-};
-
-#endif /* MTHCA_USER_H */
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index c56ca2a74df5..6cdfbf8c5674 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1365,7 +1365,7 @@ static int mini_cm_del_listen(struct nes_cm_core *cm_core,
static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
struct nes_cm_node *cm_node)
{
- cm_node->accelerated = 1;
+ cm_node->accelerated = true;
if (cm_node->accept_pend) {
BUG_ON(!cm_node->listener);
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index d827d03e3941..b9cc02b4e8d5 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -279,7 +279,6 @@ struct nes_cm_tcp_context {
u8 rcv_wscale;
struct nes_cm_tsa_context tsa_cntxt;
- struct timeval sent_ts;
};
@@ -341,7 +340,7 @@ struct nes_cm_node {
u16 mpa_frame_size;
struct iw_cm_id *cm_id;
struct list_head list;
- int accelerated;
+ bool accelerated;
struct nes_cm_listener *listener;
enum nes_cm_conn_type conn_type;
struct nes_vnic *nesvnic;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 0ba695a88b62..9904918589a4 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -380,11 +380,10 @@ static int ocrdma_alloc_q(struct ocrdma_dev *dev,
q->len = len;
q->entry_size = entry_size;
q->size = len * entry_size;
- q->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, q->size,
- &q->dma, GFP_KERNEL);
+ q->va = dma_zalloc_coherent(&dev->nic_info.pdev->dev, q->size,
+ &q->dma, GFP_KERNEL);
if (!q->va)
return -ENOMEM;
- memset(q->va, 0, q->size);
return 0;
}
@@ -1819,12 +1818,11 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
return -ENOMEM;
ocrdma_init_mch(&cmd->cmd.req, OCRDMA_CMD_CREATE_CQ,
OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
- cq->va = dma_alloc_coherent(&pdev->dev, cq->len, &cq->pa, GFP_KERNEL);
+ cq->va = dma_zalloc_coherent(&pdev->dev, cq->len, &cq->pa, GFP_KERNEL);
if (!cq->va) {
status = -ENOMEM;
goto mem_err;
}
- memset(cq->va, 0, cq->len);
page_size = cq->len / hw_pages;
cmd->cmd.pgsz_pgcnt = (page_size / OCRDMA_MIN_Q_PAGE_SIZE) <<
OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT;
@@ -2212,10 +2210,9 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
qp->sq.max_cnt = max_wqe_allocated;
len = (hw_pages * hw_page_size);
- qp->sq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
+ qp->sq.va = dma_zalloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
if (!qp->sq.va)
return -EINVAL;
- memset(qp->sq.va, 0, len);
qp->sq.len = len;
qp->sq.pa = pa;
qp->sq.entry_size = dev->attr.wqe_size;
@@ -2263,10 +2260,9 @@ static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd,
qp->rq.max_cnt = max_rqe_allocated;
len = (hw_pages * hw_page_size);
- qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
+ qp->rq.va = dma_zalloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
if (!qp->rq.va)
return -ENOMEM;
- memset(qp->rq.va, 0, len);
qp->rq.pa = pa;
qp->rq.len = len;
qp->rq.entry_size = dev->attr.rqe_size;
@@ -2320,11 +2316,10 @@ static int ocrdma_set_create_qp_ird_cmd(struct ocrdma_create_qp_req *cmd,
if (dev->attr.ird == 0)
return 0;
- qp->ird_q_va = dma_alloc_coherent(&pdev->dev, ird_q_len,
- &pa, GFP_KERNEL);
+ qp->ird_q_va = dma_zalloc_coherent(&pdev->dev, ird_q_len, &pa,
+ GFP_KERNEL);
if (!qp->ird_q_va)
return -ENOMEM;
- memset(qp->ird_q_va, 0, ird_q_len);
ocrdma_build_q_pages(&cmd->ird_addr[0], dev->attr.num_ird_pages,
pa, ird_page_size);
for (; i < ird_q_len / dev->attr.rqe_size; i++) {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index e528d7acb7f6..24d20a4aa262 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -73,15 +73,13 @@ bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev)
mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req),
sizeof(struct ocrdma_rdma_stats_resp));
- mem->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, mem->size,
- &mem->pa, GFP_KERNEL);
+ mem->va = dma_zalloc_coherent(&dev->nic_info.pdev->dev, mem->size,
+ &mem->pa, GFP_KERNEL);
if (!mem->va) {
pr_err("%s: stats mbox allocation failed\n", __func__);
return false;
}
- memset(mem->va, 0, mem->size);
-
/* Alloc debugfs mem */
mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL);
if (!mem->debugfs_mem)
@@ -834,7 +832,7 @@ void ocrdma_add_port_stats(struct ocrdma_dev *dev)
dev->reset_stats.type = OCRDMA_RESET_STATS;
dev->reset_stats.dev = dev;
- if (!debugfs_create_file("reset_stats", S_IRUSR, dev->dir,
+ if (!debugfs_create_file("reset_stats", 0200, dev->dir,
&dev->reset_stats, &ocrdma_dbg_ops))
goto err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 7866fd8051f6..8009bdad4e5b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -461,7 +461,7 @@ retry:
static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
struct ocrdma_pd *pd)
{
- return (uctx->cntxt_pd == pd ? true : false);
+ return (uctx->cntxt_pd == pd);
}
static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
@@ -550,13 +550,12 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
INIT_LIST_HEAD(&ctx->mm_head);
mutex_init(&ctx->mm_list_lock);
- ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
- &ctx->ah_tbl.pa, GFP_KERNEL);
+ ctx->ah_tbl.va = dma_zalloc_coherent(&pdev->dev, map_len,
+ &ctx->ah_tbl.pa, GFP_KERNEL);
if (!ctx->ah_tbl.va) {
kfree(ctx);
return ERR_PTR(-ENOMEM);
}
- memset(ctx->ah_tbl.va, 0, map_len);
ctx->ah_tbl.len = map_len;
memset(&resp, 0, sizeof(resp));
@@ -885,13 +884,12 @@ static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
return -ENOMEM;
for (i = 0; i < mr->num_pbls; i++) {
- va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
+ va = dma_zalloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
if (!va) {
ocrdma_free_mr_pbl_tbl(dev, mr);
status = -ENOMEM;
break;
}
- memset(va, 0, dma_len);
mr->pbl_table[i].va = va;
mr->pbl_table[i].pa = pa;
}
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index b26aa88dab48..53f00dbf313f 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -604,12 +604,11 @@ static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
return ERR_PTR(-ENOMEM);
for (i = 0; i < pbl_info->num_pbls; i++) {
- va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size,
- &pa, flags);
+ va = dma_zalloc_coherent(&pdev->dev, pbl_info->pbl_size,
+ &pa, flags);
if (!va)
goto err;
- memset(va, 0, pbl_info->pbl_size);
pbl_table[i].va = va;
pbl_table[i].pa = pa;
}
@@ -3040,7 +3039,7 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
swqe->wqe_size = 2;
swqe2 = qed_chain_produce(&qp->sq.pbl);
- swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.imm_data);
+ swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
wr, bad_wr);
swqe->length = cpu_to_le32(length);
@@ -3471,9 +3470,9 @@ static int qedr_poll_cq_req(struct qedr_dev *dev,
break;
case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
if (qp->state != QED_ROCE_QP_STATE_ERR)
- DP_ERR(dev,
- "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
- cq->icid, qp->icid);
+ DP_DEBUG(dev, QEDR_MSG_CQ,
+ "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+ cq->icid, qp->icid);
cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
IB_WC_WR_FLUSH_ERR, 1);
break;
@@ -3591,7 +3590,7 @@ static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
wc->byte_len = le32_to_cpu(resp->length);
if (resp->flags & QEDR_RESP_IMM) {
- wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
+ wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
wc->wc_flags |= IB_WC_WITH_IMM;
if (resp->flags & QEDR_RESP_RDMA)
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 092ed8103842..0235f76bbc72 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1428,8 +1428,6 @@ u64 qib_sps_ints(void);
*/
dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long,
size_t, int);
-const char *qib_get_unit_name(int unit);
-const char *qib_get_card_name(struct rvt_dev_info *rdi);
struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi);
/*
@@ -1488,15 +1486,15 @@ extern struct mutex qib_mutex;
#define qib_dev_err(dd, fmt, ...) \
dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
- qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define qib_dev_warn(dd, fmt, ...) \
dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
- qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define qib_dev_porterr(dd, port, fmt, ...) \
dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
- qib_get_unit_name((dd)->unit), (dd)->unit, (port), \
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), (dd)->unit, (port), \
##__VA_ARGS__)
#define qib_devinfo(pcidev, fmt, ...) \
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 33d3335385e8..3117cc5f2a9a 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -81,22 +81,6 @@ MODULE_DESCRIPTION("Intel IB driver");
struct qlogic_ib_stats qib_stats;
-const char *qib_get_unit_name(int unit)
-{
- static char iname[16];
-
- snprintf(iname, sizeof(iname), "infinipath%u", unit);
- return iname;
-}
-
-const char *qib_get_card_name(struct rvt_dev_info *rdi)
-{
- struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
- struct qib_devdata *dd = container_of(ibdev,
- struct qib_devdata, verbs_dev);
- return qib_get_unit_name(dd->unit);
-}
-
struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi)
{
struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c
index 33a2e74c8495..5838b3bf34b9 100644
--- a/drivers/infiniband/hw/qib/qib_eeprom.c
+++ b/drivers/infiniband/hw/qib/qib_eeprom.c
@@ -163,8 +163,7 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
if (bguid[6] == 0xff) {
if (bguid[5] == 0xff) {
qib_dev_err(dd,
- "Can't set %s GUID from base, wraps to OUI!\n",
- qib_get_unit_name(t));
+ "Can't set GUID from base, wraps to OUI!\n");
dd->base_guid = 0;
goto bail;
}
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index b67df63bd64b..f7593b5e2b76 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -568,20 +568,16 @@ done:
static int qib_set_part_key(struct qib_ctxtdata *rcd, u16 key)
{
struct qib_pportdata *ppd = rcd->ppd;
- int i, any = 0, pidx = -1;
+ int i, pidx = -1;
+ bool any = false;
u16 lkey = key & 0x7FFF;
- int ret;
- if (lkey == (QIB_DEFAULT_P_KEY & 0x7FFF)) {
+ if (lkey == (QIB_DEFAULT_P_KEY & 0x7FFF))
/* nothing to do; this key always valid */
- ret = 0;
- goto bail;
- }
+ return 0;
- if (!lkey) {
- ret = -EINVAL;
- goto bail;
- }
+ if (!lkey)
+ return -EINVAL;
/*
* Set the full membership bit, because it has to be
@@ -594,18 +590,14 @@ static int qib_set_part_key(struct qib_ctxtdata *rcd, u16 key)
for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) {
if (!rcd->pkeys[i] && pidx == -1)
pidx = i;
- if (rcd->pkeys[i] == key) {
- ret = -EEXIST;
- goto bail;
- }
+ if (rcd->pkeys[i] == key)
+ return -EEXIST;
}
- if (pidx == -1) {
- ret = -EBUSY;
- goto bail;
- }
- for (any = i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
+ if (pidx == -1)
+ return -EBUSY;
+ for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
if (!ppd->pkeys[i]) {
- any++;
+ any = true;
continue;
}
if (ppd->pkeys[i] == key) {
@@ -613,44 +605,34 @@ static int qib_set_part_key(struct qib_ctxtdata *rcd, u16 key)
if (atomic_inc_return(pkrefs) > 1) {
rcd->pkeys[pidx] = key;
- ret = 0;
- goto bail;
- } else {
- /*
- * lost race, decrement count, catch below
- */
- atomic_dec(pkrefs);
- any++;
+ return 0;
}
+ /*
+ * lost race, decrement count, catch below
+ */
+ atomic_dec(pkrefs);
+ any = true;
}
- if ((ppd->pkeys[i] & 0x7FFF) == lkey) {
+ if ((ppd->pkeys[i] & 0x7FFF) == lkey)
/*
* It makes no sense to have both the limited and
* full membership PKEY set at the same time since
* the unlimited one will disable the limited one.
*/
- ret = -EEXIST;
- goto bail;
- }
- }
- if (!any) {
- ret = -EBUSY;
- goto bail;
+ return -EEXIST;
}
- for (any = i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
+ if (!any)
+ return -EBUSY;
+ for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
if (!ppd->pkeys[i] &&
atomic_inc_return(&ppd->pkeyrefs[i]) == 1) {
rcd->pkeys[pidx] = key;
ppd->pkeys[i] = key;
(void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0);
- ret = 0;
- goto bail;
+ return 0;
}
}
- ret = -EBUSY;
-
-bail:
- return ret;
+ return -EBUSY;
}
/**
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 85dfbba427f6..3990f386aa32 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1119,6 +1119,8 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
"Could not allocate unit ID: error %d\n", -ret);
goto bail;
}
+ rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s%d", "qib", dd->unit);
+
dd->int_counter = alloc_percpu(u64);
if (!dd->int_counter) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
deleted file mode 100644
index 8fdf79f8d4e4..000000000000
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2009 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "qib.h"
-
-/**
- * qib_alloc_lkey - allocate an lkey
- * @mr: memory region that this lkey protects
- * @dma_region: 0->normal key, 1->restricted DMA key
- *
- * Returns 0 if successful, otherwise returns -errno.
- *
- * Increments mr reference count as required.
- *
- * Sets the lkey field mr for non-dma regions.
- *
- */
-
-int qib_alloc_lkey(struct rvt_mregion *mr, int dma_region)
-{
- unsigned long flags;
- u32 r;
- u32 n;
- int ret = 0;
- struct qib_ibdev *dev = to_idev(mr->pd->device);
- struct rvt_lkey_table *rkt = &dev->lk_table;
-
- spin_lock_irqsave(&rkt->lock, flags);
-
- /* special case for dma_mr lkey == 0 */
- if (dma_region) {
- struct rvt_mregion *tmr;
-
- tmr = rcu_access_pointer(dev->dma_mr);
- if (!tmr) {
- qib_get_mr(mr);
- rcu_assign_pointer(dev->dma_mr, mr);
- mr->lkey_published = 1;
- }
- goto success;
- }
-
- /* Find the next available LKEY */
- r = rkt->next;
- n = r;
- for (;;) {
- if (rkt->table[r] == NULL)
- break;
- r = (r + 1) & (rkt->max - 1);
- if (r == n)
- goto bail;
- }
- rkt->next = (r + 1) & (rkt->max - 1);
- /*
- * Make sure lkey is never zero which is reserved to indicate an
- * unrestricted LKEY.
- */
- rkt->gen++;
- /*
- * bits are capped in qib_verbs.c to insure enough bits
- * for generation number
- */
- mr->lkey = (r << (32 - ib_rvt_lkey_table_size)) |
- ((((1 << (24 - ib_rvt_lkey_table_size)) - 1) & rkt->gen)
- << 8);
- if (mr->lkey == 0) {
- mr->lkey |= 1 << 8;
- rkt->gen++;
- }
- qib_get_mr(mr);
- rcu_assign_pointer(rkt->table[r], mr);
- mr->lkey_published = 1;
-success:
- spin_unlock_irqrestore(&rkt->lock, flags);
-out:
- return ret;
-bail:
- spin_unlock_irqrestore(&rkt->lock, flags);
- ret = -ENOMEM;
- goto out;
-}
-
-/**
- * qib_free_lkey - free an lkey
- * @mr: mr to free from tables
- */
-void qib_free_lkey(struct rvt_mregion *mr)
-{
- unsigned long flags;
- u32 lkey = mr->lkey;
- u32 r;
- struct qib_ibdev *dev = to_idev(mr->pd->device);
- struct rvt_lkey_table *rkt = &dev->lk_table;
-
- spin_lock_irqsave(&rkt->lock, flags);
- if (!mr->lkey_published)
- goto out;
- if (lkey == 0)
- RCU_INIT_POINTER(dev->dma_mr, NULL);
- else {
- r = lkey >> (32 - ib_rvt_lkey_table_size);
- RCU_INIT_POINTER(rkt->table[r], NULL);
- }
- qib_put_mr(mr);
- mr->lkey_published = 0;
-out:
- spin_unlock_irqrestore(&rkt->lock, flags);
-}
-
-/**
- * qib_rkey_ok - check the IB virtual address, length, and RKEY
- * @qp: qp for validation
- * @sge: SGE state
- * @len: length of data
- * @vaddr: virtual address to place data
- * @rkey: rkey to check
- * @acc: access flags
- *
- * Return 1 if successful, otherwise 0.
- *
- * increments the reference count upon success
- */
-int qib_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
- u32 len, u64 vaddr, u32 rkey, int acc)
-{
- struct rvt_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
- struct rvt_mregion *mr;
- unsigned n, m;
- size_t off;
-
- /* We use RKEY == zero for kernel virtual addresses */
- rcu_read_lock();
- if (rkey == 0) {
- struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
- struct qib_ibdev *dev = to_idev(pd->ibpd.device);
-
- if (pd->user)
- goto bail;
- mr = rcu_dereference(dev->dma_mr);
- if (!mr)
- goto bail;
- if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
- goto bail;
- rcu_read_unlock();
-
- sge->mr = mr;
- sge->vaddr = (void *) vaddr;
- sge->length = len;
- sge->sge_length = len;
- sge->m = 0;
- sge->n = 0;
- goto ok;
- }
-
- mr = rcu_dereference(
- rkt->table[(rkey >> (32 - ib_rvt_lkey_table_size))]);
- if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
- goto bail;
-
- off = vaddr - mr->iova;
- if (unlikely(vaddr < mr->iova || off + len > mr->length ||
- (mr->access_flags & acc) == 0))
- goto bail;
- if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
- goto bail;
- rcu_read_unlock();
-
- off += mr->offset;
- if (mr->page_shift) {
- /*
- page sizes are uniform power of 2 so no loop is necessary
- entries_spanned_by_off is the number of times the loop below
- would have executed.
- */
- size_t entries_spanned_by_off;
-
- entries_spanned_by_off = off >> mr->page_shift;
- off -= (entries_spanned_by_off << mr->page_shift);
- m = entries_spanned_by_off / RVT_SEGSZ;
- n = entries_spanned_by_off % RVT_SEGSZ;
- } else {
- m = 0;
- n = 0;
- while (off >= mr->map[m]->segs[n].length) {
- off -= mr->map[m]->segs[n].length;
- n++;
- if (n >= RVT_SEGSZ) {
- m++;
- n = 0;
- }
- }
- }
- sge->mr = mr;
- sge->vaddr = mr->map[m]->segs[n].vaddr + off;
- sge->length = mr->map[m]->segs[n].length - off;
- sge->sge_length = len;
- sge->m = m;
- sge->n = n;
-ok:
- return 1;
-bail:
- rcu_read_unlock();
- return 0;
-}
-
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 1a785c37ad0a..cfddff45413f 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -432,13 +432,13 @@ no_flow_control:
qp->s_state = OP(COMPARE_SWAP);
put_ib_ateth_swap(wqe->atomic_wr.swap,
&ohdr->u.atomic_eth);
- put_ib_ateth_swap(wqe->atomic_wr.compare_add,
- &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
} else {
qp->s_state = OP(FETCH_ADD);
put_ib_ateth_swap(wqe->atomic_wr.compare_add,
&ohdr->u.atomic_eth);
- put_ib_ateth_swap(0, &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
}
put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
&ohdr->u.atomic_eth);
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index c55000501582..fabee760407e 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1571,7 +1571,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
if (!ib_qib_sys_image_guid)
ib_qib_sys_image_guid = ppd->guid;
- strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
ibdev->owner = THIS_MODULE;
ibdev->node_guid = ppd->guid;
ibdev->phys_port_cnt = dd->num_pports;
@@ -1586,7 +1585,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
* Fill in rvt info object.
*/
dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
- dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name;
dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 685ef2293cb8..4210ca14014d 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -45,7 +45,6 @@
#include "usnic_ib_verbs.h"
#include "usnic_ib_sysfs.h"
#include "usnic_log.h"
-#include "usnic_ib_sysfs.h"
static ssize_t usnic_ib_show_board(struct device *device,
struct device_attribute *attr,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index aa2456a4f9bd..a688a5669168 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -47,7 +47,6 @@
#include "usnic_log.h"
#include "usnic_uiom.h"
#include "usnic_transport.h"
-#include "usnic_ib_verbs.h"
#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 4f7bd3b6a315..44cb1cfba417 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -93,7 +93,7 @@ struct pvrdma_cq {
struct pvrdma_page_dir pdir;
u32 cq_handle;
bool is_kernel;
- atomic_t refcnt;
+ refcount_t refcnt;
struct completion free;
};
@@ -196,7 +196,7 @@ struct pvrdma_qp {
u8 state;
bool is_kernel;
struct mutex mutex; /* QP state mutex. */
- atomic_t refcnt;
+ refcount_t refcnt;
struct completion free;
};
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index e529622cefad..faa9478c14a6 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -132,8 +132,9 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
}
cq->ibcq.cqe = entries;
+ cq->is_kernel = !context;
- if (context) {
+ if (!cq->is_kernel) {
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
ret = -EFAULT;
goto err_cq;
@@ -148,8 +149,6 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
npages = ib_umem_page_count(cq->umem);
} else {
- cq->is_kernel = true;
-
/* One extra page for shared ring state */
npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
PAGE_SIZE - 1) / PAGE_SIZE;
@@ -178,7 +177,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
else
pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);
- atomic_set(&cq->refcnt, 1);
+ refcount_set(&cq->refcnt, 1);
init_completion(&cq->free);
spin_lock_init(&cq->cq_lock);
@@ -202,7 +201,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
- if (context) {
+ if (!cq->is_kernel) {
cq->uar = &(to_vucontext(context)->uar);
/* Copy udata back. */
@@ -219,7 +218,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
err_page_dir:
pvrdma_page_dir_cleanup(dev, &cq->pdir);
err_umem:
- if (context)
+ if (!cq->is_kernel)
ib_umem_release(cq->umem);
err_cq:
atomic_dec(&dev->num_cqs);
@@ -230,7 +229,7 @@ err_cq:
static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
{
- if (atomic_dec_and_test(&cq->refcnt))
+ if (refcount_dec_and_test(&cq->refcnt))
complete(&cq->free);
wait_for_completion(&cq->free);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index e92681878c93..d650a9fcde24 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -243,13 +243,13 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
mutex_init(&dev->port_mutex);
spin_lock_init(&dev->desc_lock);
- dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(void *),
+ dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(struct pvrdma_cq *),
GFP_KERNEL);
if (!dev->cq_tbl)
return ret;
spin_lock_init(&dev->cq_tbl_lock);
- dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(void *),
+ dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(struct pvrdma_qp *),
GFP_KERNEL);
if (!dev->qp_tbl)
goto err_cq_free;
@@ -333,7 +333,7 @@ static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
spin_lock_irqsave(&dev->qp_tbl_lock, flags);
qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
if (qp)
- atomic_inc(&qp->refcnt);
+ refcount_inc(&qp->refcnt);
spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
if (qp && qp->ibqp.event_handler) {
@@ -346,7 +346,7 @@ static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
ibqp->event_handler(&e, ibqp->qp_context);
}
if (qp) {
- if (atomic_dec_and_test(&qp->refcnt))
+ if (refcount_dec_and_test(&qp->refcnt))
complete(&qp->free);
}
}
@@ -359,7 +359,7 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
spin_lock_irqsave(&dev->cq_tbl_lock, flags);
cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
if (cq)
- atomic_inc(&cq->refcnt);
+ refcount_inc(&cq->refcnt);
spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
if (cq && cq->ibcq.event_handler) {
@@ -372,7 +372,7 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
ibcq->event_handler(&e, ibcq->cq_context);
}
if (cq) {
- if (atomic_dec_and_test(&cq->refcnt))
+ if (refcount_dec_and_test(&cq->refcnt))
complete(&cq->free);
}
}
@@ -531,13 +531,13 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
spin_lock_irqsave(&dev->cq_tbl_lock, flags);
cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
if (cq)
- atomic_inc(&cq->refcnt);
+ refcount_inc(&cq->refcnt);
spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
if (cq && cq->ibcq.comp_handler)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
if (cq) {
- if (atomic_dec_and_test(&cq->refcnt))
+ if (refcount_dec_and_test(&cq->refcnt))
complete(&cq->free);
}
pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
@@ -882,8 +882,8 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
dev_info(&pdev->dev, "device version %d, driver version %d\n",
dev->dsr_version, PVRDMA_VERSION);
- dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr),
- &dev->dsrbase, GFP_KERNEL);
+ dev->dsr = dma_zalloc_coherent(&pdev->dev, sizeof(*dev->dsr),
+ &dev->dsrbase, GFP_KERNEL);
if (!dev->dsr) {
dev_err(&pdev->dev, "failed to allocate shared region\n");
ret = -ENOMEM;
@@ -891,7 +891,6 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
}
/* Setup the shared region */
- memset(dev->dsr, 0, sizeof(*dev->dsr));
dev->dsr->driver_version = PVRDMA_VERSION;
dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
PVRDMA_GOS_BITS_32 :
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
index 8519f3212e52..fa96fa4fb829 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -119,10 +119,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
union pvrdma_cmd_resp rsp;
struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
- int nchunks;
int ret;
- int entry;
- struct scatterlist *sg;
if (length == 0 || length > dev->dsr->caps.max_mr_size) {
dev_warn(&dev->pdev->dev, "invalid mem region length\n");
@@ -137,13 +134,9 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_CAST(umem);
}
- nchunks = 0;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry)
- nchunks += sg_dma_len(sg) >> PAGE_SHIFT;
-
- if (nchunks < 0 || nchunks > PVRDMA_PAGE_DIR_MAX_PAGES) {
+ if (umem->npages < 0 || umem->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
- nchunks);
+ umem->npages);
ret = -EINVAL;
goto err_umem;
}
@@ -158,7 +151,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mmr.size = length;
mr->umem = umem;
- ret = pvrdma_page_dir_init(dev, &mr->pdir, nchunks, false);
+ ret = pvrdma_page_dir_init(dev, &mr->pdir, umem->npages, false);
if (ret) {
dev_warn(&dev->pdev->dev,
"could not allocate page directory\n");
@@ -175,7 +168,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
cmd->length = length;
cmd->pd_handle = to_vpd(pd)->pd_handle;
cmd->access_flags = access_flags;
- cmd->nchunks = nchunks;
+ cmd->nchunks = umem->npages;
cmd->pdir_dma = mr->pdir.dir_dma;
ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 4059308e1454..7bf518bdbf21 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -245,12 +245,13 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
mutex_init(&qp->mutex);
- atomic_set(&qp->refcnt, 1);
+ refcount_set(&qp->refcnt, 1);
init_completion(&qp->free);
qp->state = IB_QPS_RESET;
+ qp->is_kernel = !(pd->uobject && udata);
- if (pd->uobject && udata) {
+ if (!qp->is_kernel) {
dev_dbg(&dev->pdev->dev,
"create queuepair from user space\n");
@@ -291,8 +292,6 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
qp->npages_recv = 0;
qp->npages = qp->npages_send + qp->npages_recv;
} else {
- qp->is_kernel = true;
-
ret = pvrdma_set_sq_size(to_vdev(pd->device),
&init_attr->cap, qp);
if (ret)
@@ -394,7 +393,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
err_pdir:
pvrdma_page_dir_cleanup(dev, &qp->pdir);
err_umem:
- if (pd->uobject && udata) {
+ if (!qp->is_kernel) {
if (qp->rumem)
ib_umem_release(qp->rumem);
if (qp->sumem)
@@ -428,7 +427,7 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp)
pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
- if (atomic_dec_and_test(&qp->refcnt))
+ if (refcount_dec_and_test(&qp->refcnt))
complete(&qp->free);
wait_for_completion(&qp->free);
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 97d71e49c092..fb52b669bfce 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -56,7 +56,7 @@
* rvt_cq_enter - add a new entry to the completion queue
* @cq: completion queue
* @entry: work completion entry to add
- * @sig: true if @entry is solicited
+ * @solicited: true if @entry is solicited
*
* This may be called with qp->s_lock held.
*/
@@ -101,8 +101,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
wc->uqueue[head].opcode = entry->opcode;
wc->uqueue[head].vendor_err = entry->vendor_err;
wc->uqueue[head].byte_len = entry->byte_len;
- wc->uqueue[head].ex.imm_data =
- (__u32 __force)entry->ex.imm_data;
+ wc->uqueue[head].ex.imm_data = entry->ex.imm_data;
wc->uqueue[head].qp_num = entry->qp->qp_num;
wc->uqueue[head].src_qp = entry->src_qp;
wc->uqueue[head].wc_flags = entry->wc_flags;
@@ -198,7 +197,7 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
return ERR_PTR(-EINVAL);
/* Allocate the completion queue structure. */
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node);
if (!cq)
return ERR_PTR(-ENOMEM);
@@ -214,7 +213,9 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
else
sz += sizeof(struct ib_wc) * (entries + 1);
- wc = vmalloc_user(sz);
+ wc = udata ?
+ vmalloc_user(sz) :
+ vzalloc_node(sz, rdi->dparms.node);
if (!wc) {
ret = ERR_PTR(-ENOMEM);
goto bail_cq;
@@ -369,7 +370,9 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
else
sz += sizeof(struct ib_wc) * (cqe + 1);
- wc = vmalloc_user(sz);
+ wc = udata ?
+ vmalloc_user(sz) :
+ vzalloc_node(sz, rdi->dparms.node);
if (!wc)
return -ENOMEM;
diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index b3a38c5e4cad..dd11c6fcd060 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -272,7 +272,7 @@ bail:
/**
* rvt_attach_mcast - attach a qp to a multicast group
* @ibqp: Infiniband qp
- * @igd: multicast guid
+ * @gid: multicast guid
* @lid: multicast lid
*
* Return: 0 on success
@@ -335,7 +335,7 @@ bail_mcast:
/**
* rvt_detach_mcast - remove a qp from a multicast group
* @ibqp: Infiniband qp
- * @igd: multicast guid
+ * @gid: multicast guid
* @lid: multicast lid
*
* Return: 0 on success
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 42713511b53b..1b2e5362a3ff 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -768,7 +768,7 @@ bail:
/**
* rvt_map_phys_fmr - set up a fast memory region
- * @ibmfr: the fast memory region to set up
+ * @ibfmr: the fast memory region to set up
* @page_list: the list of pages to associate with the fast memory region
* @list_len: the number of pages to associate with the fast memory region
* @iova: the virtual address of the start of the fast memory region
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index eae84c216e2f..c82e6bb3d77c 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -269,7 +269,7 @@ no_qp_table:
/**
* free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
+ * @rdi: rvt device info structure
*
* There should not be any QPs still in use.
* Free memory for table.
@@ -335,9 +335,9 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
/**
* alloc_qpn - Allocate the next available qpn or zero/one for QP type
* IB_QPT_SMI/IB_QPT_GSI
- *@rdi: rvt device info structure
- *@qpt: queue pair number table pointer
- *@port_num: IB port number, 1 based, comes from core
+ * @rdi: rvt device info structure
+ * @qpt: queue pair number table pointer
+ * @port_num: IB port number, 1 based, comes from core
*
* Return: The queue pair number
*/
@@ -1650,9 +1650,9 @@ static inline int rvt_qp_valid_operation(
/**
* rvt_qp_is_avail - determine queue capacity
- * @qp - the qp
- * @rdi - the rdmavt device
- * @reserved_op - is reserved operation
+ * @qp: the qp
+ * @rdi: the rdmavt device
+ * @reserved_op: is reserved operation
*
* This assumes the s_hlock is held but the s_last
* qp variable is uncontrolled.
@@ -2074,6 +2074,7 @@ void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth)
lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_WAIT_RNR;
to = rvt_aeth_to_usec(aeth);
+ trace_rvt_rnrnak_add(qp, to);
hrtimer_start(&qp->s_rnr_timer,
ns_to_ktime(1000 * to), HRTIMER_MODE_REL);
}
@@ -2103,17 +2104,14 @@ EXPORT_SYMBOL(rvt_stop_rc_timers);
* stop an rnr timer and return if the timer
* had been pending.
*/
-static int rvt_stop_rnr_timer(struct rvt_qp *qp)
+static void rvt_stop_rnr_timer(struct rvt_qp *qp)
{
- int rval = 0;
-
lockdep_assert_held(&qp->s_lock);
/* Remove QP from rnr timer */
if (qp->s_flags & RVT_S_WAIT_RNR) {
qp->s_flags &= ~RVT_S_WAIT_RNR;
- rval = hrtimer_try_to_cancel(&qp->s_rnr_timer);
+ trace_rvt_rnrnak_stop(qp, 0);
}
- return rval;
}
/**
@@ -2166,6 +2164,7 @@ enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t)
spin_lock_irqsave(&qp->s_lock, flags);
rvt_stop_rnr_timer(qp);
+ trace_rvt_rnrnak_timeout(qp, 0);
rdi->driver_f.schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
return HRTIMER_NORESTART;
@@ -2174,8 +2173,8 @@ EXPORT_SYMBOL(rvt_rc_rnr_retry);
/**
* rvt_qp_iter_init - initial for QP iteration
- * @rdi - rvt devinfo
- * @v - u64 value
+ * @rdi: rvt devinfo
+ * @v: u64 value
*
* This returns an iterator suitable for iterating QPs
* in the system.
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index f7c48e9023de..3707952b4364 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -90,7 +90,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
srq_init_attr->attr.max_wr > dev->dparms.props.max_srq_wr)
return ERR_PTR(-EINVAL);
- srq = kmalloc(sizeof(*srq), GFP_KERNEL);
+ srq = kzalloc_node(sizeof(*srq), GFP_KERNEL, dev->dparms.node);
if (!srq)
return ERR_PTR(-ENOMEM);
@@ -101,7 +101,10 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
srq->rq.max_sge = srq_init_attr->attr.max_sge;
sz = sizeof(struct ib_sge) * srq->rq.max_sge +
sizeof(struct rvt_rwqe);
- srq->rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz);
+ srq->rq.wq = udata ?
+ vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz) :
+ vzalloc_node(sizeof(struct rvt_rwq) + srq->rq.size * sz,
+ dev->dparms.node);
if (!srq->rq.wq) {
ret = ERR_PTR(-ENOMEM);
goto bail_srq;
@@ -129,16 +132,12 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
ret = ERR_PTR(err);
goto bail_ip;
}
- } else {
- srq->ip = NULL;
}
/*
* ib_create_srq() will initialize srq->ibsrq.
*/
spin_lock_init(&srq->rq.lock);
- srq->rq.wq->head = 0;
- srq->rq.wq->tail = 0;
srq->limit = srq_init_attr->attr.srq_limit;
spin_lock(&dev->n_srqs_lock);
@@ -200,7 +199,10 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
sz = sizeof(struct rvt_rwqe) +
srq->rq.max_sge * sizeof(struct ib_sge);
size = attr->max_wr + 1;
- wq = vmalloc_user(sizeof(struct rvt_rwq) + size * sz);
+ wq = udata ?
+ vmalloc_user(sizeof(struct rvt_rwq) + size * sz) :
+ vzalloc_node(sizeof(struct rvt_rwq) + size * sz,
+ dev->dparms.node);
if (!wq)
return -ENOMEM;
diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
index bb4b1e710f22..36ddbd291ee0 100644
--- a/drivers/infiniband/sw/rdmavt/trace.h
+++ b/drivers/infiniband/sw/rdmavt/trace.h
@@ -45,8 +45,8 @@
*
*/
-#define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi))
-#define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rdi->driver_f.get_card_name(rdi))
+#define RDI_DEV_ENTRY(rdi) __string(dev, rvt_get_ibdev_name(rdi))
+#define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rvt_get_ibdev_name(rdi))
#include "trace_rvt.h"
#include "trace_qp.h"
diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h
index 4c77a3119bda..efc9d814b032 100644
--- a/drivers/infiniband/sw/rdmavt/trace_qp.h
+++ b/drivers/infiniband/sw/rdmavt/trace_qp.h
@@ -85,6 +85,48 @@ DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
TP_PROTO(struct rvt_qp *qp, u32 bucket),
TP_ARGS(qp, bucket));
+DECLARE_EVENT_CLASS(
+ rvt_rnrnak_template,
+ TP_PROTO(struct rvt_qp *qp, u32 to),
+ TP_ARGS(qp, to),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(void *, hrtimer)
+ __field(u32, s_flags)
+ __field(u32, to)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->hrtimer = &qp->s_rnr_timer;
+ __entry->s_flags = qp->s_flags;
+ __entry->to = to;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x hrtimer 0x%p s_flags 0x%x timeout %u us",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->hrtimer,
+ __entry->s_flags,
+ __entry->to
+ )
+);
+
+DEFINE_EVENT(
+ rvt_rnrnak_template, rvt_rnrnak_add,
+ TP_PROTO(struct rvt_qp *qp, u32 to),
+ TP_ARGS(qp, to));
+
+DEFINE_EVENT(
+ rvt_rnrnak_template, rvt_rnrnak_timeout,
+ TP_PROTO(struct rvt_qp *qp, u32 to),
+ TP_ARGS(qp, to));
+
+DEFINE_EVENT(
+ rvt_rnrnak_template, rvt_rnrnak_stop,
+ TP_PROTO(struct rvt_qp *qp, u32 to),
+ TP_ARGS(qp, to));
#endif /* __RVT_TRACE_QP_H */
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 64bdd442078a..a4553b2b3696 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -224,7 +224,8 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num,
* rvt_query_pkey - Return a pkey from the table at a given index
* @ibdev: Verbs IB dev
* @port_num: Port number, 1 based from ib core
- * @intex: Index into pkey table
+ * @index: Index into pkey table
+ * @pkey: returned pkey from the port pkey table
*
* Return: 0 on failure pkey otherwise
*/
@@ -255,7 +256,7 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index,
* rvt_query_gid - Return a gid from the table
* @ibdev: Verbs IB dev
* @port_num: Port number, 1 based from ib core
- * @index: = Index in table
+ * @guid_index: Index in table
* @gid: Gid to return
*
* Return: 0 on success
@@ -297,8 +298,8 @@ static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext
/**
* rvt_alloc_ucontext - Allocate a user context
- * @ibdev: Vers IB dev
- * @data: User data allocated
+ * @ibdev: Verbs IB dev
+ * @udata: User data allocated
*/
static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
@@ -413,7 +414,6 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
* required for rdmavt to function.
*/
if ((!rdi->driver_f.port_callback) ||
- (!rdi->driver_f.get_card_name) ||
(!rdi->driver_f.get_pci_dev))
return -EINVAL;
break;
diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h
index f363505312be..8823b2e7aac6 100644
--- a/drivers/infiniband/sw/rdmavt/vt.h
+++ b/drivers/infiniband/sw/rdmavt/vt.h
@@ -63,19 +63,19 @@
#define rvt_pr_info(rdi, fmt, ...) \
__rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \
- rdi->driver_f.get_card_name(rdi), \
+ rvt_get_ibdev_name(rdi), \
fmt, \
##__VA_ARGS__)
#define rvt_pr_warn(rdi, fmt, ...) \
__rvt_pr_warn(rdi->driver_f.get_pci_dev(rdi), \
- rdi->driver_f.get_card_name(rdi), \
+ rvt_get_ibdev_name(rdi), \
fmt, \
##__VA_ARGS__)
#define rvt_pr_err(rdi, fmt, ...) \
__rvt_pr_err(rdi->driver_f.get_pci_dev(rdi), \
- rdi->driver_f.get_card_name(rdi), \
+ rvt_get_ibdev_name(rdi), \
fmt, \
##__VA_ARGS__)
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 320bffc980d8..bad4a576d7cf 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -1,8 +1,8 @@
config RDMA_RXE
tristate "Software RDMA over Ethernet (RoCE) driver"
depends on INET && PCI && INFINIBAND
- depends on NET_UDP_TUNNEL
- depends on CRYPTO_CRC32
+ select NET_UDP_TUNNEL
+ select CRYPTO_CRC32
select DMA_VIRT_OPS
---help---
This driver implements the InfiniBand RDMA transport over
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 8c3d30b3092d..b7debb6f2eac 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -77,12 +77,6 @@ void rxe_release(struct kref *kref)
ib_dealloc_device(&rxe->ib_dev);
}
-void rxe_dev_put(struct rxe_dev *rxe)
-{
- kref_put(&rxe->ref_cnt, rxe_release);
-}
-EXPORT_SYMBOL_GPL(rxe_dev_put);
-
/* initialize rxe device parameters */
static int rxe_init_device_param(struct rxe_dev *rxe)
{
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 6447d736d5a4..7d232611303f 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -57,6 +57,7 @@
#include "rxe_hdr.h"
#include "rxe_param.h"
#include "rxe_verbs.h"
+#include "rxe_loc.h"
#define RXE_UVERBS_ABI_VERSION (1)
@@ -95,7 +96,10 @@ void rxe_remove_all(void);
int rxe_rcv(struct sk_buff *skb);
-void rxe_dev_put(struct rxe_dev *rxe);
+static inline void rxe_dev_put(struct rxe_dev *rxe)
+{
+ kref_put(&rxe->ref_cnt, rxe_release);
+}
struct rxe_dev *net_to_rxe(struct net_device *ndev);
struct rxe_dev *get_rxe_by_name(const char *name);
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index d7472a442a2c..96c3a6c5c4b5 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -237,7 +237,6 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
void rxe_release(struct kref *kref);
-void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify);
int rxe_completer(void *arg);
int rxe_requester(void *arg);
int rxe_responder(void *arg);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 59dee10bebcb..159246b03867 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -82,7 +82,7 @@ struct rxe_dev *get_rxe_by_name(const char *name)
}
-struct rxe_recv_sockets recv_sockets;
+static struct rxe_recv_sockets recv_sockets;
struct device *rxe_dma_device(struct rxe_dev *rxe)
{
@@ -452,31 +452,26 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb)
{
- struct sk_buff *nskb;
struct rxe_av *av;
int err;
av = rxe_get_av(pkt);
- nskb = skb_clone(skb, GFP_ATOMIC);
- if (!nskb)
- return -ENOMEM;
-
- nskb->destructor = rxe_skb_tx_dtor;
- nskb->sk = pkt->qp->sk->sk;
+ skb->destructor = rxe_skb_tx_dtor;
+ skb->sk = pkt->qp->sk->sk;
rxe_add_ref(pkt->qp);
atomic_inc(&pkt->qp->skb_out);
if (av->network_type == RDMA_NETWORK_IPV4) {
- err = ip_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);
+ err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
} else if (av->network_type == RDMA_NETWORK_IPV6) {
- err = ip6_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);
+ err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
} else {
pr_err("Unknown layer 3 protocol: %d\n", av->network_type);
atomic_dec(&pkt->qp->skb_out);
rxe_drop_ref(pkt->qp);
- kfree_skb(nskb);
+ kfree_skb(skb);
return -EINVAL;
}
@@ -485,7 +480,6 @@ int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb)
return -EAGAIN;
}
- kfree_skb(skb);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
index 1c06b3bfe1b6..728d8c71b36a 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -43,7 +43,6 @@ struct rxe_recv_sockets {
struct socket *sk6;
};
-extern struct rxe_recv_sockets recv_sockets;
extern struct notifier_block rxe_net_notifier;
void rxe_release_udp_tunnel(struct socket *sk);
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 4469592b839d..137d6c0c49d4 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -824,9 +824,9 @@ void rxe_qp_destroy(struct rxe_qp *qp)
}
/* called when the last reference to the qp is dropped */
-void rxe_qp_cleanup(struct rxe_pool_entry *arg)
+static void rxe_qp_do_cleanup(struct work_struct *work)
{
- struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem);
+ struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
rxe_drop_all_mcast_groups(qp);
@@ -859,3 +859,11 @@ void rxe_qp_cleanup(struct rxe_pool_entry *arg)
kernel_sock_shutdown(qp->sk, SHUT_RDWR);
sock_release(qp->sk);
}
+
+/* called when the last reference to the qp is dropped */
+void rxe_qp_cleanup(struct rxe_pool_entry *arg)
+{
+ struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem);
+
+ execute_in_process_context(rxe_qp_do_cleanup, &qp->cleanup_work);
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index fb8c83e055e1..4c3f899241d4 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -336,7 +336,6 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
{
union ib_gid dgid;
union ib_gid *pdgid;
- u16 index;
if (skb->protocol == htons(ETH_P_IP)) {
ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
@@ -348,7 +347,7 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid,
IB_GID_TYPE_ROCE_UDP_ENCAP,
- 1, rxe->ndev, &index);
+ 1, rxe->ndev, NULL);
}
/* rxe_rcv is called from the interface driver */
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 26a7f923045b..7bdaf71b8221 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -594,15 +594,8 @@ int rxe_requester(void *arg)
rxe_add_ref(qp);
next_wqe:
- if (unlikely(!qp->valid)) {
- rxe_drain_req_pkts(qp, true);
+ if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
goto exit;
- }
-
- if (unlikely(qp->req.state == QP_STATE_ERROR)) {
- rxe_drain_req_pkts(qp, true);
- goto exit;
- }
if (unlikely(qp->req.state == QP_STATE_RESET)) {
qp->req.wqe_index = consumer_index(qp->sq.queue);
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 4240866a5331..d37bb9b97569 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -863,8 +863,7 @@ static enum resp_states do_complete(struct rxe_qp *qp,
if (pkt->mask & RXE_IMMDT_MASK) {
uwc->wc_flags |= IB_WC_WITH_IMM;
- uwc->ex.imm_data =
- (__u32 __force)immdt_imm(pkt);
+ uwc->ex.imm_data = immdt_imm(pkt);
}
if (pkt->mask & RXE_IETH_MASK) {
@@ -1210,7 +1209,7 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
}
}
-void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
+static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
{
struct sk_buff *skb;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index d03002b9d84d..7210a784abb4 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -814,6 +814,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
(queue_count(qp->sq.queue) > 1);
rxe_run_task(&qp->req.task, must_sched);
+ if (unlikely(qp->req.state == QP_STATE_ERROR))
+ rxe_run_task(&qp->comp.task, 1);
return err;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 0c2dbe45c729..1019f5e7dbdd 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -35,6 +35,7 @@
#define RXE_VERBS_H
#include <linux/interrupt.h>
+#include <linux/workqueue.h>
#include <rdma/rdma_user_rxe.h>
#include "rxe_pool.h"
#include "rxe_task.h"
@@ -281,6 +282,8 @@ struct rxe_qp {
struct timer_list rnr_nak_timer;
spinlock_t state_lock; /* guard requester and completer */
+
+ struct execute_work cleanup_work;
};
enum rxe_mem_state {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 71ea9e26666c..962fbcb57dc7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -766,12 +766,14 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
skb_orphan(skb);
skb_dst_drop(skb);
- if (netif_queue_stopped(dev))
- if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
- IB_CQ_REPORT_MISSED_EVENTS)) {
+ if (netif_queue_stopped(dev)) {
+ rc = ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
+ IB_CQ_REPORT_MISSED_EVENTS);
+ if (unlikely(rc < 0))
ipoib_warn(priv, "IPoIB/CM:request notify on send CQ failed\n");
+ else if (rc)
napi_schedule(&priv->send_napi);
- }
+ }
rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
if (unlikely(rc)) {
@@ -876,7 +878,7 @@ int ipoib_cm_dev_open(struct net_device *dev)
priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
if (IS_ERR(priv->cm.id)) {
- printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
+ pr_warn("%s: failed to create CM ID\n", priv->ca->name);
ret = PTR_ERR(priv->cm.id);
goto err_cm;
}
@@ -884,8 +886,8 @@ int ipoib_cm_dev_open(struct net_device *dev)
ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
0);
if (ret) {
- printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
- IPOIB_CM_IETF_ID | priv->qp->qp_num);
+ pr_warn("%s: failed to listen on ID 0x%llx\n", priv->ca->name,
+ IPOIB_CM_IETF_ID | priv->qp->qp_num);
goto err_listen;
}
@@ -1562,7 +1564,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
if (IS_ERR(priv->cm.srq)) {
if (PTR_ERR(priv->cm.srq) != -ENOSYS)
- printk(KERN_WARNING "%s: failed to allocate SRQ, error %ld\n",
+ pr_warn("%s: failed to allocate SRQ, error %ld\n",
priv->ca->name, PTR_ERR(priv->cm.srq));
priv->cm.srq = NULL;
return;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index e6151a29c412..10384ea50bed 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -644,7 +644,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
if (netif_queue_stopped(dev))
if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
- IB_CQ_REPORT_MISSED_EVENTS))
+ IB_CQ_REPORT_MISSED_EVENTS) < 0)
ipoib_warn(priv, "request notify on send CQ failed\n");
rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
@@ -1085,8 +1085,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
netif_addr_unlock_bh(priv->dev);
- err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB,
- priv->dev, &port, &index);
+ err = ib_find_gid(priv->ca, &search_gid, priv->dev, &port, &index);
netif_addr_lock_bh(priv->dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 8880351df179..5930c7d9a8fb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -768,13 +768,30 @@ static void path_rec_completion(int status,
if (!status) {
struct rdma_ah_attr av;
- if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
+ if (!ib_init_ah_attr_from_path(priv->ca, priv->port,
+ pathrec, &av))
ah = ipoib_create_ah(dev, priv->pd, &av);
}
spin_lock_irqsave(&priv->lock, flags);
if (!IS_ERR_OR_NULL(ah)) {
+ /*
+ * pathrec.dgid is used as the database key from the LLADDR,
+ * it must remain unchanged even if the SA returns a different
+ * GID to use in the AH.
+ */
+ if (memcmp(pathrec->dgid.raw, path->pathrec.dgid.raw,
+ sizeof(union ib_gid))) {
+ ipoib_dbg(
+ priv,
+ "%s got PathRec for gid %pI6 while asked for %pI6\n",
+ dev->name, pathrec->dgid.raw,
+ path->pathrec.dgid.raw);
+ memcpy(pathrec->dgid.raw, path->pathrec.dgid.raw,
+ sizeof(union ib_gid));
+ }
+
path->pathrec = *pathrec;
old_ah = path->ah;
@@ -840,6 +857,23 @@ static void path_rec_completion(int status,
}
}
+static void init_path_rec(struct ipoib_dev_priv *priv, struct ipoib_path *path,
+ void *gid)
+{
+ path->dev = priv->dev;
+
+ if (rdma_cap_opa_ah(priv->ca, priv->port))
+ path->pathrec.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ path->pathrec.rec_type = SA_PATH_REC_TYPE_IB;
+
+ memcpy(path->pathrec.dgid.raw, gid, sizeof(union ib_gid));
+ path->pathrec.sgid = priv->local_gid;
+ path->pathrec.pkey = cpu_to_be16(priv->pkey);
+ path->pathrec.numb_path = 1;
+ path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
+}
+
static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -852,21 +886,11 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
if (!path)
return NULL;
- path->dev = dev;
-
skb_queue_head_init(&path->queue);
INIT_LIST_HEAD(&path->neigh_list);
- if (rdma_cap_opa_ah(priv->ca, priv->port))
- path->pathrec.rec_type = SA_PATH_REC_TYPE_OPA;
- else
- path->pathrec.rec_type = SA_PATH_REC_TYPE_IB;
- memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
- path->pathrec.sgid = priv->local_gid;
- path->pathrec.pkey = cpu_to_be16(priv->pkey);
- path->pathrec.numb_path = 1;
- path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
+ init_path_rec(priv, path, gid);
return path;
}
@@ -1005,6 +1029,10 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
spin_lock_irqsave(&priv->lock, flags);
+ /* no broadcast means that all paths are (going to be) not valid */
+ if (!priv->broadcast)
+ goto drop_and_unlock;
+
path = __path_find(dev, phdr->hwaddr + 4);
if (!path || !path->valid) {
int new_path = 0;
@@ -1014,6 +1042,10 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
new_path = 1;
}
if (path) {
+ if (!new_path)
+ /* make sure there is no changes in the existing path record */
+ init_path_rec(priv, path, phdr->hwaddr + 4);
+
if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
push_pseudo_header(skb, phdr->hwaddr);
__skb_queue_tail(&path->queue, skb);
@@ -1030,8 +1062,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
} else
__path_add(dev, path);
} else {
- ++dev->stats.tx_dropped;
- dev_kfree_skb_any(skb);
+ goto drop_and_unlock;
}
spin_unlock_irqrestore(&priv->lock, flags);
@@ -1051,11 +1082,16 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
push_pseudo_header(skb, phdr->hwaddr);
__skb_queue_tail(&path->queue, skb);
} else {
- ++dev->stats.tx_dropped;
- dev_kfree_skb_any(skb);
+ goto drop_and_unlock;
}
spin_unlock_irqrestore(&priv->lock, flags);
+ return;
+
+drop_and_unlock:
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
+ spin_unlock_irqrestore(&priv->lock, flags);
}
static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -1674,8 +1710,8 @@ static int ipoib_dev_init_default(struct net_device *dev)
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
if (!priv->tx_ring) {
- printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
- priv->ca->name, ipoib_sendq_size);
+ pr_warn("%s: failed to allocate TX ring (%d entries)\n",
+ priv->ca->name, ipoib_sendq_size);
goto out_rx_ring_cleanup;
}
@@ -2207,16 +2243,17 @@ static struct net_device *ipoib_add_port(const char *format,
int result = -ENOMEM;
priv = ipoib_intf_alloc(hca, port, format);
- if (!priv)
+ if (!priv) {
+ pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port);
goto alloc_mem_failed;
+ }
SET_NETDEV_DEV(priv->dev, hca->dev.parent);
priv->dev->dev_id = port - 1;
result = ib_query_port(hca, port, &attr);
if (result) {
- printk(KERN_WARNING "%s: ib_query_port %d failed\n",
- hca->name, port);
+ pr_warn("%s: ib_query_port %d failed\n", hca->name, port);
goto device_init_failed;
}
@@ -2231,8 +2268,8 @@ static struct net_device *ipoib_add_port(const char *format,
result = ib_query_pkey(hca, port, 0, &priv->pkey);
if (result) {
- printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
- hca->name, port, result);
+ pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
+ hca->name, port, result);
goto device_init_failed;
}
@@ -2249,8 +2286,8 @@ static struct net_device *ipoib_add_port(const char *format,
result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
if (result) {
- printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
- hca->name, port, result);
+ pr_warn("%s: ib_query_gid port %d failed (ret = %d)\n",
+ hca->name, port, result);
goto device_init_failed;
}
@@ -2260,8 +2297,8 @@ static struct net_device *ipoib_add_port(const char *format,
result = ipoib_dev_init(priv->dev, hca, port);
if (result) {
- printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
- hca->name, port, result);
+ pr_warn("%s: failed to initialize port %d (ret = %d)\n",
+ hca->name, port, result);
goto device_init_failed;
}
@@ -2271,8 +2308,8 @@ static struct net_device *ipoib_add_port(const char *format,
result = register_netdev(priv->dev);
if (result) {
- printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",
- hca->name, port, result);
+ pr_warn("%s: couldn't register ipoib port %d; error %d\n",
+ hca->name, port, result);
goto register_failed;
}
@@ -2337,8 +2374,7 @@ static void ipoib_add_one(struct ib_device *device)
}
if (!count) {
- pr_err("Failed to init port, removing it\n");
- ipoib_remove_one(device, dev_list);
+ kfree(dev_list);
return;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index a1ed25422b72..984a88096f39 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -178,7 +178,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL,
priv, &cq_attr);
if (IS_ERR(priv->recv_cq)) {
- printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
+ pr_warn("%s: failed to create receive CQ\n", ca->name);
goto out_cm_dev_cleanup;
}
@@ -187,7 +187,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->send_cq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL,
priv, &cq_attr);
if (IS_ERR(priv->send_cq)) {
- printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
+ pr_warn("%s: failed to create send CQ\n", ca->name);
goto out_free_recv_cq;
}
@@ -208,7 +208,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) {
- printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
+ pr_warn("%s: failed to create QP\n", ca->name);
goto out_free_send_cq;
}
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 2a07692007bd..df49c4eb67f7 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -142,8 +142,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
}
- iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
- "VA:%#llX + unsol:%d\n",
+ iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X VA:%#llX + unsol:%d\n",
task->itt, mem_reg->rkey,
(unsigned long long)mem_reg->sge.addr, unsol_sz);
}
@@ -436,7 +435,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
{
struct iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_tx_desc *tx_desc = NULL;
+ struct iser_tx_desc *tx_desc;
struct iser_mem_reg *mem_reg;
unsigned long buf_offset;
unsigned long data_seg_len;
@@ -452,10 +451,8 @@ int iser_send_data_out(struct iscsi_conn *conn,
__func__,(int)itt,(int)data_seg_len,(int)buf_offset);
tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC);
- if (tx_desc == NULL) {
- iser_err("Failed to alloc desc for post dataout\n");
+ if (!tx_desc)
return -ENOMEM;
- }
tx_desc->type = ISCSI_TX_DATAOUT;
tx_desc->cqe.done = iser_dataout_comp;
@@ -475,8 +472,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
tx_desc->num_sge = 2;
if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
- iser_err("Offset:%ld & DSL:%ld in Data-Out "
- "inconsistent with total len:%ld, itt:%d\n",
+ iser_err("Offset:%ld & DSL:%ld in Data-Out inconsistent with total len:%ld, itt:%d\n",
buf_offset, data_seg_len,
iser_task->data[ISER_DIR_OUT].data_len, itt);
err = -EINVAL;
@@ -614,8 +610,8 @@ iser_check_remote_inv(struct iser_conn *iser_conn,
iser_conn, rkey);
if (unlikely(!iser_conn->snd_w_inv)) {
- iser_err("conn %p: unexpected remote invalidation, "
- "terminating connection\n", iser_conn);
+ iser_err("conn %p: unexpected remote invalidation, terminating connection\n",
+ iser_conn);
return -EPROTO;
}
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 1b02283ce20e..fff40b097947 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -2124,6 +2124,9 @@ isert_rdma_rw_ctx_post(struct isert_cmd *cmd, struct isert_conn *conn,
u32 rkey, offset;
int ret;
+ if (cmd->ctx_init_done)
+ goto rdma_ctx_post;
+
if (dir == DMA_FROM_DEVICE) {
addr = cmd->write_va;
rkey = cmd->write_stag;
@@ -2151,11 +2154,15 @@ isert_rdma_rw_ctx_post(struct isert_cmd *cmd, struct isert_conn *conn,
se_cmd->t_data_sg, se_cmd->t_data_nents,
offset, addr, rkey, dir);
}
+
if (ret < 0) {
isert_err("Cmd: %p failed to prepare RDMA res\n", cmd);
return ret;
}
+ cmd->ctx_init_done = true;
+
+rdma_ctx_post:
ret = rdma_rw_ctx_post(&cmd->rw, conn->qp, port_num, cqe, chain_wr);
if (ret < 0)
isert_err("Cmd: %p failed to post RDMA res\n", cmd);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index d6fd248320ae..3b296bac4f60 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -126,6 +126,7 @@ struct isert_cmd {
struct rdma_rw_ctx rw;
struct work_struct comp_work;
struct scatterlist sg;
+ bool ctx_init_done;
};
static inline struct isert_cmd *tx_desc_to_cmd(struct iser_tx_desc *desc)
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
index 4b615c1451e7..15711dcc6f58 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -710,7 +710,7 @@ vema_get_port(struct opa_vnic_ctrl_port *cport, u8 port_num)
/**
* opa_vnic_vema_send_trap -- This function sends a trap to the EM
- * @cport: pointer to vnic control port
+ * @adapter: pointer to vnic adapter
* @data: pointer to trap data filled by calling function
* @lid: issuers lid (encap_slid from vesw_port_info)
*
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 972d4b3c5223..b48843833d69 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -41,6 +41,7 @@
#include <linux/random.h>
#include <linux/jiffies.h>
#include <linux/lockdep.h>
+#include <linux/inet.h>
#include <rdma/ib_cache.h>
#include <linux/atomic.h>
@@ -144,7 +145,9 @@ static void srp_remove_one(struct ib_device *device, void *client_data);
static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
const char *opname);
-static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event);
static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;
@@ -265,8 +268,8 @@ static void srp_qp_event(struct ib_event *event, void *context)
ib_event_msg(event->event), event->event);
}
-static int srp_init_qp(struct srp_target_port *target,
- struct ib_qp *qp)
+static int srp_init_ib_qp(struct srp_target_port *target,
+ struct ib_qp *qp)
{
struct ib_qp_attr *attr;
int ret;
@@ -277,7 +280,7 @@ static int srp_init_qp(struct srp_target_port *target,
ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
target->srp_host->port,
- be16_to_cpu(target->pkey),
+ be16_to_cpu(target->ib_cm.pkey),
&attr->pkey_index);
if (ret)
goto out;
@@ -298,32 +301,110 @@ out:
return ret;
}
-static int srp_new_cm_id(struct srp_rdma_ch *ch)
+static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
{
struct srp_target_port *target = ch->target;
struct ib_cm_id *new_cm_id;
new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
- srp_cm_handler, ch);
+ srp_ib_cm_handler, ch);
if (IS_ERR(new_cm_id))
return PTR_ERR(new_cm_id);
- if (ch->cm_id)
- ib_destroy_cm_id(ch->cm_id);
- ch->cm_id = new_cm_id;
+ if (ch->ib_cm.cm_id)
+ ib_destroy_cm_id(ch->ib_cm.cm_id);
+ ch->ib_cm.cm_id = new_cm_id;
if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
target->srp_host->port))
- ch->path.rec_type = SA_PATH_REC_TYPE_OPA;
+ ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA;
else
- ch->path.rec_type = SA_PATH_REC_TYPE_IB;
- ch->path.sgid = target->sgid;
- ch->path.dgid = target->orig_dgid;
- ch->path.pkey = target->pkey;
- ch->path.service_id = target->service_id;
+ ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB;
+ ch->ib_cm.path.sgid = target->sgid;
+ ch->ib_cm.path.dgid = target->ib_cm.orig_dgid;
+ ch->ib_cm.path.pkey = target->ib_cm.pkey;
+ ch->ib_cm.path.service_id = target->ib_cm.service_id;
return 0;
}
+static const char *inet_ntop(const void *sa, char *dst, unsigned int size)
+{
+ switch (((struct sockaddr *)sa)->sa_family) {
+ case AF_INET:
+ snprintf(dst, size, "%pI4",
+ &((struct sockaddr_in *)sa)->sin_addr);
+ break;
+ case AF_INET6:
+ snprintf(dst, size, "%pI6",
+ &((struct sockaddr_in6 *)sa)->sin6_addr);
+ break;
+ default:
+ snprintf(dst, size, "???");
+ break;
+ }
+ return dst;
+}
+
+static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
+{
+ struct srp_target_port *target = ch->target;
+ struct rdma_cm_id *new_cm_id;
+ char src_addr[64], dst_addr[64];
+ int ret;
+
+ new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
+ RDMA_PS_TCP, IB_QPT_RC);
+ if (IS_ERR(new_cm_id)) {
+ ret = PTR_ERR(new_cm_id);
+ new_cm_id = NULL;
+ goto out;
+ }
+
+ init_completion(&ch->done);
+ ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ?
+ (struct sockaddr *)&target->rdma_cm.src : NULL,
+ (struct sockaddr *)&target->rdma_cm.dst,
+ SRP_PATH_REC_TIMEOUT_MS);
+ if (ret) {
+ pr_err("No route available from %s to %s (%d)\n",
+ target->rdma_cm.src_specified ?
+ inet_ntop(&target->rdma_cm.src, src_addr,
+ sizeof(src_addr)) : "(any)",
+ inet_ntop(&target->rdma_cm.dst, dst_addr,
+ sizeof(dst_addr)),
+ ret);
+ goto out;
+ }
+ ret = wait_for_completion_interruptible(&ch->done);
+ if (ret < 0)
+ goto out;
+
+ ret = ch->status;
+ if (ret) {
+ pr_err("Resolving address %s failed (%d)\n",
+ inet_ntop(&target->rdma_cm.dst, dst_addr,
+ sizeof(dst_addr)),
+ ret);
+ goto out;
+ }
+
+ swap(ch->rdma_cm.cm_id, new_cm_id);
+
+out:
+ if (new_cm_id)
+ rdma_destroy_id(new_cm_id);
+
+ return ret;
+}
+
+static int srp_new_cm_id(struct srp_rdma_ch *ch)
+{
+ struct srp_target_port *target = ch->target;
+
+ return target->using_rdma_cm ? srp_new_rdma_cm_id(ch) :
+ srp_new_ib_cm_id(ch);
+}
+
static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
{
struct srp_device *dev = target->srp_host->srp_dev;
@@ -521,16 +602,25 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
init_attr->send_cq = send_cq;
init_attr->recv_cq = recv_cq;
- qp = ib_create_qp(dev->pd, init_attr);
- if (IS_ERR(qp)) {
- ret = PTR_ERR(qp);
+ if (target->using_rdma_cm) {
+ ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr);
+ qp = ch->rdma_cm.cm_id->qp;
+ } else {
+ qp = ib_create_qp(dev->pd, init_attr);
+ if (!IS_ERR(qp)) {
+ ret = srp_init_ib_qp(target, qp);
+ if (ret)
+ ib_destroy_qp(qp);
+ } else {
+ ret = PTR_ERR(qp);
+ }
+ }
+ if (ret) {
+ pr_err("QP creation failed for dev %s: %d\n",
+ dev_name(&dev->dev->dev), ret);
goto err_send_cq;
}
- ret = srp_init_qp(target, qp);
- if (ret)
- goto err_qp;
-
if (dev->use_fast_reg) {
fr_pool = srp_alloc_fr_pool(target);
if (IS_ERR(fr_pool)) {
@@ -574,7 +664,10 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
return 0;
err_qp:
- ib_destroy_qp(qp);
+ if (target->using_rdma_cm)
+ rdma_destroy_qp(ch->rdma_cm.cm_id);
+ else
+ ib_destroy_qp(qp);
err_send_cq:
ib_free_cq(send_cq);
@@ -600,9 +693,16 @@ static void srp_free_ch_ib(struct srp_target_port *target,
if (!ch->target)
return;
- if (ch->cm_id) {
- ib_destroy_cm_id(ch->cm_id);
- ch->cm_id = NULL;
+ if (target->using_rdma_cm) {
+ if (ch->rdma_cm.cm_id) {
+ rdma_destroy_id(ch->rdma_cm.cm_id);
+ ch->rdma_cm.cm_id = NULL;
+ }
+ } else {
+ if (ch->ib_cm.cm_id) {
+ ib_destroy_cm_id(ch->ib_cm.cm_id);
+ ch->ib_cm.cm_id = NULL;
+ }
}
/* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
@@ -658,16 +758,16 @@ static void srp_path_rec_completion(int status,
shost_printk(KERN_ERR, target->scsi_host,
PFX "Got failed path rec status %d\n", status);
else
- ch->path = *pathrec;
+ ch->ib_cm.path = *pathrec;
complete(&ch->done);
}
-static int srp_lookup_path(struct srp_rdma_ch *ch)
+static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
{
struct srp_target_port *target = ch->target;
int ret = -ENODEV;
- ch->path.numb_path = 1;
+ ch->ib_cm.path.numb_path = 1;
init_completion(&ch->done);
@@ -678,10 +778,10 @@ static int srp_lookup_path(struct srp_rdma_ch *ch)
if (!scsi_host_get(target->scsi_host))
goto out;
- ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
+ ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client,
target->srp_host->srp_dev->dev,
target->srp_host->port,
- &ch->path,
+ &ch->ib_cm.path,
IB_SA_PATH_REC_SERVICE_ID |
IB_SA_PATH_REC_DGID |
IB_SA_PATH_REC_SGID |
@@ -690,8 +790,8 @@ static int srp_lookup_path(struct srp_rdma_ch *ch)
SRP_PATH_REC_TIMEOUT_MS,
GFP_KERNEL,
srp_path_rec_completion,
- ch, &ch->path_query);
- ret = ch->path_query_id;
+ ch, &ch->ib_cm.path_query);
+ ret = ch->ib_cm.path_query_id;
if (ret < 0)
goto put;
@@ -702,7 +802,10 @@ static int srp_lookup_path(struct srp_rdma_ch *ch)
ret = ch->status;
if (ret < 0)
shost_printk(KERN_WARNING, target->scsi_host,
- PFX "Path record query failed\n");
+ PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
+ ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw,
+ be16_to_cpu(target->ib_cm.pkey),
+ be64_to_cpu(target->ib_cm.service_id));
put:
scsi_host_put(target->scsi_host);
@@ -711,6 +814,34 @@ out:
return ret;
}
+static int srp_rdma_lookup_path(struct srp_rdma_ch *ch)
+{
+ struct srp_target_port *target = ch->target;
+ int ret;
+
+ init_completion(&ch->done);
+
+ ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS);
+ if (ret)
+ return ret;
+
+ wait_for_completion_interruptible(&ch->done);
+
+ if (ch->status != 0)
+ shost_printk(KERN_WARNING, target->scsi_host,
+ PFX "Path resolution failed\n");
+
+ return ch->status;
+}
+
+static int srp_lookup_path(struct srp_rdma_ch *ch)
+{
+ struct srp_target_port *target = ch->target;
+
+ return target->using_rdma_cm ? srp_rdma_lookup_path(ch) :
+ srp_ib_lookup_path(ch);
+}
+
static u8 srp_get_subnet_timeout(struct srp_host *host)
{
struct ib_port_attr attr;
@@ -732,48 +863,76 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
{
struct srp_target_port *target = ch->target;
struct {
- struct ib_cm_req_param param;
- struct srp_login_req priv;
+ struct rdma_conn_param rdma_param;
+ struct srp_login_req_rdma rdma_req;
+ struct ib_cm_req_param ib_param;
+ struct srp_login_req ib_req;
} *req = NULL;
+ char *ipi, *tpi;
int status;
- u8 subnet_timeout;
-
- subnet_timeout = srp_get_subnet_timeout(target->srp_host);
req = kzalloc(sizeof *req, GFP_KERNEL);
if (!req)
return -ENOMEM;
- req->param.primary_path = &ch->path;
- req->param.alternate_path = NULL;
- req->param.service_id = target->service_id;
- req->param.qp_num = ch->qp->qp_num;
- req->param.qp_type = ch->qp->qp_type;
- req->param.private_data = &req->priv;
- req->param.private_data_len = sizeof req->priv;
- req->param.flow_control = 1;
-
- get_random_bytes(&req->param.starting_psn, 4);
- req->param.starting_psn &= 0xffffff;
+ req->ib_param.flow_control = 1;
+ req->ib_param.retry_count = target->tl_retry_count;
/*
* Pick some arbitrary defaults here; we could make these
* module parameters if anyone cared about setting them.
*/
- req->param.responder_resources = 4;
- req->param.remote_cm_response_timeout = subnet_timeout + 2;
- req->param.local_cm_response_timeout = subnet_timeout + 2;
- req->param.retry_count = target->tl_retry_count;
- req->param.rnr_retry_count = 7;
- req->param.max_cm_retries = 15;
-
- req->priv.opcode = SRP_LOGIN_REQ;
- req->priv.tag = 0;
- req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
- req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
+ req->ib_param.responder_resources = 4;
+ req->ib_param.rnr_retry_count = 7;
+ req->ib_param.max_cm_retries = 15;
+
+ req->ib_req.opcode = SRP_LOGIN_REQ;
+ req->ib_req.tag = 0;
+ req->ib_req.req_it_iu_len = cpu_to_be32(target->max_iu_len);
+ req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
SRP_BUF_FORMAT_INDIRECT);
- req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
- SRP_MULTICHAN_SINGLE);
+ req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
+ SRP_MULTICHAN_SINGLE);
+
+ if (target->using_rdma_cm) {
+ req->rdma_param.flow_control = req->ib_param.flow_control;
+ req->rdma_param.responder_resources =
+ req->ib_param.responder_resources;
+ req->rdma_param.initiator_depth = req->ib_param.initiator_depth;
+ req->rdma_param.retry_count = req->ib_param.retry_count;
+ req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count;
+ req->rdma_param.private_data = &req->rdma_req;
+ req->rdma_param.private_data_len = sizeof(req->rdma_req);
+
+ req->rdma_req.opcode = req->ib_req.opcode;
+ req->rdma_req.tag = req->ib_req.tag;
+ req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
+ req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
+ req->rdma_req.req_flags = req->ib_req.req_flags;
+
+ ipi = req->rdma_req.initiator_port_id;
+ tpi = req->rdma_req.target_port_id;
+ } else {
+ u8 subnet_timeout;
+
+ subnet_timeout = srp_get_subnet_timeout(target->srp_host);
+
+ req->ib_param.primary_path = &ch->ib_cm.path;
+ req->ib_param.alternate_path = NULL;
+ req->ib_param.service_id = target->ib_cm.service_id;
+ get_random_bytes(&req->ib_param.starting_psn, 4);
+ req->ib_param.starting_psn &= 0xffffff;
+ req->ib_param.qp_num = ch->qp->qp_num;
+ req->ib_param.qp_type = ch->qp->qp_type;
+ req->ib_param.local_cm_response_timeout = subnet_timeout + 2;
+ req->ib_param.remote_cm_response_timeout = subnet_timeout + 2;
+ req->ib_param.private_data = &req->ib_req;
+ req->ib_param.private_data_len = sizeof(req->ib_req);
+
+ ipi = req->ib_req.initiator_port_id;
+ tpi = req->ib_req.target_port_id;
+ }
+
/*
* In the published SRP specification (draft rev. 16a), the
* port identifier format is 8 bytes of ID extension followed
@@ -784,19 +943,15 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
* recognized by the I/O Class they report.
*/
if (target->io_class == SRP_REV10_IB_IO_CLASS) {
- memcpy(req->priv.initiator_port_id,
- &target->sgid.global.interface_id, 8);
- memcpy(req->priv.initiator_port_id + 8,
- &target->initiator_ext, 8);
- memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
- memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
+ memcpy(ipi, &target->sgid.global.interface_id, 8);
+ memcpy(ipi + 8, &target->initiator_ext, 8);
+ memcpy(tpi, &target->ioc_guid, 8);
+ memcpy(tpi + 8, &target->id_ext, 8);
} else {
- memcpy(req->priv.initiator_port_id,
- &target->initiator_ext, 8);
- memcpy(req->priv.initiator_port_id + 8,
- &target->sgid.global.interface_id, 8);
- memcpy(req->priv.target_port_id, &target->id_ext, 8);
- memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
+ memcpy(ipi, &target->initiator_ext, 8);
+ memcpy(ipi + 8, &target->sgid.global.interface_id, 8);
+ memcpy(tpi, &target->id_ext, 8);
+ memcpy(tpi + 8, &target->ioc_guid, 8);
}
/*
@@ -809,12 +964,14 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
PFX "Topspin/Cisco initiator port ID workaround "
"activated for target GUID %016llx\n",
be64_to_cpu(target->ioc_guid));
- memset(req->priv.initiator_port_id, 0, 8);
- memcpy(req->priv.initiator_port_id + 8,
- &target->srp_host->srp_dev->dev->node_guid, 8);
+ memset(ipi, 0, 8);
+ memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8);
}
- status = ib_send_cm_req(ch->cm_id, &req->param);
+ if (target->using_rdma_cm)
+ status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param);
+ else
+ status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param);
kfree(req);
@@ -841,14 +998,23 @@ static bool srp_queue_remove_work(struct srp_target_port *target)
static void srp_disconnect_target(struct srp_target_port *target)
{
struct srp_rdma_ch *ch;
- int i;
+ int i, ret;
/* XXX should send SRP_I_LOGOUT request */
for (i = 0; i < target->ch_count; i++) {
ch = &target->ch[i];
ch->connected = false;
- if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
+ ret = 0;
+ if (target->using_rdma_cm) {
+ if (ch->rdma_cm.cm_id)
+ rdma_disconnect(ch->rdma_cm.cm_id);
+ } else {
+ if (ch->ib_cm.cm_id)
+ ret = ib_send_cm_dreq(ch->ib_cm.cm_id,
+ NULL, 0);
+ }
+ if (ret < 0) {
shost_printk(KERN_DEBUG, target->scsi_host,
PFX "Sending CM DREQ failed\n");
}
@@ -962,6 +1128,7 @@ static void srp_remove_target(struct srp_target_port *target)
scsi_remove_host(target->scsi_host);
srp_stop_rport_timers(target->rport);
srp_disconnect_target(target);
+ kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
for (i = 0; i < target->ch_count; i++) {
ch = &target->ch[i];
srp_free_ch_ib(target, ch);
@@ -2349,7 +2516,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
struct srp_target_port *target = ch->target;
struct ib_qp_attr *qp_attr = NULL;
int attr_mask = 0;
- int ret;
+ int ret = 0;
int i;
if (lrsp->opcode == SRP_LOGIN_RSP) {
@@ -2379,40 +2546,42 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
goto error;
}
- ret = -ENOMEM;
- qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
- if (!qp_attr)
- goto error;
-
- qp_attr->qp_state = IB_QPS_RTR;
- ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
- if (ret)
- goto error_free;
-
- ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
- if (ret)
- goto error_free;
-
for (i = 0; i < target->queue_size; i++) {
struct srp_iu *iu = ch->rx_ring[i];
ret = srp_post_recv(ch, iu);
if (ret)
- goto error_free;
+ goto error;
}
- qp_attr->qp_state = IB_QPS_RTS;
- ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
- if (ret)
- goto error_free;
+ if (!target->using_rdma_cm) {
+ ret = -ENOMEM;
+ qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
+ if (!qp_attr)
+ goto error;
- target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
+ qp_attr->qp_state = IB_QPS_RTR;
+ ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
+ if (ret)
+ goto error_free;
- ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
- if (ret)
- goto error_free;
+ ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
+ if (ret)
+ goto error_free;
- ret = ib_send_cm_rtu(cm_id, NULL, 0);
+ qp_attr->qp_state = IB_QPS_RTS;
+ ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
+ if (ret)
+ goto error_free;
+
+ target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
+
+ ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
+ if (ret)
+ goto error_free;
+
+ ret = ib_send_cm_rtu(cm_id, NULL, 0);
+ }
error_free:
kfree(qp_attr);
@@ -2421,41 +2590,43 @@ error:
ch->status = ret;
}
-static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
- struct ib_cm_event *event,
- struct srp_rdma_ch *ch)
+static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
+ struct ib_cm_event *event,
+ struct srp_rdma_ch *ch)
{
struct srp_target_port *target = ch->target;
struct Scsi_Host *shost = target->scsi_host;
struct ib_class_port_info *cpi;
int opcode;
+ u16 dlid;
switch (event->param.rej_rcvd.reason) {
case IB_CM_REJ_PORT_CM_REDIRECT:
cpi = event->param.rej_rcvd.ari;
- sa_path_set_dlid(&ch->path, ntohs(cpi->redirect_lid));
- ch->path.pkey = cpi->redirect_pkey;
+ dlid = be16_to_cpu(cpi->redirect_lid);
+ sa_path_set_dlid(&ch->ib_cm.path, dlid);
+ ch->ib_cm.path.pkey = cpi->redirect_pkey;
cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
- memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
+ memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16);
- ch->status = sa_path_get_dlid(&ch->path) ?
- SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
+ ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
break;
case IB_CM_REJ_PORT_REDIRECT:
if (srp_target_is_topspin(target)) {
+ union ib_gid *dgid = &ch->ib_cm.path.dgid;
+
/*
* Topspin/Cisco SRP gateways incorrectly send
* reject reason code 25 when they mean 24
* (port redirect).
*/
- memcpy(ch->path.dgid.raw,
- event->param.rej_rcvd.ari, 16);
+ memcpy(dgid->raw, event->param.rej_rcvd.ari, 16);
shost_printk(KERN_DEBUG, shost,
PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
- be64_to_cpu(ch->path.dgid.global.subnet_prefix),
- be64_to_cpu(ch->path.dgid.global.interface_id));
+ be64_to_cpu(dgid->global.subnet_prefix),
+ be64_to_cpu(dgid->global.interface_id));
ch->status = SRP_PORT_REDIRECT;
} else {
@@ -2484,7 +2655,8 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
shost_printk(KERN_WARNING, shost, PFX
"SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
target->sgid.raw,
- target->orig_dgid.raw, reason);
+ target->ib_cm.orig_dgid.raw,
+ reason);
} else
shost_printk(KERN_WARNING, shost,
" REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
@@ -2504,7 +2676,7 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
}
}
-static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct srp_rdma_ch *ch = cm_id->context;
struct srp_target_port *target = ch->target;
@@ -2527,7 +2699,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
comp = 1;
- srp_cm_rej_handler(cm_id, event, ch);
+ srp_ib_cm_rej_handler(cm_id, event, ch);
break;
case IB_CM_DREQ_RECEIVED:
@@ -2565,6 +2737,135 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
return 0;
}
+static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch,
+ struct rdma_cm_event *event)
+{
+ struct srp_target_port *target = ch->target;
+ struct Scsi_Host *shost = target->scsi_host;
+ int opcode;
+
+ switch (event->status) {
+ case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
+ shost_printk(KERN_WARNING, shost,
+ " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
+ ch->status = -ECONNRESET;
+ break;
+
+ case IB_CM_REJ_CONSUMER_DEFINED:
+ opcode = *(u8 *) event->param.conn.private_data;
+ if (opcode == SRP_LOGIN_REJ) {
+ struct srp_login_rej *rej =
+ (struct srp_login_rej *)
+ event->param.conn.private_data;
+ u32 reason = be32_to_cpu(rej->reason);
+
+ if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
+ shost_printk(KERN_WARNING, shost,
+ PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
+ else
+ shost_printk(KERN_WARNING, shost,
+ PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
+ } else {
+ shost_printk(KERN_WARNING, shost,
+ " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
+ opcode);
+ }
+ ch->status = -ECONNRESET;
+ break;
+
+ case IB_CM_REJ_STALE_CONN:
+ shost_printk(KERN_WARNING, shost,
+ " REJ reason: stale connection\n");
+ ch->status = SRP_STALE_CONN;
+ break;
+
+ default:
+ shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
+ event->status);
+ ch->status = -ECONNRESET;
+ break;
+ }
+}
+
+static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
+{
+ struct srp_rdma_ch *ch = cm_id->context;
+ struct srp_target_port *target = ch->target;
+ int comp = 0;
+
+ switch (event->event) {
+ case RDMA_CM_EVENT_ADDR_RESOLVED:
+ ch->status = 0;
+ comp = 1;
+ break;
+
+ case RDMA_CM_EVENT_ADDR_ERROR:
+ ch->status = -ENXIO;
+ comp = 1;
+ break;
+
+ case RDMA_CM_EVENT_ROUTE_RESOLVED:
+ ch->status = 0;
+ comp = 1;
+ break;
+
+ case RDMA_CM_EVENT_ROUTE_ERROR:
+ case RDMA_CM_EVENT_UNREACHABLE:
+ ch->status = -EHOSTUNREACH;
+ comp = 1;
+ break;
+
+ case RDMA_CM_EVENT_CONNECT_ERROR:
+ shost_printk(KERN_DEBUG, target->scsi_host,
+ PFX "Sending CM REQ failed\n");
+ comp = 1;
+ ch->status = -ECONNRESET;
+ break;
+
+ case RDMA_CM_EVENT_ESTABLISHED:
+ comp = 1;
+ srp_cm_rep_handler(NULL, event->param.conn.private_data, ch);
+ break;
+
+ case RDMA_CM_EVENT_REJECTED:
+ shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
+ comp = 1;
+
+ srp_rdma_cm_rej_handler(ch, event);
+ break;
+
+ case RDMA_CM_EVENT_DISCONNECTED:
+ if (ch->connected) {
+ shost_printk(KERN_WARNING, target->scsi_host,
+ PFX "received DREQ\n");
+ rdma_disconnect(ch->rdma_cm.cm_id);
+ comp = 1;
+ ch->status = 0;
+ queue_work(system_long_wq, &target->tl_err_work);
+ }
+ break;
+
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+ shost_printk(KERN_ERR, target->scsi_host,
+ PFX "connection closed\n");
+
+ comp = 1;
+ ch->status = 0;
+ break;
+
+ default:
+ shost_printk(KERN_WARNING, target->scsi_host,
+ PFX "Unhandled CM event %d\n", event->event);
+ break;
+ }
+
+ if (comp)
+ complete(&ch->done);
+
+ return 0;
+}
+
/**
* srp_change_queue_depth - setting device queue depth
* @sdev: scsi device struct
@@ -2717,6 +3018,16 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
}
+static int srp_target_alloc(struct scsi_target *starget)
+{
+ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
+ struct srp_target_port *target = host_to_target(shost);
+
+ if (target->target_can_queue)
+ starget->can_queue = target->target_can_queue;
+ return 0;
+}
+
static int srp_slave_alloc(struct scsi_device *sdev)
{
struct Scsi_Host *shost = sdev->host;
@@ -2766,7 +3077,10 @@ static ssize_t show_service_id(struct device *dev,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
+ if (target->using_rdma_cm)
+ return -ENOENT;
+ return sprintf(buf, "0x%016llx\n",
+ be64_to_cpu(target->ib_cm.service_id));
}
static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
@@ -2774,7 +3088,9 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
+ if (target->using_rdma_cm)
+ return -ENOENT;
+ return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
}
static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
@@ -2791,7 +3107,9 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
struct srp_target_port *target = host_to_target(class_to_shost(dev));
struct srp_rdma_ch *ch = &target->ch[0];
- return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
+ if (target->using_rdma_cm)
+ return -ENOENT;
+ return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
}
static ssize_t show_orig_dgid(struct device *dev,
@@ -2799,7 +3117,9 @@ static ssize_t show_orig_dgid(struct device *dev,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
+ if (target->using_rdma_cm)
+ return -ENOENT;
+ return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
}
static ssize_t show_req_lim(struct device *dev,
@@ -2921,6 +3241,7 @@ static struct scsi_host_template srp_template = {
.module = THIS_MODULE,
.name = "InfiniBand SRP initiator",
.proc_name = DRV_NAME,
+ .target_alloc = srp_target_alloc,
.slave_alloc = srp_slave_alloc,
.slave_configure = srp_slave_configure,
.info = srp_target_info,
@@ -3044,6 +3365,9 @@ static bool srp_conn_unique(struct srp_host *host,
if (t != target &&
target->id_ext == t->id_ext &&
target->ioc_guid == t->ioc_guid &&
+ (!target->using_rdma_cm ||
+ memcmp(&target->rdma_cm.dst, &t->rdma_cm.dst,
+ sizeof(target->rdma_cm.dst)) == 0) &&
target->initiator_ext == t->initiator_ext) {
ret = false;
break;
@@ -3060,6 +3384,9 @@ out:
*
* id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
* pkey=<P_Key>,service_id=<service ID>
+ * or
+ * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
+ * [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
*
* to the add_target sysfs attribute.
*/
@@ -3080,11 +3407,20 @@ enum {
SRP_OPT_COMP_VECTOR = 1 << 12,
SRP_OPT_TL_RETRY_COUNT = 1 << 13,
SRP_OPT_QUEUE_SIZE = 1 << 14,
- SRP_OPT_ALL = (SRP_OPT_ID_EXT |
- SRP_OPT_IOC_GUID |
- SRP_OPT_DGID |
- SRP_OPT_PKEY |
- SRP_OPT_SERVICE_ID),
+ SRP_OPT_IP_SRC = 1 << 15,
+ SRP_OPT_IP_DEST = 1 << 16,
+ SRP_OPT_TARGET_CAN_QUEUE= 1 << 17,
+};
+
+static unsigned int srp_opt_mandatory[] = {
+ SRP_OPT_ID_EXT |
+ SRP_OPT_IOC_GUID |
+ SRP_OPT_DGID |
+ SRP_OPT_PKEY |
+ SRP_OPT_SERVICE_ID,
+ SRP_OPT_ID_EXT |
+ SRP_OPT_IOC_GUID |
+ SRP_OPT_IP_DEST,
};
static const match_table_t srp_opt_tokens = {
@@ -3095,6 +3431,7 @@ static const match_table_t srp_opt_tokens = {
{ SRP_OPT_SERVICE_ID, "service_id=%s" },
{ SRP_OPT_MAX_SECT, "max_sect=%d" },
{ SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
+ { SRP_OPT_TARGET_CAN_QUEUE, "target_can_queue=%d" },
{ SRP_OPT_IO_CLASS, "io_class=%x" },
{ SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
{ SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
@@ -3103,15 +3440,33 @@ static const match_table_t srp_opt_tokens = {
{ SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
{ SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
{ SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
+ { SRP_OPT_IP_SRC, "src=%s" },
+ { SRP_OPT_IP_DEST, "dest=%s" },
{ SRP_OPT_ERR, NULL }
};
-static int srp_parse_options(const char *buf, struct srp_target_port *target)
+static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
+ const char *addr_port_str)
+{
+ char *addr = kstrdup(addr_port_str, GFP_KERNEL);
+ char *port_str = addr;
+ int ret;
+
+ if (!addr)
+ return -ENOMEM;
+ strsep(&port_str, ":");
+ ret = inet_pton_with_scope(net, AF_UNSPEC, addr, port_str, sa);
+ kfree(addr);
+ return ret;
+}
+
+static int srp_parse_options(struct net *net, const char *buf,
+ struct srp_target_port *target)
{
char *options, *sep_opt;
char *p;
- char dgid[3];
substring_t args[MAX_OPT_ARGS];
+ unsigned long long ull;
int opt_mask = 0;
int token;
int ret = -EINVAL;
@@ -3136,7 +3491,13 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
ret = -ENOMEM;
goto out;
}
- target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
+ ret = kstrtoull(p, 16, &ull);
+ if (ret) {
+ pr_warn("invalid id_ext parameter '%s'\n", p);
+ kfree(p);
+ goto out;
+ }
+ target->id_ext = cpu_to_be64(ull);
kfree(p);
break;
@@ -3146,7 +3507,13 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
ret = -ENOMEM;
goto out;
}
- target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
+ ret = kstrtoull(p, 16, &ull);
+ if (ret) {
+ pr_warn("invalid ioc_guid parameter '%s'\n", p);
+ kfree(p);
+ goto out;
+ }
+ target->ioc_guid = cpu_to_be64(ull);
kfree(p);
break;
@@ -3162,16 +3529,10 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
goto out;
}
- for (i = 0; i < 16; ++i) {
- strlcpy(dgid, p + i * 2, sizeof(dgid));
- if (sscanf(dgid, "%hhx",
- &target->orig_dgid.raw[i]) < 1) {
- ret = -EINVAL;
- kfree(p);
- goto out;
- }
- }
+ ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16);
kfree(p);
+ if (ret < 0)
+ goto out;
break;
case SRP_OPT_PKEY:
@@ -3179,7 +3540,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
pr_warn("bad P_Key parameter '%s'\n", p);
goto out;
}
- target->pkey = cpu_to_be16(token);
+ target->ib_cm.pkey = cpu_to_be16(token);
break;
case SRP_OPT_SERVICE_ID:
@@ -3188,7 +3549,45 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
ret = -ENOMEM;
goto out;
}
- target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
+ ret = kstrtoull(p, 16, &ull);
+ if (ret) {
+ pr_warn("bad service_id parameter '%s'\n", p);
+ kfree(p);
+ goto out;
+ }
+ target->ib_cm.service_id = cpu_to_be64(ull);
+ kfree(p);
+ break;
+
+ case SRP_OPT_IP_SRC:
+ p = match_strdup(args);
+ if (!p) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = srp_parse_in(net, &target->rdma_cm.src.ss, p);
+ if (ret < 0) {
+ pr_warn("bad source parameter '%s'\n", p);
+ kfree(p);
+ goto out;
+ }
+ target->rdma_cm.src_specified = true;
+ kfree(p);
+ break;
+
+ case SRP_OPT_IP_DEST:
+ p = match_strdup(args);
+ if (!p) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p);
+ if (ret < 0) {
+ pr_warn("bad dest parameter '%s'\n", p);
+ kfree(p);
+ goto out;
+ }
+ target->using_rdma_cm = true;
kfree(p);
break;
@@ -3221,6 +3620,15 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
target->scsi_host->cmd_per_lun = token;
break;
+ case SRP_OPT_TARGET_CAN_QUEUE:
+ if (match_int(args, &token) || token < 1) {
+ pr_warn("bad max target_can_queue parameter '%s'\n",
+ p);
+ goto out;
+ }
+ target->target_can_queue = token;
+ break;
+
case SRP_OPT_IO_CLASS:
if (match_hex(args, &token)) {
pr_warn("bad IO class parameter '%s'\n", p);
@@ -3242,7 +3650,13 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
ret = -ENOMEM;
goto out;
}
- target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
+ ret = kstrtoull(p, 16, &ull);
+ if (ret) {
+ pr_warn("bad initiator_ext value '%s'\n", p);
+ kfree(p);
+ goto out;
+ }
+ target->initiator_ext = cpu_to_be64(ull);
kfree(p);
break;
@@ -3297,14 +3711,14 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
}
}
- if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
- ret = 0;
- else
- for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
- if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
- !(srp_opt_tokens[i].token & opt_mask))
- pr_warn("target creation request is missing parameter '%s'\n",
- srp_opt_tokens[i].pattern);
+ for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) {
+ if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) {
+ ret = 0;
+ break;
+ }
+ }
+ if (ret)
+ pr_warn("target creation request is missing one or more parameters\n");
if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
&& (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
@@ -3345,6 +3759,7 @@ static ssize_t srp_create_target(struct device *dev,
target = host_to_target(target_host);
+ target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
target->io_class = SRP_REV16A_IB_IO_CLASS;
target->scsi_host = target_host;
target->srp_host = host;
@@ -3366,18 +3781,29 @@ static ssize_t srp_create_target(struct device *dev,
if (ret < 0)
goto put;
- ret = srp_parse_options(buf, target);
+ ret = srp_parse_options(target->net, buf, target);
if (ret)
goto out;
target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
if (!srp_conn_unique(target->srp_host, target)) {
- shost_printk(KERN_INFO, target->scsi_host,
- PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
- be64_to_cpu(target->id_ext),
- be64_to_cpu(target->ioc_guid),
- be64_to_cpu(target->initiator_ext));
+ if (target->using_rdma_cm) {
+ char dst_addr[64];
+
+ shost_printk(KERN_INFO, target->scsi_host,
+ PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%s\n",
+ be64_to_cpu(target->id_ext),
+ be64_to_cpu(target->ioc_guid),
+ inet_ntop(&target->rdma_cm.dst, dst_addr,
+ sizeof(dst_addr)));
+ } else {
+ shost_printk(KERN_INFO, target->scsi_host,
+ PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
+ be64_to_cpu(target->id_ext),
+ be64_to_cpu(target->ioc_guid),
+ be64_to_cpu(target->initiator_ext));
+ }
ret = -EEXIST;
goto out;
}
@@ -3478,11 +3904,18 @@ static ssize_t srp_create_target(struct device *dev,
ret = srp_connect_ch(ch, multich);
if (ret) {
+ char dst[64];
+
+ if (target->using_rdma_cm)
+ inet_ntop(&target->rdma_cm.dst, dst,
+ sizeof(dst));
+ else
+ snprintf(dst, sizeof(dst), "%pI6",
+ target->ib_cm.orig_dgid.raw);
shost_printk(KERN_ERR, target->scsi_host,
- PFX "Connection %d/%d to %pI6 failed\n",
+ PFX "Connection %d/%d to %s failed\n",
ch_start + cpu_idx,
- target->ch_count,
- ch->target->orig_dgid.raw);
+ target->ch_count, dst);
if (node_idx == 0 && cpu_idx == 0) {
goto free_ch;
} else {
@@ -3507,13 +3940,25 @@ connected:
goto err_disconnect;
if (target->state != SRP_TARGET_REMOVED) {
- shost_printk(KERN_DEBUG, target->scsi_host, PFX
- "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
- be64_to_cpu(target->id_ext),
- be64_to_cpu(target->ioc_guid),
- be16_to_cpu(target->pkey),
- be64_to_cpu(target->service_id),
- target->sgid.raw, target->orig_dgid.raw);
+ if (target->using_rdma_cm) {
+ char dst[64];
+
+ inet_ntop(&target->rdma_cm.dst, dst, sizeof(dst));
+ shost_printk(KERN_DEBUG, target->scsi_host, PFX
+ "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %s\n",
+ be64_to_cpu(target->id_ext),
+ be64_to_cpu(target->ioc_guid),
+ target->sgid.raw, dst);
+ } else {
+ shost_printk(KERN_DEBUG, target->scsi_host, PFX
+ "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
+ be64_to_cpu(target->id_ext),
+ be64_to_cpu(target->ioc_guid),
+ be16_to_cpu(target->ib_cm.pkey),
+ be64_to_cpu(target->ib_cm.service_id),
+ target->sgid.raw,
+ target->ib_cm.orig_dgid.raw);
+ }
}
ret = count;
@@ -3523,8 +3968,16 @@ out:
put:
scsi_host_put(target->scsi_host);
- if (ret < 0)
+ if (ret < 0) {
+ /*
+ * If a call to srp_remove_target() has not been scheduled,
+ * drop the network namespace reference now that was obtained
+ * earlier in this function.
+ */
+ if (target->state != SRP_TARGET_REMOVED)
+ kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
scsi_host_put(target->scsi_host);
+ }
return ret;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index a814f5ef16f9..a2706086b9c7 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -45,6 +45,7 @@
#include <rdma/ib_sa.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_fmr_pool.h>
+#include <rdma/rdma_cm.h>
enum {
SRP_PATH_REC_TIMEOUT_MS = 1000,
@@ -153,11 +154,18 @@ struct srp_rdma_ch {
struct completion done;
int status;
- struct sa_path_rec path;
- struct ib_sa_query *path_query;
- int path_query_id;
+ union {
+ struct ib_cm {
+ struct sa_path_rec path;
+ struct ib_sa_query *path_query;
+ int path_query_id;
+ struct ib_cm_id *cm_id;
+ } ib_cm;
+ struct rdma_cm {
+ struct rdma_cm_id *cm_id;
+ } rdma_cm;
+ };
- struct ib_cm_id *cm_id;
struct srp_iu **tx_ring;
struct srp_iu **rx_ring;
struct srp_request *req_ring;
@@ -182,6 +190,7 @@ struct srp_target_port {
/* read only in the hot path */
u32 global_rkey;
struct srp_rdma_ch *ch;
+ struct net *net;
u32 ch_count;
u32 lkey;
enum srp_target_state state;
@@ -194,7 +203,6 @@ struct srp_target_port {
union ib_gid sgid;
__be64 id_ext;
__be64 ioc_guid;
- __be64 service_id;
__be64 initiator_ext;
u16 io_class;
struct srp_host *srp_host;
@@ -203,6 +211,7 @@ struct srp_target_port {
char target_name[32];
unsigned int scsi_id;
unsigned int sg_tablesize;
+ unsigned int target_can_queue;
int mr_pool_size;
int mr_per_cmd;
int queue_size;
@@ -210,8 +219,28 @@ struct srp_target_port {
int comp_vector;
int tl_retry_count;
- union ib_gid orig_dgid;
- __be16 pkey;
+ bool using_rdma_cm;
+
+ union {
+ struct {
+ __be64 service_id;
+ union ib_gid orig_dgid;
+ __be16 pkey;
+ } ib_cm;
+ struct {
+ union {
+ struct sockaddr_in ip4;
+ struct sockaddr_in6 ip6;
+ struct sockaddr_storage ss;
+ } src;
+ union {
+ struct sockaddr_in ip4;
+ struct sockaddr_in6 ip6;
+ struct sockaddr_storage ss;
+ } dst;
+ bool src_specified;
+ } rdma_cm;
+ };
u32 rq_tmo_jiffies;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index bfa576aa9f03..0373b7c40902 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -41,6 +41,7 @@
#include <linux/string.h>
#include <linux/delay.h>
#include <linux/atomic.h>
+#include <rdma/ib_cache.h>
#include <scsi/scsi_proto.h>
#include <scsi/scsi_tcq.h>
#include <target/target_core_base.h>
@@ -120,7 +121,9 @@ static bool srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new)
}
/**
- * srpt_event_handler() - Asynchronous IB event callback function.
+ * srpt_event_handler - asynchronous IB event callback function
+ * @handler: IB event handler registered by ib_register_event_handler().
+ * @event: Description of the event that occurred.
*
* Callback function called by the InfiniBand core when an asynchronous IB
* event occurs. This callback may occur in interrupt context. See also
@@ -132,6 +135,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
{
struct srpt_device *sdev;
struct srpt_port *sport;
+ u8 port_num;
sdev = ib_get_client_data(event->device, &srpt_client);
if (!sdev || sdev->device != event->device)
@@ -142,10 +146,15 @@ static void srpt_event_handler(struct ib_event_handler *handler,
switch (event->event) {
case IB_EVENT_PORT_ERR:
- if (event->element.port_num <= sdev->device->phys_port_cnt) {
- sport = &sdev->port[event->element.port_num - 1];
+ port_num = event->element.port_num - 1;
+ if (port_num < sdev->device->phys_port_cnt) {
+ sport = &sdev->port[port_num];
sport->lid = 0;
sport->sm_lid = 0;
+ } else {
+ WARN(true, "event %d: port_num %d out of range 1..%d\n",
+ event->event, port_num + 1,
+ sdev->device->phys_port_cnt);
}
break;
case IB_EVENT_PORT_ACTIVE:
@@ -155,25 +164,31 @@ static void srpt_event_handler(struct ib_event_handler *handler,
case IB_EVENT_CLIENT_REREGISTER:
case IB_EVENT_GID_CHANGE:
/* Refresh port data asynchronously. */
- if (event->element.port_num <= sdev->device->phys_port_cnt) {
- sport = &sdev->port[event->element.port_num - 1];
+ port_num = event->element.port_num - 1;
+ if (port_num < sdev->device->phys_port_cnt) {
+ sport = &sdev->port[port_num];
if (!sport->lid && !sport->sm_lid)
schedule_work(&sport->work);
+ } else {
+ WARN(true, "event %d: port_num %d out of range 1..%d\n",
+ event->event, port_num + 1,
+ sdev->device->phys_port_cnt);
}
break;
default:
- pr_err("received unrecognized IB event %d\n",
- event->event);
+ pr_err("received unrecognized IB event %d\n", event->event);
break;
}
}
/**
- * srpt_srq_event() - SRQ event callback function.
+ * srpt_srq_event - SRQ event callback function
+ * @event: Description of the event that occurred.
+ * @ctx: Context pointer specified at SRQ creation time.
*/
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
- pr_info("SRQ event %d\n", event->event);
+ pr_debug("SRQ event %d\n", event->event);
}
static const char *get_ch_state_name(enum rdma_ch_state s)
@@ -194,16 +209,18 @@ static const char *get_ch_state_name(enum rdma_ch_state s)
}
/**
- * srpt_qp_event() - QP event callback function.
+ * srpt_qp_event - QP event callback function
+ * @event: Description of the event that occurred.
+ * @ch: SRPT RDMA channel.
*/
static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
{
- pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n",
- event->event, ch->cm_id, ch->sess_name, ch->state);
+ pr_debug("QP event %d on ch=%p sess_name=%s state=%d\n",
+ event->event, ch, ch->sess_name, ch->state);
switch (event->event) {
case IB_EVENT_COMM_EST:
- ib_cm_notify(ch->cm_id, event->event);
+ ib_cm_notify(ch->ib_cm.cm_id, event->event);
break;
case IB_EVENT_QP_LAST_WQE_REACHED:
pr_debug("%s-%d, state %s: received Last WQE event.\n",
@@ -217,8 +234,8 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
}
/**
- * srpt_set_ioc() - Helper function for initializing an IOUnitInfo structure.
- *
+ * srpt_set_ioc - initialize a IOUnitInfo structure
+ * @c_list: controller list.
* @slot: one-based slot number.
* @value: four-bit value.
*
@@ -241,7 +258,8 @@ static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
}
/**
- * srpt_get_class_port_info() - Copy ClassPortInfo to a management datagram.
+ * srpt_get_class_port_info - copy ClassPortInfo to a management datagram
+ * @mad: Datagram that will be sent as response to DM_ATTR_CLASS_PORT_INFO.
*
* See also section 16.3.3.1 ClassPortInfo in the InfiniBand Architecture
* Specification.
@@ -260,7 +278,8 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad)
}
/**
- * srpt_get_iou() - Write IOUnitInfo to a management datagram.
+ * srpt_get_iou - write IOUnitInfo to a management datagram
+ * @mad: Datagram that will be sent as response to DM_ATTR_IOU_INFO.
*
* See also section 16.3.3.3 IOUnitInfo in the InfiniBand Architecture
* Specification. See also section B.7, table B.6 in the SRP r16a document.
@@ -284,7 +303,10 @@ static void srpt_get_iou(struct ib_dm_mad *mad)
}
/**
- * srpt_get_ioc() - Write IOControllerprofile to a management datagram.
+ * srpt_get_ioc - write IOControllerprofile to a management datagram
+ * @sport: HCA port through which the MAD has been received.
+ * @slot: Slot number specified in DM_ATTR_IOC_PROFILE query.
+ * @mad: Datagram that will be sent as response to DM_ATTR_IOC_PROFILE.
*
* See also section 16.3.3.4 IOControllerProfile in the InfiniBand
* Architecture Specification. See also section B.7, table B.7 in the SRP
@@ -314,7 +336,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
if (sdev->use_srq)
send_queue_depth = sdev->srq_size;
else
- send_queue_depth = min(SRPT_RQ_SIZE,
+ send_queue_depth = min(MAX_SRPT_RQ_SIZE,
sdev->device->attrs.max_qp_wr);
memset(iocp, 0, sizeof(*iocp));
@@ -342,7 +364,12 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
}
/**
- * srpt_get_svc_entries() - Write ServiceEntries to a management datagram.
+ * srpt_get_svc_entries - write ServiceEntries to a management datagram
+ * @ioc_guid: I/O controller GUID to use in reply.
+ * @slot: I/O controller number.
+ * @hi: End of the range of service entries to be specified in the reply.
+ * @lo: Start of the range of service entries to be specified in the reply..
+ * @mad: Datagram that will be sent as response to DM_ATTR_SVC_ENTRIES.
*
* See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
* Specification. See also section B.7, table B.8 in the SRP r16a document.
@@ -379,8 +406,8 @@ static void srpt_get_svc_entries(u64 ioc_guid,
}
/**
- * srpt_mgmt_method_get() - Process a received management datagram.
- * @sp: source port through which the MAD has been received.
+ * srpt_mgmt_method_get - process a received management datagram
+ * @sp: HCA port through which the MAD has been received.
* @rq_mad: received MAD.
* @rsp_mad: response MAD.
*/
@@ -419,7 +446,9 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
}
/**
- * srpt_mad_send_handler() - Post MAD-send callback function.
+ * srpt_mad_send_handler - MAD send completion callback
+ * @mad_agent: Return value of ib_register_mad_agent().
+ * @mad_wc: Work completion reporting that the MAD has been sent.
*/
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_wc)
@@ -429,7 +458,10 @@ static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
}
/**
- * srpt_mad_recv_handler() - MAD reception callback function.
+ * srpt_mad_recv_handler - MAD reception callback function
+ * @mad_agent: Return value of ib_register_mad_agent().
+ * @send_buf: Not used.
+ * @mad_wc: Work completion reporting that a MAD has been received.
*/
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_buf *send_buf,
@@ -493,8 +525,18 @@ err:
ib_free_recv_mad(mad_wc);
}
+static int srpt_format_guid(char *buf, unsigned int size, const __be64 *guid)
+{
+ const __be16 *g = (const __be16 *)guid;
+
+ return snprintf(buf, size, "%04x:%04x:%04x:%04x",
+ be16_to_cpu(g[0]), be16_to_cpu(g[1]),
+ be16_to_cpu(g[2]), be16_to_cpu(g[3]));
+}
+
/**
- * srpt_refresh_port() - Configure a HCA port.
+ * srpt_refresh_port - configure a HCA port
+ * @sport: SRPT HCA port.
*
* Enable InfiniBand management datagram processing, update the cached sm_lid,
* lid and gid values, and register a callback function for processing MADs
@@ -507,7 +549,6 @@ static int srpt_refresh_port(struct srpt_port *sport)
struct ib_mad_reg_req reg_req;
struct ib_port_modify port_modify;
struct ib_port_attr port_attr;
- __be16 *guid;
int ret;
memset(&port_modify, 0, sizeof(port_modify));
@@ -531,11 +572,8 @@ static int srpt_refresh_port(struct srpt_port *sport)
goto err_query_port;
sport->port_guid_wwn.priv = sport;
- guid = (__be16 *)&sport->gid.global.interface_id;
- snprintf(sport->port_guid, sizeof(sport->port_guid),
- "%04x:%04x:%04x:%04x",
- be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
- be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
+ srpt_format_guid(sport->port_guid, sizeof(sport->port_guid),
+ &sport->gid.global.interface_id);
sport->port_gid_wwn.priv = sport;
snprintf(sport->port_gid, sizeof(sport->port_gid),
"0x%016llx%016llx",
@@ -577,7 +615,8 @@ err_mod_port:
}
/**
- * srpt_unregister_mad_agent() - Unregister MAD callback functions.
+ * srpt_unregister_mad_agent - unregister MAD callback functions
+ * @sdev: SRPT HCA pointer.
*
* Note: It is safe to call this function more than once for the same device.
*/
@@ -602,7 +641,11 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev)
}
/**
- * srpt_alloc_ioctx() - Allocate an SRPT I/O context structure.
+ * srpt_alloc_ioctx - allocate a SRPT I/O context structure
+ * @sdev: SRPT HCA pointer.
+ * @ioctx_size: I/O context size.
+ * @dma_size: Size of I/O context DMA buffer.
+ * @dir: DMA data direction.
*/
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev,
int ioctx_size, int dma_size,
@@ -633,7 +676,11 @@ err:
}
/**
- * srpt_free_ioctx() - Free an SRPT I/O context structure.
+ * srpt_free_ioctx - free a SRPT I/O context structure
+ * @sdev: SRPT HCA pointer.
+ * @ioctx: I/O context pointer.
+ * @dma_size: Size of I/O context DMA buffer.
+ * @dir: DMA data direction.
*/
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx,
int dma_size, enum dma_data_direction dir)
@@ -647,7 +694,7 @@ static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx,
}
/**
- * srpt_alloc_ioctx_ring() - Allocate a ring of SRPT I/O context structures.
+ * srpt_alloc_ioctx_ring - allocate a ring of SRPT I/O context structures
* @sdev: Device to allocate the I/O context ring for.
* @ring_size: Number of elements in the I/O context ring.
* @ioctx_size: I/O context size.
@@ -685,7 +732,12 @@ out:
}
/**
- * srpt_free_ioctx_ring() - Free the ring of SRPT I/O context structures.
+ * srpt_free_ioctx_ring - free the ring of SRPT I/O context structures
+ * @ioctx_ring: I/O context ring to be freed.
+ * @sdev: SRPT HCA pointer.
+ * @ring_size: Number of ring elements.
+ * @dma_size: Size of I/O context DMA buffer.
+ * @dir: DMA data direction.
*/
static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
struct srpt_device *sdev, int ring_size,
@@ -702,23 +754,9 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
}
/**
- * srpt_get_cmd_state() - Get the state of a SCSI command.
- */
-static enum srpt_command_state srpt_get_cmd_state(struct srpt_send_ioctx *ioctx)
-{
- enum srpt_command_state state;
- unsigned long flags;
-
- BUG_ON(!ioctx);
-
- spin_lock_irqsave(&ioctx->spinlock, flags);
- state = ioctx->state;
- spin_unlock_irqrestore(&ioctx->spinlock, flags);
- return state;
-}
-
-/**
- * srpt_set_cmd_state() - Set the state of a SCSI command.
+ * srpt_set_cmd_state - set the state of a SCSI command
+ * @ioctx: Send I/O context.
+ * @new: New I/O context state.
*
* Does not modify the state of aborted commands. Returns the previous command
* state.
@@ -727,21 +765,19 @@ static enum srpt_command_state srpt_set_cmd_state(struct srpt_send_ioctx *ioctx,
enum srpt_command_state new)
{
enum srpt_command_state previous;
- unsigned long flags;
- BUG_ON(!ioctx);
-
- spin_lock_irqsave(&ioctx->spinlock, flags);
previous = ioctx->state;
if (previous != SRPT_STATE_DONE)
ioctx->state = new;
- spin_unlock_irqrestore(&ioctx->spinlock, flags);
return previous;
}
/**
- * srpt_test_and_set_cmd_state() - Test and set the state of a command.
+ * srpt_test_and_set_cmd_state - test and set the state of a command
+ * @ioctx: Send I/O context.
+ * @old: Current I/O context state.
+ * @new: New I/O context state.
*
* Returns true if and only if the previous command state was equal to 'old'.
*/
@@ -750,22 +786,23 @@ static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx,
enum srpt_command_state new)
{
enum srpt_command_state previous;
- unsigned long flags;
WARN_ON(!ioctx);
WARN_ON(old == SRPT_STATE_DONE);
WARN_ON(new == SRPT_STATE_NEW);
- spin_lock_irqsave(&ioctx->spinlock, flags);
previous = ioctx->state;
if (previous == old)
ioctx->state = new;
- spin_unlock_irqrestore(&ioctx->spinlock, flags);
+
return previous == old;
}
/**
- * srpt_post_recv() - Post an IB receive request.
+ * srpt_post_recv - post an IB receive request
+ * @sdev: SRPT HCA pointer.
+ * @ch: SRPT RDMA channel.
+ * @ioctx: Receive I/O context pointer.
*/
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
struct srpt_recv_ioctx *ioctx)
@@ -791,7 +828,8 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
}
/**
- * srpt_zerolength_write() - Perform a zero-length RDMA write.
+ * srpt_zerolength_write - perform a zero-length RDMA write
+ * @ch: SRPT RDMA channel.
*
* A quote from the InfiniBand specification: C9-88: For an HCA responder
* using Reliable Connection service, for each zero-length RDMA READ or WRITE
@@ -802,6 +840,9 @@ static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
{
struct ib_send_wr wr, *bad_wr;
+ pr_debug("%s-%d: queued zerolength write\n", ch->sess_name,
+ ch->qp->qp_num);
+
memset(&wr, 0, sizeof(wr));
wr.opcode = IB_WR_RDMA_WRITE;
wr.wr_cqe = &ch->zw_cqe;
@@ -813,13 +854,17 @@ static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct srpt_rdma_ch *ch = cq->cq_context;
+ pr_debug("%s-%d wc->status %d\n", ch->sess_name, ch->qp->qp_num,
+ wc->status);
+
if (wc->status == IB_WC_SUCCESS) {
srpt_process_wait_list(ch);
} else {
if (srpt_set_ch_state(ch, CH_DISCONNECTED))
schedule_work(&ch->release_work);
else
- WARN_ONCE(1, "%s-%d\n", ch->sess_name, ch->qp->qp_num);
+ pr_debug("%s-%d: already disconnected.\n",
+ ch->sess_name, ch->qp->qp_num);
}
}
@@ -928,11 +973,13 @@ static inline void *srpt_get_desc_buf(struct srp_cmd *srp_cmd)
}
/**
- * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request.
+ * srpt_get_desc_tbl - parse the data descriptors of a SRP_CMD request
* @ioctx: Pointer to the I/O context associated with the request.
* @srp_cmd: Pointer to the SRP_CMD request data.
* @dir: Pointer to the variable to which the transfer direction will be
* written.
+ * @sg: [out] scatterlist allocated for the parsed SRP_CMD.
+ * @sg_cnt: [out] length of @sg.
* @data_len: Pointer to the variable to which the total data length of all
* descriptors in the SRP_CMD request will be written.
*
@@ -998,7 +1045,9 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
}
/**
- * srpt_init_ch_qp() - Initialize queue pair attributes.
+ * srpt_init_ch_qp - initialize queue pair attributes
+ * @ch: SRPT RDMA channel.
+ * @qp: Queue pair pointer.
*
* Initialized the attributes of queue pair 'qp' by allowing local write,
* remote read and remote write. Also transitions 'qp' to state IB_QPS_INIT.
@@ -1015,7 +1064,12 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
attr->qp_state = IB_QPS_INIT;
attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
attr->port_num = ch->sport->port;
- attr->pkey_index = 0;
+
+ ret = ib_find_cached_pkey(ch->sport->sdev->device, ch->sport->port,
+ ch->pkey, &attr->pkey_index);
+ if (ret < 0)
+ pr_err("Translating pkey %#x failed (%d) - using index 0\n",
+ ch->pkey, ret);
ret = ib_modify_qp(qp, attr,
IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
@@ -1026,7 +1080,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
}
/**
- * srpt_ch_qp_rtr() - Change the state of a channel to 'ready to receive' (RTR).
+ * srpt_ch_qp_rtr - change the state of a channel to 'ready to receive' (RTR)
* @ch: channel of the queue pair.
* @qp: queue pair to change the state of.
*
@@ -1043,7 +1097,7 @@ static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
int ret;
qp_attr.qp_state = IB_QPS_RTR;
- ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
+ ret = ib_cm_init_qp_attr(ch->ib_cm.cm_id, &qp_attr, &attr_mask);
if (ret)
goto out;
@@ -1056,7 +1110,7 @@ out:
}
/**
- * srpt_ch_qp_rts() - Change the state of a channel to 'ready to send' (RTS).
+ * srpt_ch_qp_rts - change the state of a channel to 'ready to send' (RTS)
* @ch: channel of the queue pair.
* @qp: queue pair to change the state of.
*
@@ -1073,7 +1127,7 @@ static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
int ret;
qp_attr.qp_state = IB_QPS_RTS;
- ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
+ ret = ib_cm_init_qp_attr(ch->ib_cm.cm_id, &qp_attr, &attr_mask);
if (ret)
goto out;
@@ -1086,7 +1140,8 @@ out:
}
/**
- * srpt_ch_qp_err() - Set the channel queue pair state to 'error'.
+ * srpt_ch_qp_err - set the channel queue pair state to 'error'
+ * @ch: SRPT RDMA channel.
*/
static int srpt_ch_qp_err(struct srpt_rdma_ch *ch)
{
@@ -1097,7 +1152,8 @@ static int srpt_ch_qp_err(struct srpt_rdma_ch *ch)
}
/**
- * srpt_get_send_ioctx() - Obtain an I/O context for sending to the initiator.
+ * srpt_get_send_ioctx - obtain an I/O context for sending to the initiator
+ * @ch: SRPT RDMA channel.
*/
static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
{
@@ -1119,11 +1175,9 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
return ioctx;
BUG_ON(ioctx->ch != ch);
- spin_lock_init(&ioctx->spinlock);
ioctx->state = SRPT_STATE_NEW;
ioctx->n_rdma = 0;
ioctx->n_rw_ctx = 0;
- init_completion(&ioctx->tx_done);
ioctx->queue_status_only = false;
/*
* transport_init_se_cmd() does not initialize all fields, so do it
@@ -1136,14 +1190,12 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
}
/**
- * srpt_abort_cmd() - Abort a SCSI command.
+ * srpt_abort_cmd - abort a SCSI command
* @ioctx: I/O context associated with the SCSI command.
- * @context: Preferred execution context.
*/
static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
{
enum srpt_command_state state;
- unsigned long flags;
BUG_ON(!ioctx);
@@ -1152,7 +1204,6 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
* the ib_srpt driver, change the state to the next state.
*/
- spin_lock_irqsave(&ioctx->spinlock, flags);
state = ioctx->state;
switch (state) {
case SRPT_STATE_NEED_DATA:
@@ -1167,7 +1218,6 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
__func__, state);
break;
}
- spin_unlock_irqrestore(&ioctx->spinlock, flags);
pr_debug("Aborting cmd with state %d -> %d and tag %lld\n", state,
ioctx->state, ioctx->cmd.tag);
@@ -1206,6 +1256,10 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
}
/**
+ * srpt_rdma_read_done - RDMA read completion callback
+ * @cq: Completion queue.
+ * @wc: Work completion.
+ *
* XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
* the data that has been transferred via IB RDMA had to be postponed until the
* check_stop_free() callback. None of this is necessary anymore and needs to
@@ -1233,11 +1287,11 @@ static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
target_execute_cmd(&ioctx->cmd);
else
pr_err("%s[%d]: wrong state = %d\n", __func__,
- __LINE__, srpt_get_cmd_state(ioctx));
+ __LINE__, ioctx->state);
}
/**
- * srpt_build_cmd_rsp() - Build an SRP_RSP response.
+ * srpt_build_cmd_rsp - build a SRP_RSP response
* @ch: RDMA channel through which the request has been received.
* @ioctx: I/O context associated with the SRP_CMD request. The response will
* be built in the buffer ioctx->buf points at and hence this function will
@@ -1297,7 +1351,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
}
/**
- * srpt_build_tskmgmt_rsp() - Build a task management response.
+ * srpt_build_tskmgmt_rsp - build a task management response
* @ch: RDMA channel through which the request has been received.
* @ioctx: I/O context in which the SRP_RSP response will be built.
* @rsp_code: RSP_CODE that will be stored in the response.
@@ -1345,7 +1399,10 @@ static int srpt_check_stop_free(struct se_cmd *cmd)
}
/**
- * srpt_handle_cmd() - Process SRP_CMD.
+ * srpt_handle_cmd - process a SRP_CMD information unit
+ * @ch: SRPT RDMA channel.
+ * @recv_ioctx: Receive I/O context.
+ * @send_ioctx: Send I/O context.
*/
static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
struct srpt_recv_ioctx *recv_ioctx,
@@ -1427,7 +1484,10 @@ static int srp_tmr_to_tcm(int fn)
}
/**
- * srpt_handle_tsk_mgmt() - Process an SRP_TSK_MGMT information unit.
+ * srpt_handle_tsk_mgmt - process a SRP_TSK_MGMT information unit
+ * @ch: SRPT RDMA channel.
+ * @recv_ioctx: Receive I/O context.
+ * @send_ioctx: Send I/O context.
*
* Returns 0 if and only if the request will be processed by the target core.
*
@@ -1449,9 +1509,9 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
srp_tsk = recv_ioctx->ioctx.buf;
cmd = &send_ioctx->cmd;
- pr_debug("recv tsk_mgmt fn %d for task_tag %lld and cmd tag %lld"
- " cm_id %p sess %p\n", srp_tsk->tsk_mgmt_func,
- srp_tsk->task_tag, srp_tsk->tag, ch->cm_id, ch->sess);
+ pr_debug("recv tsk_mgmt fn %d for task_tag %lld and cmd tag %lld ch %p sess %p\n",
+ srp_tsk->tsk_mgmt_func, srp_tsk->task_tag, srp_tsk->tag, ch,
+ ch->sess);
srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT);
send_ioctx->cmd.tag = srp_tsk->tag;
@@ -1470,41 +1530,42 @@ fail:
}
/**
- * srpt_handle_new_iu() - Process a newly received information unit.
+ * srpt_handle_new_iu - process a newly received information unit
* @ch: RDMA channel through which the information unit has been received.
- * @ioctx: SRPT I/O context associated with the information unit.
+ * @recv_ioctx: Receive I/O context associated with the information unit.
*/
-static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
- struct srpt_recv_ioctx *recv_ioctx,
- struct srpt_send_ioctx *send_ioctx)
+static bool
+srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *recv_ioctx)
{
+ struct srpt_send_ioctx *send_ioctx = NULL;
struct srp_cmd *srp_cmd;
+ bool res = false;
+ u8 opcode;
BUG_ON(!ch);
BUG_ON(!recv_ioctx);
+ if (unlikely(ch->state == CH_CONNECTING))
+ goto push;
+
ib_dma_sync_single_for_cpu(ch->sport->sdev->device,
recv_ioctx->ioctx.dma, srp_max_req_size,
DMA_FROM_DEVICE);
- if (unlikely(ch->state == CH_CONNECTING))
- goto out_wait;
-
- if (unlikely(ch->state != CH_LIVE))
- return;
-
srp_cmd = recv_ioctx->ioctx.buf;
- if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT) {
- if (!send_ioctx) {
- if (!list_empty(&ch->cmd_wait_list))
- goto out_wait;
- send_ioctx = srpt_get_send_ioctx(ch);
- }
+ opcode = srp_cmd->opcode;
+ if (opcode == SRP_CMD || opcode == SRP_TSK_MGMT) {
+ send_ioctx = srpt_get_send_ioctx(ch);
if (unlikely(!send_ioctx))
- goto out_wait;
+ goto push;
}
- switch (srp_cmd->opcode) {
+ if (!list_empty(&recv_ioctx->wait_list)) {
+ WARN_ON_ONCE(!ch->processing_wait_list);
+ list_del_init(&recv_ioctx->wait_list);
+ }
+
+ switch (opcode) {
case SRP_CMD:
srpt_handle_cmd(ch, recv_ioctx, send_ioctx);
break;
@@ -1524,16 +1585,22 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
pr_err("Received SRP_RSP\n");
break;
default:
- pr_err("received IU with unknown opcode 0x%x\n",
- srp_cmd->opcode);
+ pr_err("received IU with unknown opcode 0x%x\n", opcode);
break;
}
srpt_post_recv(ch->sport->sdev, ch, recv_ioctx);
- return;
+ res = true;
+
+out:
+ return res;
-out_wait:
- list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
+push:
+ if (list_empty(&recv_ioctx->wait_list)) {
+ WARN_ON_ONCE(ch->processing_wait_list);
+ list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
+ }
+ goto out;
}
static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1548,10 +1615,10 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
req_lim = atomic_dec_return(&ch->req_lim);
if (unlikely(req_lim < 0))
pr_err("req_lim = %d < 0\n", req_lim);
- srpt_handle_new_iu(ch, ioctx, NULL);
+ srpt_handle_new_iu(ch, ioctx);
} else {
- pr_info("receiving failed for ioctx %p with status %d\n",
- ioctx, wc->status);
+ pr_info_ratelimited("receiving failed for ioctx %p with status %d\n",
+ ioctx, wc->status);
}
}
@@ -1562,22 +1629,28 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
*/
static void srpt_process_wait_list(struct srpt_rdma_ch *ch)
{
- struct srpt_send_ioctx *ioctx;
+ struct srpt_recv_ioctx *recv_ioctx, *tmp;
+
+ WARN_ON_ONCE(ch->state == CH_CONNECTING);
- while (!list_empty(&ch->cmd_wait_list) &&
- ch->state >= CH_LIVE &&
- (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
- struct srpt_recv_ioctx *recv_ioctx;
+ if (list_empty(&ch->cmd_wait_list))
+ return;
- recv_ioctx = list_first_entry(&ch->cmd_wait_list,
- struct srpt_recv_ioctx,
- wait_list);
- list_del(&recv_ioctx->wait_list);
- srpt_handle_new_iu(ch, recv_ioctx, ioctx);
+ WARN_ON_ONCE(ch->processing_wait_list);
+ ch->processing_wait_list = true;
+ list_for_each_entry_safe(recv_ioctx, tmp, &ch->cmd_wait_list,
+ wait_list) {
+ if (!srpt_handle_new_iu(ch, recv_ioctx))
+ break;
}
+ ch->processing_wait_list = false;
}
/**
+ * srpt_send_done - send completion callback
+ * @cq: Completion queue.
+ * @wc: Work completion.
+ *
* Note: Although this has not yet been observed during tests, at least in
* theory it is possible that the srpt_get_send_ioctx() call invoked by
* srpt_handle_new_iu() fails. This is possible because the req_lim_delta
@@ -1619,7 +1692,8 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
}
/**
- * srpt_create_ch_ib() - Create receive and send completion queues.
+ * srpt_create_ch_ib - create receive and send completion queues
+ * @ch: SRPT RDMA channel.
*/
static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
{
@@ -1627,7 +1701,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
struct srpt_port *sport = ch->sport;
struct srpt_device *sdev = sport->sdev;
const struct ib_device_attr *attrs = &sdev->device->attrs;
- u32 srp_sq_size = sport->port_attrib.srp_sq_size;
+ int sq_size = sport->port_attrib.srp_sq_size;
int i, ret;
WARN_ON(ch->rq_size < 1);
@@ -1638,12 +1712,12 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
goto out;
retry:
- ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size,
+ ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + sq_size,
0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
if (IS_ERR(ch->cq)) {
ret = PTR_ERR(ch->cq);
pr_err("failed to create CQ cqe= %d ret= %d\n",
- ch->rq_size + srp_sq_size, ret);
+ ch->rq_size + sq_size, ret);
goto out;
}
@@ -1661,8 +1735,8 @@ retry:
* both both, as RDMA contexts will also post completions for the
* RDMA READ case.
*/
- qp_init->cap.max_send_wr = min(srp_sq_size / 2, attrs->max_qp_wr + 0U);
- qp_init->cap.max_rdma_ctxs = srp_sq_size / 2;
+ qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr);
+ qp_init->cap.max_rdma_ctxs = sq_size / 2;
qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE);
qp_init->port_num = ch->sport->port;
if (sdev->use_srq) {
@@ -1676,8 +1750,8 @@ retry:
if (IS_ERR(ch->qp)) {
ret = PTR_ERR(ch->qp);
if (ret == -ENOMEM) {
- srp_sq_size /= 2;
- if (srp_sq_size >= MIN_SRPT_SQ_SIZE) {
+ sq_size /= 2;
+ if (sq_size >= MIN_SRPT_SQ_SIZE) {
ib_destroy_cq(ch->cq);
goto retry;
}
@@ -1688,9 +1762,9 @@ retry:
atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr);
- pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n",
+ pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d ch= %p\n",
__func__, ch->cq->cqe, qp_init->cap.max_send_sge,
- qp_init->cap.max_send_wr, ch->cm_id);
+ qp_init->cap.max_send_wr, ch);
ret = srpt_init_ch_qp(ch, ch->qp);
if (ret)
@@ -1718,7 +1792,8 @@ static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
}
/**
- * srpt_close_ch() - Close an RDMA channel.
+ * srpt_close_ch - close a RDMA channel
+ * @ch: SRPT RDMA channel.
*
* Make sure all resources associated with the channel will be deallocated at
* an appropriate time.
@@ -1743,8 +1818,6 @@ static bool srpt_close_ch(struct srpt_rdma_ch *ch)
pr_err("%s-%d: changing queue pair into error state failed: %d\n",
ch->sess_name, ch->qp->qp_num, ret);
- pr_debug("%s-%d: queued zerolength write\n", ch->sess_name,
- ch->qp->qp_num);
ret = srpt_zerolength_write(ch);
if (ret < 0) {
pr_err("%s-%d: queuing zero-length write failed: %d\n",
@@ -1776,9 +1849,9 @@ static int srpt_disconnect_ch(struct srpt_rdma_ch *ch)
if (!srpt_set_ch_state(ch, CH_DISCONNECTING))
return -ENOTCONN;
- ret = ib_send_cm_dreq(ch->cm_id, NULL, 0);
+ ret = ib_send_cm_dreq(ch->ib_cm.cm_id, NULL, 0);
if (ret < 0)
- ret = ib_send_cm_drep(ch->cm_id, NULL, 0);
+ ret = ib_send_cm_drep(ch->ib_cm.cm_id, NULL, 0);
if (ret < 0 && srpt_close_ch(ch))
ret = 0;
@@ -1786,83 +1859,135 @@ static int srpt_disconnect_ch(struct srpt_rdma_ch *ch)
return ret;
}
-/*
- * Send DREQ and wait for DREP. Return true if and only if this function
- * changed the state of @ch.
- */
-static bool srpt_disconnect_ch_sync(struct srpt_rdma_ch *ch)
- __must_hold(&sdev->mutex)
+static bool srpt_ch_closed(struct srpt_port *sport, struct srpt_rdma_ch *ch)
{
- DECLARE_COMPLETION_ONSTACK(release_done);
- struct srpt_device *sdev = ch->sport->sdev;
- bool wait;
+ struct srpt_nexus *nexus;
+ struct srpt_rdma_ch *ch2;
+ bool res = true;
+
+ rcu_read_lock();
+ list_for_each_entry(nexus, &sport->nexus_list, entry) {
+ list_for_each_entry(ch2, &nexus->ch_list, list) {
+ if (ch2 == ch) {
+ res = false;
+ goto done;
+ }
+ }
+ }
+done:
+ rcu_read_unlock();
- lockdep_assert_held(&sdev->mutex);
+ return res;
+}
+
+/* Send DREQ and wait for DREP. */
+static void srpt_disconnect_ch_sync(struct srpt_rdma_ch *ch)
+{
+ struct srpt_port *sport = ch->sport;
pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num,
ch->state);
- WARN_ON(ch->release_done);
- ch->release_done = &release_done;
- wait = !list_empty(&ch->list);
+ mutex_lock(&sport->mutex);
srpt_disconnect_ch(ch);
- mutex_unlock(&sdev->mutex);
+ mutex_unlock(&sport->mutex);
- if (!wait)
- goto out;
-
- while (wait_for_completion_timeout(&release_done, 180 * HZ) == 0)
+ while (wait_event_timeout(sport->ch_releaseQ, srpt_ch_closed(sport, ch),
+ 5 * HZ) == 0)
pr_info("%s(%s-%d state %d): still waiting ...\n", __func__,
ch->sess_name, ch->qp->qp_num, ch->state);
-out:
- mutex_lock(&sdev->mutex);
- return wait;
}
-static void srpt_set_enabled(struct srpt_port *sport, bool enabled)
- __must_hold(&sdev->mutex)
+static void __srpt_close_all_ch(struct srpt_port *sport)
{
- struct srpt_device *sdev = sport->sdev;
+ struct srpt_nexus *nexus;
struct srpt_rdma_ch *ch;
- lockdep_assert_held(&sdev->mutex);
+ lockdep_assert_held(&sport->mutex);
- if (sport->enabled == enabled)
- return;
- sport->enabled = enabled;
- if (sport->enabled)
- return;
+ list_for_each_entry(nexus, &sport->nexus_list, entry) {
+ list_for_each_entry(ch, &nexus->ch_list, list) {
+ if (srpt_disconnect_ch(ch) >= 0)
+ pr_info("Closing channel %s-%d because target %s_%d has been disabled\n",
+ ch->sess_name, ch->qp->qp_num,
+ sport->sdev->device->name, sport->port);
+ srpt_close_ch(ch);
+ }
+ }
+}
-again:
- list_for_each_entry(ch, &sdev->rch_list, list) {
- if (ch->sport == sport) {
- pr_info("%s: closing channel %s-%d\n",
- sdev->device->name, ch->sess_name,
- ch->qp->qp_num);
- if (srpt_disconnect_ch_sync(ch))
- goto again;
+/*
+ * Look up (i_port_id, t_port_id) in sport->nexus_list. Create an entry if
+ * it does not yet exist.
+ */
+static struct srpt_nexus *srpt_get_nexus(struct srpt_port *sport,
+ const u8 i_port_id[16],
+ const u8 t_port_id[16])
+{
+ struct srpt_nexus *nexus = NULL, *tmp_nexus = NULL, *n;
+
+ for (;;) {
+ mutex_lock(&sport->mutex);
+ list_for_each_entry(n, &sport->nexus_list, entry) {
+ if (memcmp(n->i_port_id, i_port_id, 16) == 0 &&
+ memcmp(n->t_port_id, t_port_id, 16) == 0) {
+ nexus = n;
+ break;
+ }
}
+ if (!nexus && tmp_nexus) {
+ list_add_tail_rcu(&tmp_nexus->entry,
+ &sport->nexus_list);
+ swap(nexus, tmp_nexus);
+ }
+ mutex_unlock(&sport->mutex);
+
+ if (nexus)
+ break;
+ tmp_nexus = kzalloc(sizeof(*nexus), GFP_KERNEL);
+ if (!tmp_nexus) {
+ nexus = ERR_PTR(-ENOMEM);
+ break;
+ }
+ INIT_LIST_HEAD(&tmp_nexus->ch_list);
+ memcpy(tmp_nexus->i_port_id, i_port_id, 16);
+ memcpy(tmp_nexus->t_port_id, t_port_id, 16);
}
+ kfree(tmp_nexus);
+
+ return nexus;
+}
+
+static void srpt_set_enabled(struct srpt_port *sport, bool enabled)
+ __must_hold(&sport->mutex)
+{
+ lockdep_assert_held(&sport->mutex);
+
+ if (sport->enabled == enabled)
+ return;
+ sport->enabled = enabled;
+ if (!enabled)
+ __srpt_close_all_ch(sport);
}
static void srpt_free_ch(struct kref *kref)
{
struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref);
- kfree(ch);
+ kfree_rcu(ch, rcu);
}
static void srpt_release_channel_work(struct work_struct *w)
{
struct srpt_rdma_ch *ch;
struct srpt_device *sdev;
+ struct srpt_port *sport;
struct se_session *se_sess;
ch = container_of(w, struct srpt_rdma_ch, release_work);
- pr_debug("%s: %s-%d; release_done = %p\n", __func__, ch->sess_name,
- ch->qp->qp_num, ch->release_done);
+ pr_debug("%s-%d\n", ch->sess_name, ch->qp->qp_num);
sdev = ch->sport->sdev;
BUG_ON(!sdev);
@@ -1877,169 +2002,141 @@ static void srpt_release_channel_work(struct work_struct *w)
transport_deregister_session(se_sess);
ch->sess = NULL;
- ib_destroy_cm_id(ch->cm_id);
+ ib_destroy_cm_id(ch->ib_cm.cm_id);
srpt_destroy_ch_ib(ch);
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
ch->sport->sdev, ch->rq_size,
- ch->rsp_size, DMA_TO_DEVICE);
+ ch->max_rsp_size, DMA_TO_DEVICE);
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring,
sdev, ch->rq_size,
srp_max_req_size, DMA_FROM_DEVICE);
- mutex_lock(&sdev->mutex);
- list_del_init(&ch->list);
- if (ch->release_done)
- complete(ch->release_done);
- mutex_unlock(&sdev->mutex);
+ sport = ch->sport;
+ mutex_lock(&sport->mutex);
+ list_del_rcu(&ch->list);
+ mutex_unlock(&sport->mutex);
- wake_up(&sdev->ch_releaseQ);
+ wake_up(&sport->ch_releaseQ);
kref_put(&ch->kref, srpt_free_ch);
}
/**
- * srpt_cm_req_recv() - Process the event IB_CM_REQ_RECEIVED.
+ * srpt_cm_req_recv - process the event IB_CM_REQ_RECEIVED
+ * @cm_id: IB/CM connection identifier.
+ * @port_num: Port through which the IB/CM REQ message was received.
+ * @pkey: P_Key of the incoming connection.
+ * @req: SRP login request.
+ * @src_addr: GID of the port that submitted the login request.
*
* Ownership of the cm_id is transferred to the target session if this
* functions returns zero. Otherwise the caller remains the owner of cm_id.
*/
static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
- struct ib_cm_req_event_param *param,
- void *private_data)
+ u8 port_num, __be16 pkey,
+ const struct srp_login_req *req,
+ const char *src_addr)
{
struct srpt_device *sdev = cm_id->context;
- struct srpt_port *sport = &sdev->port[param->port - 1];
- struct srp_login_req *req;
- struct srp_login_rsp *rsp;
- struct srp_login_rej *rej;
- struct ib_cm_rep_param *rep_param;
- struct srpt_rdma_ch *ch, *tmp_ch;
- __be16 *guid;
+ struct srpt_port *sport = &sdev->port[port_num - 1];
+ struct srpt_nexus *nexus;
+ struct srp_login_rsp *rsp = NULL;
+ struct srp_login_rej *rej = NULL;
+ struct ib_cm_rep_param *rep_param = NULL;
+ struct srpt_rdma_ch *ch;
+ char i_port_id[36];
u32 it_iu_len;
- int i, ret = 0;
+ int i, ret;
WARN_ON_ONCE(irqs_disabled());
- if (WARN_ON(!sdev || !private_data))
+ if (WARN_ON(!sdev || !req))
return -EINVAL;
- req = (struct srp_login_req *)private_data;
-
it_iu_len = be32_to_cpu(req->req_it_iu_len);
- pr_info("Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx,"
- " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d"
- " (guid=0x%llx:0x%llx)\n",
- be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
- be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
- be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
- be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
- it_iu_len,
- param->port,
- be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
- be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
+ pr_info("Received SRP_LOGIN_REQ with i_port_id %pI6, t_port_id %pI6 and it_iu_len %d on port %d (guid=%pI6); pkey %#04x\n",
+ req->initiator_port_id, req->target_port_id, it_iu_len,
+ port_num, &sport->gid, be16_to_cpu(pkey));
+ nexus = srpt_get_nexus(sport, req->initiator_port_id,
+ req->target_port_id);
+ if (IS_ERR(nexus)) {
+ ret = PTR_ERR(nexus);
+ goto out;
+ }
+
+ ret = -ENOMEM;
rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
rej = kzalloc(sizeof(*rej), GFP_KERNEL);
rep_param = kzalloc(sizeof(*rep_param), GFP_KERNEL);
-
- if (!rsp || !rej || !rep_param) {
- ret = -ENOMEM;
+ if (!rsp || !rej || !rep_param)
goto out;
- }
+ ret = -EINVAL;
if (it_iu_len > srp_max_req_size || it_iu_len < 64) {
rej->reason = cpu_to_be32(
- SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
- ret = -EINVAL;
- pr_err("rejected SRP_LOGIN_REQ because its"
- " length (%d bytes) is out of range (%d .. %d)\n",
+ SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
+ pr_err("rejected SRP_LOGIN_REQ because its length (%d bytes) is out of range (%d .. %d)\n",
it_iu_len, 64, srp_max_req_size);
goto reject;
}
if (!sport->enabled) {
- rej->reason = cpu_to_be32(
- SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
- ret = -EINVAL;
- pr_err("rejected SRP_LOGIN_REQ because the target port"
- " has not yet been enabled\n");
+ rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+ pr_info("rejected SRP_LOGIN_REQ because target port %s_%d has not yet been enabled\n",
+ sport->sdev->device->name, port_num);
goto reject;
}
- if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
- rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
-
- mutex_lock(&sdev->mutex);
-
- list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
- if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
- && !memcmp(ch->t_port_id, req->target_port_id, 16)
- && param->port == ch->sport->port
- && param->listen_id == ch->sport->sdev->cm_id
- && ch->cm_id) {
- if (srpt_disconnect_ch(ch) < 0)
- continue;
- pr_info("Relogin - closed existing channel %s\n",
- ch->sess_name);
- rsp->rsp_flags =
- SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
- }
- }
-
- mutex_unlock(&sdev->mutex);
-
- } else
- rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
-
if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid)
|| *(__be64 *)(req->target_port_id + 8) !=
cpu_to_be64(srpt_service_guid)) {
rej->reason = cpu_to_be32(
- SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
- ret = -ENOMEM;
- pr_err("rejected SRP_LOGIN_REQ because it"
- " has an invalid target port identifier.\n");
+ SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
+ pr_err("rejected SRP_LOGIN_REQ because it has an invalid target port identifier.\n");
goto reject;
}
+ ret = -ENOMEM;
ch = kzalloc(sizeof(*ch), GFP_KERNEL);
if (!ch) {
- rej->reason = cpu_to_be32(
- SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
- pr_err("rejected SRP_LOGIN_REQ because no memory.\n");
- ret = -ENOMEM;
+ rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+ pr_err("rejected SRP_LOGIN_REQ because out of memory.\n");
goto reject;
}
kref_init(&ch->kref);
+ ch->pkey = be16_to_cpu(pkey);
+ ch->nexus = nexus;
ch->zw_cqe.done = srpt_zerolength_write_done;
INIT_WORK(&ch->release_work, srpt_release_channel_work);
- memcpy(ch->i_port_id, req->initiator_port_id, 16);
- memcpy(ch->t_port_id, req->target_port_id, 16);
- ch->sport = &sdev->port[param->port - 1];
- ch->cm_id = cm_id;
+ ch->sport = sport;
+ ch->ib_cm.cm_id = cm_id;
cm_id->context = ch;
/*
* ch->rq_size should be at least as large as the initiator queue
* depth to avoid that the initiator driver has to report QUEUE_FULL
* to the SCSI mid-layer.
*/
- ch->rq_size = min(SRPT_RQ_SIZE, sdev->device->attrs.max_qp_wr);
+ ch->rq_size = min(MAX_SRPT_RQ_SIZE, sdev->device->attrs.max_qp_wr);
spin_lock_init(&ch->spinlock);
ch->state = CH_CONNECTING;
INIT_LIST_HEAD(&ch->cmd_wait_list);
- ch->rsp_size = ch->sport->port_attrib.srp_max_rsp_size;
+ ch->max_rsp_size = ch->sport->port_attrib.srp_max_rsp_size;
ch->ioctx_ring = (struct srpt_send_ioctx **)
srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size,
sizeof(*ch->ioctx_ring[0]),
- ch->rsp_size, DMA_TO_DEVICE);
- if (!ch->ioctx_ring)
+ ch->max_rsp_size, DMA_TO_DEVICE);
+ if (!ch->ioctx_ring) {
+ pr_err("rejected SRP_LOGIN_REQ because creating a new QP SQ ring failed.\n");
+ rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
goto free_ch;
+ }
INIT_LIST_HEAD(&ch->free_list);
for (i = 0; i < ch->rq_size; i++) {
@@ -2058,59 +2155,88 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
goto free_ring;
}
+ for (i = 0; i < ch->rq_size; i++)
+ INIT_LIST_HEAD(&ch->ioctx_recv_ring[i]->wait_list);
}
ret = srpt_create_ch_ib(ch);
if (ret) {
- rej->reason = cpu_to_be32(
- SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
- pr_err("rejected SRP_LOGIN_REQ because creating"
- " a new RDMA channel failed.\n");
- goto free_recv_ring;
- }
-
- ret = srpt_ch_qp_rtr(ch, ch->qp);
- if (ret) {
rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
- pr_err("rejected SRP_LOGIN_REQ because enabling"
- " RTR failed (error code = %d)\n", ret);
- goto destroy_ib;
+ pr_err("rejected SRP_LOGIN_REQ because creating a new RDMA channel failed.\n");
+ goto free_recv_ring;
}
- guid = (__be16 *)&param->primary_path->dgid.global.interface_id;
- snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x",
- be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
- be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
- snprintf(ch->sess_name, sizeof(ch->sess_name), "0x%016llx%016llx",
- be64_to_cpu(*(__be64 *)ch->i_port_id),
- be64_to_cpu(*(__be64 *)(ch->i_port_id + 8)));
+ strlcpy(ch->sess_name, src_addr, sizeof(ch->sess_name));
+ snprintf(i_port_id, sizeof(i_port_id), "0x%016llx%016llx",
+ be64_to_cpu(*(__be64 *)nexus->i_port_id),
+ be64_to_cpu(*(__be64 *)(nexus->i_port_id + 8)));
pr_debug("registering session %s\n", ch->sess_name);
if (sport->port_guid_tpg.se_tpg_wwn)
ch->sess = target_alloc_session(&sport->port_guid_tpg, 0, 0,
TARGET_PROT_NORMAL,
- ch->ini_guid, ch, NULL);
+ ch->sess_name, ch, NULL);
if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
- TARGET_PROT_NORMAL, ch->sess_name, ch,
+ TARGET_PROT_NORMAL, i_port_id, ch,
NULL);
/* Retry without leading "0x" */
if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
TARGET_PROT_NORMAL,
- ch->sess_name + 2, ch, NULL);
+ i_port_id + 2, ch, NULL);
if (IS_ERR_OR_NULL(ch->sess)) {
- pr_info("Rejected login because no ACL has been configured yet for initiator %s.\n",
- ch->sess_name);
- rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ?
+ ret = PTR_ERR(ch->sess);
+ pr_info("Rejected login for initiator %s: ret = %d.\n",
+ ch->sess_name, ret);
+ rej->reason = cpu_to_be32(ret == -ENOMEM ?
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES :
SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
+ goto reject;
+ }
+
+ mutex_lock(&sport->mutex);
+
+ if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
+ struct srpt_rdma_ch *ch2;
+
+ rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
+
+ list_for_each_entry(ch2, &nexus->ch_list, list) {
+ if (srpt_disconnect_ch(ch2) < 0)
+ continue;
+ pr_info("Relogin - closed existing channel %s\n",
+ ch2->sess_name);
+ rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
+ }
+ } else {
+ rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
+ }
+
+ list_add_tail_rcu(&ch->list, &nexus->ch_list);
+
+ if (!sport->enabled) {
+ rej->reason = cpu_to_be32(
+ SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+ pr_info("rejected SRP_LOGIN_REQ because target %s_%d is not enabled\n",
+ sdev->device->name, port_num);
+ mutex_unlock(&sport->mutex);
+ goto reject;
+ }
+
+ mutex_unlock(&sport->mutex);
+
+ ret = srpt_ch_qp_rtr(ch, ch->qp);
+ if (ret) {
+ rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+ pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n",
+ ret);
goto destroy_ib;
}
- pr_debug("Establish connection sess=%p name=%s cm_id=%p\n", ch->sess,
- ch->sess_name, ch->cm_id);
+ pr_debug("Establish connection sess=%p name=%s ch=%p\n", ch->sess,
+ ch->sess_name, ch);
/* create srp_login_response */
rsp->opcode = SRP_LOGIN_RSP;
@@ -2118,8 +2244,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
rsp->max_it_iu_len = req->req_it_iu_len;
rsp->max_ti_iu_len = req->req_it_iu_len;
ch->max_ti_iu_len = it_iu_len;
- rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT
- | SRP_BUF_FORMAT_INDIRECT);
+ rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
+ SRP_BUF_FORMAT_INDIRECT);
rsp->req_lim_delta = cpu_to_be32(ch->rq_size);
atomic_set(&ch->req_lim, ch->rq_size);
atomic_set(&ch->req_lim_delta, 0);
@@ -2135,25 +2261,31 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
rep_param->responder_resources = 4;
rep_param->initiator_depth = 4;
- ret = ib_send_cm_rep(cm_id, rep_param);
- if (ret) {
- pr_err("sending SRP_LOGIN_REQ response failed"
- " (error code = %d)\n", ret);
- goto release_channel;
- }
+ /*
+ * Hold the sport mutex while accepting a connection to avoid that
+ * srpt_disconnect_ch() is invoked concurrently with this code.
+ */
+ mutex_lock(&sport->mutex);
+ if (sport->enabled && ch->state == CH_CONNECTING)
+ ret = ib_send_cm_rep(cm_id, rep_param);
+ else
+ ret = -EINVAL;
+ mutex_unlock(&sport->mutex);
- mutex_lock(&sdev->mutex);
- list_add_tail(&ch->list, &sdev->rch_list);
- mutex_unlock(&sdev->mutex);
+ switch (ret) {
+ case 0:
+ break;
+ case -EINVAL:
+ goto reject;
+ default:
+ rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+ pr_err("sending SRP_LOGIN_REQ response failed (error code = %d)\n",
+ ret);
+ goto reject;
+ }
goto out;
-release_channel:
- srpt_disconnect_ch(ch);
- transport_deregister_session_configfs(ch->sess);
- transport_deregister_session(ch->sess);
- ch->sess = NULL;
-
destroy_ib:
srpt_destroy_ch_ib(ch);
@@ -2165,15 +2297,20 @@ free_recv_ring:
free_ring:
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
ch->sport->sdev, ch->rq_size,
- ch->rsp_size, DMA_TO_DEVICE);
+ ch->max_rsp_size, DMA_TO_DEVICE);
free_ch:
+ cm_id->context = NULL;
kfree(ch);
+ ch = NULL;
+
+ WARN_ON_ONCE(ret == 0);
reject:
+ pr_info("Rejecting login with reason %#x\n", be32_to_cpu(rej->reason));
rej->opcode = SRP_LOGIN_REJ;
rej->tag = req->tag;
- rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT
- | SRP_BUF_FORMAT_INDIRECT);
+ rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
+ SRP_BUF_FORMAT_INDIRECT);
ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
(void *)rej, sizeof(*rej));
@@ -2186,6 +2323,19 @@ out:
return ret;
}
+static int srpt_ib_cm_req_recv(struct ib_cm_id *cm_id,
+ struct ib_cm_req_event_param *param,
+ void *private_data)
+{
+ char sguid[40];
+
+ srpt_format_guid(sguid, sizeof(sguid),
+ &param->primary_path->dgid.global.interface_id);
+
+ return srpt_cm_req_recv(cm_id, param->port, param->primary_path->pkey,
+ private_data, sguid);
+}
+
static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch,
enum ib_cm_rej_reason reason,
const u8 *private_data,
@@ -2206,7 +2356,8 @@ static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch,
}
/**
- * srpt_cm_rtu_recv() - Process an IB_CM_RTU_RECEIVED or USER_ESTABLISHED event.
+ * srpt_cm_rtu_recv - process an IB_CM_RTU_RECEIVED or USER_ESTABLISHED event
+ * @ch: SRPT RDMA channel.
*
* An IB_CM_RTU_RECEIVED message indicates that the connection is established
* and that the recipient may begin transmitting (RTU = ready to use).
@@ -2215,21 +2366,34 @@ static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch)
{
int ret;
- if (srpt_set_ch_state(ch, CH_LIVE)) {
- ret = srpt_ch_qp_rts(ch, ch->qp);
+ ret = srpt_ch_qp_rts(ch, ch->qp);
+ if (ret < 0) {
+ pr_err("%s-%d: QP transition to RTS failed\n", ch->sess_name,
+ ch->qp->qp_num);
+ srpt_close_ch(ch);
+ return;
+ }
- if (ret == 0) {
- /* Trigger wait list processing. */
- ret = srpt_zerolength_write(ch);
- WARN_ONCE(ret < 0, "%d\n", ret);
- } else {
- srpt_close_ch(ch);
- }
+ /*
+ * Note: calling srpt_close_ch() if the transition to the LIVE state
+ * fails is not necessary since that means that that function has
+ * already been invoked from another thread.
+ */
+ if (!srpt_set_ch_state(ch, CH_LIVE)) {
+ pr_err("%s-%d: channel transition to LIVE state failed\n",
+ ch->sess_name, ch->qp->qp_num);
+ return;
}
+
+ /* Trigger wait list processing. */
+ ret = srpt_zerolength_write(ch);
+ WARN_ONCE(ret < 0, "%d\n", ret);
}
/**
- * srpt_cm_handler() - IB connection manager callback function.
+ * srpt_cm_handler - IB connection manager callback function
+ * @cm_id: IB/CM connection identifier.
+ * @event: IB/CM event.
*
* A non-zero return value will cause the caller destroy the CM ID.
*
@@ -2246,8 +2410,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
ret = 0;
switch (event->event) {
case IB_CM_REQ_RECEIVED:
- ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
- event->private_data);
+ ret = srpt_ib_cm_req_recv(cm_id, &event->param.req_rcvd,
+ event->private_data);
break;
case IB_CM_REJ_RECEIVED:
srpt_cm_rej_recv(ch, event->param.rej_rcvd.reason,
@@ -2294,11 +2458,11 @@ static int srpt_write_pending_status(struct se_cmd *se_cmd)
struct srpt_send_ioctx *ioctx;
ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
- return srpt_get_cmd_state(ioctx) == SRPT_STATE_NEED_DATA;
+ return ioctx->state == SRPT_STATE_NEED_DATA;
}
/*
- * srpt_write_pending() - Start data transfer from initiator to target (write).
+ * srpt_write_pending - Start data transfer from initiator to target (write).
*/
static int srpt_write_pending(struct se_cmd *se_cmd)
{
@@ -2355,7 +2519,8 @@ static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status)
}
/**
- * srpt_queue_response() - Transmits the response to a SCSI command.
+ * srpt_queue_response - transmit the response to a SCSI command
+ * @cmd: SCSI target command.
*
* Callback function called by the TCM core. Must not block since it can be
* invoked on the context of the IB completion handler.
@@ -2369,13 +2534,11 @@ static void srpt_queue_response(struct se_cmd *cmd)
struct ib_send_wr send_wr, *first_wr = &send_wr, *bad_wr;
struct ib_sge sge;
enum srpt_command_state state;
- unsigned long flags;
int resp_len, ret, i;
u8 srp_tm_status;
BUG_ON(!ch);
- spin_lock_irqsave(&ioctx->spinlock, flags);
state = ioctx->state;
switch (state) {
case SRPT_STATE_NEW:
@@ -2390,7 +2553,6 @@ static void srpt_queue_response(struct se_cmd *cmd)
ch, ioctx->ioctx.index, ioctx->state);
break;
}
- spin_unlock_irqrestore(&ioctx->spinlock, flags);
if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT)))
return;
@@ -2494,26 +2656,56 @@ static void srpt_refresh_port_work(struct work_struct *work)
srpt_refresh_port(sport);
}
+static bool srpt_ch_list_empty(struct srpt_port *sport)
+{
+ struct srpt_nexus *nexus;
+ bool res = true;
+
+ rcu_read_lock();
+ list_for_each_entry(nexus, &sport->nexus_list, entry)
+ if (!list_empty(&nexus->ch_list))
+ res = false;
+ rcu_read_unlock();
+
+ return res;
+}
+
/**
- * srpt_release_sdev() - Free the channel resources associated with a target.
+ * srpt_release_sport - disable login and wait for associated channels
+ * @sport: SRPT HCA port.
*/
-static int srpt_release_sdev(struct srpt_device *sdev)
+static int srpt_release_sport(struct srpt_port *sport)
{
- int i, res;
+ struct srpt_nexus *nexus, *next_n;
+ struct srpt_rdma_ch *ch;
WARN_ON_ONCE(irqs_disabled());
- BUG_ON(!sdev);
-
- mutex_lock(&sdev->mutex);
- for (i = 0; i < ARRAY_SIZE(sdev->port); i++)
- srpt_set_enabled(&sdev->port[i], false);
- mutex_unlock(&sdev->mutex);
+ mutex_lock(&sport->mutex);
+ srpt_set_enabled(sport, false);
+ mutex_unlock(&sport->mutex);
+
+ while (wait_event_timeout(sport->ch_releaseQ,
+ srpt_ch_list_empty(sport), 5 * HZ) <= 0) {
+ pr_info("%s_%d: waiting for session unregistration ...\n",
+ sport->sdev->device->name, sport->port);
+ rcu_read_lock();
+ list_for_each_entry(nexus, &sport->nexus_list, entry) {
+ list_for_each_entry(ch, &nexus->ch_list, list) {
+ pr_info("%s-%d: state %s\n",
+ ch->sess_name, ch->qp->qp_num,
+ get_ch_state_name(ch->state));
+ }
+ }
+ rcu_read_unlock();
+ }
- res = wait_event_interruptible(sdev->ch_releaseQ,
- list_empty_careful(&sdev->rch_list));
- if (res)
- pr_err("%s: interrupted.\n", __func__);
+ mutex_lock(&sport->mutex);
+ list_for_each_entry_safe(nexus, next_n, &sport->nexus_list, entry) {
+ list_del(&nexus->entry);
+ kfree_rcu(nexus, rcu);
+ }
+ mutex_unlock(&sport->mutex);
return 0;
}
@@ -2600,8 +2792,10 @@ static int srpt_alloc_srq(struct srpt_device *sdev)
sdev->use_srq = true;
sdev->srq = srq;
- for (i = 0; i < sdev->srq_size; ++i)
+ for (i = 0; i < sdev->srq_size; ++i) {
+ INIT_LIST_HEAD(&sdev->ioctx_ring[i]->wait_list);
srpt_post_recv(sdev, NULL, sdev->ioctx_ring[i]);
+ }
return 0;
}
@@ -2623,7 +2817,8 @@ static int srpt_use_srq(struct srpt_device *sdev, bool use_srq)
}
/**
- * srpt_add_one() - Infiniband device addition callback function.
+ * srpt_add_one - InfiniBand device addition callback function
+ * @device: Describes a HCA.
*/
static void srpt_add_one(struct ib_device *device)
{
@@ -2638,9 +2833,7 @@ static void srpt_add_one(struct ib_device *device)
goto err;
sdev->device = device;
- INIT_LIST_HEAD(&sdev->rch_list);
- init_waitqueue_head(&sdev->ch_releaseQ);
- mutex_init(&sdev->mutex);
+ mutex_init(&sdev->sdev_mutex);
sdev->pd = ib_alloc_pd(device, 0);
if (IS_ERR(sdev->pd))
@@ -2681,6 +2874,9 @@ static void srpt_add_one(struct ib_device *device)
for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
sport = &sdev->port[i - 1];
+ INIT_LIST_HEAD(&sport->nexus_list);
+ init_waitqueue_head(&sport->ch_releaseQ);
+ mutex_init(&sport->mutex);
sport->sdev = sdev;
sport->port = i;
sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE;
@@ -2721,7 +2917,9 @@ err:
}
/**
- * srpt_remove_one() - InfiniBand device removal callback function.
+ * srpt_remove_one - InfiniBand device removal callback function
+ * @device: Describes a HCA.
+ * @client_data: The value passed as the third argument to ib_set_client_data().
*/
static void srpt_remove_one(struct ib_device *device, void *client_data)
{
@@ -2751,7 +2949,9 @@ static void srpt_remove_one(struct ib_device *device, void *client_data)
spin_lock(&srpt_dev_lock);
list_del(&sdev->list);
spin_unlock(&srpt_dev_lock);
- srpt_release_sdev(sdev);
+
+ for (i = 0; i < sdev->device->phys_port_cnt; i++)
+ srpt_release_sport(&sdev->port[i]);
srpt_free_srq(sdev);
@@ -2827,7 +3027,8 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
}
/**
- * srpt_close_session() - Forcibly close a session.
+ * srpt_close_session - forcibly close a session
+ * @se_sess: SCSI target session.
*
* Callback function invoked by the TCM core to clean up sessions associated
* with a node ACL when the user invokes
@@ -2836,15 +3037,13 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
static void srpt_close_session(struct se_session *se_sess)
{
struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr;
- struct srpt_device *sdev = ch->sport->sdev;
- mutex_lock(&sdev->mutex);
srpt_disconnect_ch_sync(ch);
- mutex_unlock(&sdev->mutex);
}
/**
- * srpt_sess_get_index() - Return the value of scsiAttIntrPortIndex (SCSI-MIB).
+ * srpt_sess_get_index - return the value of scsiAttIntrPortIndex (SCSI-MIB)
+ * @se_sess: SCSI target session.
*
* A quote from RFC 4455 (SCSI-MIB) about this MIB object:
* This object represents an arbitrary integer used to uniquely identify a
@@ -2866,7 +3065,7 @@ static int srpt_get_tcm_cmd_state(struct se_cmd *se_cmd)
struct srpt_send_ioctx *ioctx;
ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
- return srpt_get_cmd_state(ioctx);
+ return ioctx->state;
}
static int srpt_parse_guid(u64 *guid, const char *name)
@@ -2883,7 +3082,7 @@ out:
}
/**
- * srpt_parse_i_port_id() - Parse an initiator port ID.
+ * srpt_parse_i_port_id - parse an initiator port ID
* @name: ASCII representation of a 128-bit initiator port ID.
* @i_port_id: Binary 128-bit port ID.
*/
@@ -3064,18 +3263,24 @@ static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item,
if (val != !!val)
return -EINVAL;
- ret = mutex_lock_interruptible(&sdev->mutex);
+ ret = mutex_lock_interruptible(&sdev->sdev_mutex);
if (ret < 0)
return ret;
+ ret = mutex_lock_interruptible(&sport->mutex);
+ if (ret < 0)
+ goto unlock_sdev;
enabled = sport->enabled;
/* Log out all initiator systems before changing 'use_srq'. */
srpt_set_enabled(sport, false);
sport->port_attrib.use_srq = val;
srpt_use_srq(sdev, sport->port_attrib.use_srq);
srpt_set_enabled(sport, enabled);
- mutex_unlock(&sdev->mutex);
+ ret = count;
+ mutex_unlock(&sport->mutex);
+unlock_sdev:
+ mutex_unlock(&sdev->sdev_mutex);
- return count;
+ return ret;
}
CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rdma_size);
@@ -3104,7 +3309,6 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
{
struct se_portal_group *se_tpg = to_tpg(item);
struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
- struct srpt_device *sdev = sport->sdev;
unsigned long tmp;
int ret;
@@ -3119,9 +3323,9 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
return -EINVAL;
}
- mutex_lock(&sdev->mutex);
+ mutex_lock(&sport->mutex);
srpt_set_enabled(sport, tmp);
- mutex_unlock(&sdev->mutex);
+ mutex_unlock(&sport->mutex);
return count;
}
@@ -3134,8 +3338,10 @@ static struct configfs_attribute *srpt_tpg_attrs[] = {
};
/**
- * configfs callback invoked for
- * mkdir /sys/kernel/config/target/$driver/$port/$tpg
+ * srpt_make_tpg - configfs callback invoked for mkdir /sys/kernel/config/target/$driver/$port/$tpg
+ * @wwn: Corresponds to $driver/$port.
+ * @group: Not used.
+ * @name: $tpg.
*/
static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
struct config_group *group,
@@ -3157,8 +3363,8 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
}
/**
- * configfs callback invoked for
- * rmdir /sys/kernel/config/target/$driver/$port/$tpg
+ * srpt_drop_tpg - configfs callback invoked for rmdir /sys/kernel/config/target/$driver/$port/$tpg
+ * @tpg: Target portal group to deregister.
*/
static void srpt_drop_tpg(struct se_portal_group *tpg)
{
@@ -3169,8 +3375,10 @@ static void srpt_drop_tpg(struct se_portal_group *tpg)
}
/**
- * configfs callback invoked for
- * mkdir /sys/kernel/config/target/$driver/$port
+ * srpt_make_tport - configfs callback invoked for mkdir /sys/kernel/config/target/$driver/$port
+ * @tf: Not used.
+ * @group: Not used.
+ * @name: $port.
*/
static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf,
struct config_group *group,
@@ -3180,8 +3388,8 @@ static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf,
}
/**
- * configfs callback invoked for
- * rmdir /sys/kernel/config/target/$driver/$port
+ * srpt_drop_tport - configfs callback invoked for rmdir /sys/kernel/config/target/$driver/$port
+ * @wwn: $port.
*/
static void srpt_drop_tport(struct se_wwn *wwn)
{
@@ -3239,7 +3447,7 @@ static const struct target_core_fabric_ops srpt_template = {
};
/**
- * srpt_init_module() - Kernel module initialization.
+ * srpt_init_module - kernel module initialization
*
* Note: Since ib_register_client() registers callback functions, and since at
* least one of these callback functions (srpt_add_one()) calls target core
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 673387d365a3..4d9199fd00dc 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -54,6 +54,8 @@
*/
#define SRP_SERVICE_NAME_PREFIX "SRP.T10:"
+struct srpt_nexus;
+
enum {
/*
* SRP IOControllerProfile attributes for SRP target ports that have
@@ -114,7 +116,7 @@ enum {
MIN_SRPT_SQ_SIZE = 16,
DEF_SRPT_SQ_SIZE = 4096,
- SRPT_RQ_SIZE = 128,
+ MAX_SRPT_RQ_SIZE = 128,
MIN_SRPT_SRQ_SIZE = 4,
DEFAULT_SRPT_SRQ_SIZE = 4095,
MAX_SRPT_SRQ_SIZE = 65535,
@@ -134,7 +136,7 @@ enum {
};
/**
- * enum srpt_command_state - SCSI command state managed by SRPT.
+ * enum srpt_command_state - SCSI command state managed by SRPT
* @SRPT_STATE_NEW: New command arrived and is being processed.
* @SRPT_STATE_NEED_DATA: Processing a write or bidir command and waiting
* for data arrival.
@@ -158,7 +160,8 @@ enum srpt_command_state {
};
/**
- * struct srpt_ioctx - Shared SRPT I/O context information.
+ * struct srpt_ioctx - shared SRPT I/O context information
+ * @cqe: Completion queue element.
* @buf: Pointer to the buffer.
* @dma: DMA address of the buffer.
* @index: Index of the I/O context in its ioctx_ring array.
@@ -171,7 +174,7 @@ struct srpt_ioctx {
};
/**
- * struct srpt_recv_ioctx - SRPT receive I/O context.
+ * struct srpt_recv_ioctx - SRPT receive I/O context
* @ioctx: See above.
* @wait_list: Node for insertion in srpt_rdma_ch.cmd_wait_list.
*/
@@ -187,13 +190,20 @@ struct srpt_rw_ctx {
};
/**
- * struct srpt_send_ioctx - SRPT send I/O context.
+ * struct srpt_send_ioctx - SRPT send I/O context
* @ioctx: See above.
* @ch: Channel pointer.
- * @spinlock: Protects 'state'.
+ * @s_rw_ctx: @rw_ctxs points here if only a single rw_ctx is needed.
+ * @rw_ctxs: RDMA read/write contexts.
+ * @rdma_cqe: RDMA completion queue element.
+ * @free_list: Node in srpt_rdma_ch.free_list.
* @state: I/O context state.
* @cmd: Target core command data structure.
* @sense_data: SCSI sense data.
+ * @n_rdma: Number of work requests needed to transfer this ioctx.
+ * @n_rw_ctx: Size of rw_ctxs array.
+ * @queue_status_only: Send a SCSI status back to the initiator but no data.
+ * @sense_data: Sense data to be sent to the initiator.
*/
struct srpt_send_ioctx {
struct srpt_ioctx ioctx;
@@ -204,10 +214,8 @@ struct srpt_send_ioctx {
struct ib_cqe rdma_cqe;
struct list_head free_list;
- spinlock_t spinlock;
enum srpt_command_state state;
struct se_cmd cmd;
- struct completion tx_done;
u8 n_rdma;
u8 n_rw_ctx;
bool queue_status_only;
@@ -215,7 +223,7 @@ struct srpt_send_ioctx {
};
/**
- * enum rdma_ch_state - SRP channel state.
+ * enum rdma_ch_state - SRP channel state
* @CH_CONNECTING: QP is in RTR state; waiting for RTU.
* @CH_LIVE: QP is in RTS state.
* @CH_DISCONNECTING: DREQ has been sent and waiting for DREP or DREQ has
@@ -233,17 +241,19 @@ enum rdma_ch_state {
};
/**
- * struct srpt_rdma_ch - RDMA channel.
- * @cm_id: IB CM ID associated with the channel.
+ * struct srpt_rdma_ch - RDMA channel
+ * @nexus: I_T nexus this channel is associated with.
* @qp: IB queue pair used for communicating over this channel.
+ * @cm_id: IB CM ID associated with the channel.
* @cq: IB completion queue for this channel.
+ * @zw_cqe: Zero-length write CQE.
+ * @rcu: RCU head.
+ * @kref: kref for this channel.
* @rq_size: IB receive queue size.
- * @rsp_size IB response message size in bytes.
+ * @max_rsp_size: Maximum size of an RSP response message in bytes.
* @sq_wr_avail: number of work requests available in the send queue.
* @sport: pointer to the information of the HCA port used by this
* channel.
- * @i_port_id: 128-bit initiator port identifier copied from SRP_LOGIN_REQ.
- * @t_port_id: 128-bit target port identifier copied from SRP_LOGIN_REQ.
* @max_ti_iu_len: maximum target-to-initiator information unit length.
* @req_lim: request limit: maximum number of requests that may be sent
* by the initiator without having received a response.
@@ -251,30 +261,34 @@ enum rdma_ch_state {
* @spinlock: Protects free_list and state.
* @free_list: Head of list with free send I/O contexts.
* @state: channel state. See also enum rdma_ch_state.
+ * @processing_wait_list: Whether or not cmd_wait_list is being processed.
* @ioctx_ring: Send ring.
* @ioctx_recv_ring: Receive I/O context ring.
- * @list: Node for insertion in the srpt_device.rch_list list.
+ * @list: Node in srpt_nexus.ch_list.
* @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
* list contains struct srpt_ioctx elements and is protected
* against concurrent modification by the cm_id spinlock.
+ * @pkey: P_Key of the IB partition for this SRP channel.
* @sess: Session information associated with this SRP channel.
* @sess_name: Session name.
- * @ini_guid: Initiator port GUID.
* @release_work: Allows scheduling of srpt_release_channel().
- * @release_done: Enables waiting for srpt_release_channel() completion.
*/
struct srpt_rdma_ch {
- struct ib_cm_id *cm_id;
+ struct srpt_nexus *nexus;
struct ib_qp *qp;
+ union {
+ struct {
+ struct ib_cm_id *cm_id;
+ } ib_cm;
+ };
struct ib_cq *cq;
struct ib_cqe zw_cqe;
+ struct rcu_head rcu;
struct kref kref;
int rq_size;
- u32 rsp_size;
+ u32 max_rsp_size;
atomic_t sq_wr_avail;
struct srpt_port *sport;
- u8 i_port_id[16];
- u8 t_port_id[16];
int max_ti_iu_len;
atomic_t req_lim;
atomic_t req_lim_delta;
@@ -285,15 +299,31 @@ struct srpt_rdma_ch {
struct srpt_recv_ioctx **ioctx_recv_ring;
struct list_head list;
struct list_head cmd_wait_list;
+ uint16_t pkey;
+ bool processing_wait_list;
struct se_session *sess;
- u8 sess_name[36];
- u8 ini_guid[24];
+ u8 sess_name[24];
struct work_struct release_work;
- struct completion *release_done;
};
/**
- * struct srpt_port_attib - Attributes for SRPT port
+ * struct srpt_nexus - I_T nexus
+ * @rcu: RCU head for this data structure.
+ * @entry: srpt_port.nexus_list list node.
+ * @ch_list: struct srpt_rdma_ch list. Protected by srpt_port.mutex.
+ * @i_port_id: 128-bit initiator port identifier copied from SRP_LOGIN_REQ.
+ * @t_port_id: 128-bit target port identifier copied from SRP_LOGIN_REQ.
+ */
+struct srpt_nexus {
+ struct rcu_head rcu;
+ struct list_head entry;
+ struct list_head ch_list;
+ u8 i_port_id[16];
+ u8 t_port_id[16];
+};
+
+/**
+ * struct srpt_port_attib - attributes for SRPT port
* @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections.
* @srp_max_rsp_size: Maximum size of SRP response messages in bytes.
* @srp_sq_size: Shared receive queue (SRQ) size.
@@ -307,7 +337,7 @@ struct srpt_port_attrib {
};
/**
- * struct srpt_port - Information associated by SRPT with a single IB port.
+ * struct srpt_port - information associated by SRPT with a single IB port
* @sdev: backpointer to the HCA information.
* @mad_agent: per-port management datagram processing information.
* @enabled: Whether or not this target port is enabled.
@@ -323,7 +353,10 @@ struct srpt_port_attrib {
* @port_guid_wwn: WWN associated with target port GUID.
* @port_gid_tpg: TPG associated with target port GID.
* @port_gid_wwn: WWN associated with target port GID.
- * @port_acl_list: Head of the list with all node ACLs for this port.
+ * @port_attrib: Port attributes that can be accessed through configfs.
+ * @ch_releaseQ: Enables waiting for removal from nexus_list.
+ * @mutex: Protects nexus_list.
+ * @nexus_list: Nexus list. See also srpt_nexus.entry.
*/
struct srpt_port {
struct srpt_device *sdev;
@@ -341,21 +374,22 @@ struct srpt_port {
struct se_portal_group port_gid_tpg;
struct se_wwn port_gid_wwn;
struct srpt_port_attrib port_attrib;
+ wait_queue_head_t ch_releaseQ;
+ struct mutex mutex;
+ struct list_head nexus_list;
};
/**
- * struct srpt_device - Information associated by SRPT with a single HCA.
+ * struct srpt_device - information associated by SRPT with a single HCA
* @device: Backpointer to the struct ib_device managed by the IB core.
* @pd: IB protection domain.
* @lkey: L_Key (local key) with write access to all local memory.
* @srq: Per-HCA SRQ (shared receive queue).
* @cm_id: Connection identifier.
* @srq_size: SRQ size.
+ * @sdev_mutex: Serializes use_srq changes.
* @use_srq: Whether or not to use SRQ.
* @ioctx_ring: Per-HCA SRQ.
- * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
- * @ch_releaseQ: Enables waiting for removal from rch_list.
- * @mutex: Protects rch_list.
* @port: Information about the ports owned by this HCA.
* @event_handler: Per-HCA asynchronous IB event handler.
* @list: Node in srpt_dev_list.
@@ -367,11 +401,9 @@ struct srpt_device {
struct ib_srq *srq;
struct ib_cm_id *cm_id;
int srq_size;
+ struct mutex sdev_mutex;
bool use_srq;
struct srpt_recv_ioctx **ioctx_ring;
- struct list_head rch_list;
- wait_queue_head_t ch_releaseQ;
- struct mutex mutex;
struct srpt_port port[2];
struct ib_event_handler event_handler;
struct list_head list;
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 769598f7b6c8..3aaf4bad6c5a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -287,6 +287,9 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
u64 in_param = 0;
int err;
+ if (!cnt)
+ return;
+
if (mlx4_is_mfunc(dev)) {
set_param_l(&in_param, base_qpn);
set_param_h(&in_param, cnt);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index e7e7cef2bde4..14d57828945d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -417,7 +417,11 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
mlx5_cq_completion(dev, cqn);
break;
-
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+ rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
+ mlx5_rsc_event(dev, rsn, eqe->type);
+ break;
case MLX5_EVENT_TYPE_PATH_MIG:
case MLX5_EVENT_TYPE_COMM_EST:
case MLX5_EVENT_TYPE_SQ_DRAINED:
@@ -715,6 +719,9 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
if (MLX5_CAP_GEN(dev, fpga))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR);
+ if (MLX5_CAP_GEN_MAX(dev, dct))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);
+
err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index c4392f741c5f..e6175f8ac0e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -688,7 +688,7 @@ static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
- MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM);
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM);
MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn);
MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
@@ -727,7 +727,7 @@ static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
MLX5_SET(qpc, qpc, next_rcv_psn,
MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn));
MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
- MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM);
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM);
ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32));
MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
@@ -888,7 +888,8 @@ struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index,
MLX5_ROCE_VERSION_2,
MLX5_ROCE_L3_TYPE_IPV6,
- remote_ip, remote_mac, true, 0);
+ remote_ip, remote_mac, true, 0,
+ MLX5_FPGA_PORT_NUM);
if (err) {
mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err);
ret = ERR_PTR(err);
@@ -954,7 +955,7 @@ err_cq:
mlx5_fpga_conn_destroy_cq(conn);
err_gid:
mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL,
- NULL, false, 0);
+ NULL, false, 0, MLX5_FPGA_PORT_NUM);
err_rsvd_gid:
mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index);
err:
@@ -982,7 +983,7 @@ void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn)
mlx5_fpga_conn_destroy_cq(conn);
mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0,
- NULL, NULL, false, 0);
+ NULL, NULL, false, 0, MLX5_FPGA_PORT_NUM);
mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index);
kfree(conn);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 5ef1b56b6a96..9d11e92fb541 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -195,12 +195,20 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return 0;
}
-int mlx5_cmd_init_hca(struct mlx5_core_dev *dev)
+int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id)
{
u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0};
u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {0};
+ int i;
MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA);
+
+ if (MLX5_CAP_GEN(dev, sw_owner_id)) {
+ for (i = 0; i < 4; i++)
+ MLX5_ARRAY_SET(init_hca_in, in, sw_owner_id, i,
+ sw_owner_id[i]);
+ }
+
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index ee2f378c5030..a281d95ce17c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -189,7 +189,7 @@ int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp
MLX5_QP_ENHANCED_ULP_STATELESS_MODE);
addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
- MLX5_SET(ads, addr_path, port, 1);
+ MLX5_SET(ads, addr_path, vhca_port_num, 1);
MLX5_SET(ads, addr_path, grh, 1);
ret = mlx5_core_create_qp(mdev, qp, in, inlen);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 5701f125e99c..e159243e0fcf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -31,6 +31,8 @@
*/
#include <linux/clocksource.h>
+#include <linux/highmem.h>
+#include <rdma/mlx5-abi.h>
#include "en.h"
enum {
@@ -71,6 +73,28 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
return mlx5_read_internal_timer(mdev) & cc->mask;
}
+static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
+ struct mlx5_clock *clock = &mdev->clock;
+ u32 sign;
+
+ if (!clock_info)
+ return;
+
+ sign = smp_load_acquire(&clock_info->sign);
+ smp_store_mb(clock_info->sign,
+ sign | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING);
+
+ clock_info->cycles = clock->tc.cycle_last;
+ clock_info->mult = clock->cycles.mult;
+ clock_info->nsec = clock->tc.nsec;
+ clock_info->frac = clock->tc.frac;
+
+ smp_store_release(&clock_info->sign,
+ sign + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING * 2);
+}
+
static void mlx5_pps_out(struct work_struct *work)
{
struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps,
@@ -109,6 +133,7 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
write_lock_irqsave(&clock->lock, flags);
timecounter_read(&clock->tc);
+ mlx5_update_clock_info_page(clock->mdev);
write_unlock_irqrestore(&clock->lock, flags);
schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
}
@@ -123,6 +148,7 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
write_lock_irqsave(&clock->lock, flags);
timecounter_init(&clock->tc, &clock->cycles, ns);
+ mlx5_update_clock_info_page(clock->mdev);
write_unlock_irqrestore(&clock->lock, flags);
return 0;
@@ -152,6 +178,7 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
write_lock_irqsave(&clock->lock, flags);
timecounter_adjtime(&clock->tc, delta);
+ mlx5_update_clock_info_page(clock->mdev);
write_unlock_irqrestore(&clock->lock, flags);
return 0;
@@ -179,6 +206,7 @@ static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
timecounter_read(&clock->tc);
clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
clock->nominal_c_mult + diff;
+ mlx5_update_clock_info_page(clock->mdev);
write_unlock_irqrestore(&clock->lock, flags);
return 0;
@@ -474,6 +502,7 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
clock->cycles.shift);
clock->nominal_c_mult = clock->cycles.mult;
clock->cycles.mask = CLOCKSOURCE_MASK(41);
+ clock->mdev = mdev;
timecounter_init(&clock->tc, &clock->cycles,
ktime_to_ns(ktime_get_real()));
@@ -486,6 +515,25 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
do_div(ns, NSEC_PER_SEC / 2 / HZ);
clock->overflow_period = ns;
+ mdev->clock_info_page = alloc_page(GFP_KERNEL);
+ if (mdev->clock_info_page) {
+ mdev->clock_info = kmap(mdev->clock_info_page);
+ if (!mdev->clock_info) {
+ __free_page(mdev->clock_info_page);
+ mlx5_core_warn(mdev, "failed to map clock page\n");
+ } else {
+ mdev->clock_info->sign = 0;
+ mdev->clock_info->nsec = clock->tc.nsec;
+ mdev->clock_info->cycles = clock->tc.cycle_last;
+ mdev->clock_info->mask = clock->cycles.mask;
+ mdev->clock_info->mult = clock->nominal_c_mult;
+ mdev->clock_info->shift = clock->cycles.shift;
+ mdev->clock_info->frac = clock->tc.frac;
+ mdev->clock_info->overflow_period =
+ clock->overflow_period;
+ }
+ }
+
INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow);
if (clock->overflow_period)
@@ -525,5 +573,12 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
cancel_work_sync(&clock->pps_info.out_work);
cancel_delayed_work_sync(&clock->overflow_work);
+
+ if (mdev->clock_info) {
+ kunmap(mdev->clock_info_page);
+ __free_page(mdev->clock_info_page);
+ mdev->clock_info = NULL;
+ }
+
kfree(clock->ptp_info.pin_config);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
index 573f59f46d41..7722a3f9bb68 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -121,7 +121,7 @@ EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count);
int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
u8 roce_version, u8 roce_l3_type, const u8 *gid,
- const u8 *mac, bool vlan, u16 vlan_id)
+ const u8 *mac, bool vlan, u16 vlan_id, u8 port_num)
{
#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
@@ -148,6 +148,9 @@ int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
memcpy(addr_l3_addr, gid, gidsz);
}
+ if (MLX5_CAP_GEN(dev, num_vhca_ports) > 0)
+ MLX5_SET(set_roce_address_in, in, vhca_port_num, port_num);
+
MLX5_SET(set_roce_address_in, in, roce_address_index, index);
MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 0f88fd30a09a..2ef641c91c26 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -75,6 +75,8 @@ static unsigned int prof_sel = MLX5_DEFAULT_PROF;
module_param_named(prof_sel, prof_sel, uint, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
+static u32 sw_owner_id[4];
+
enum {
MLX5_ATOMIC_REQ_MODE_BE = 0x0,
MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
@@ -551,6 +553,15 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
cache_line_128byte,
cache_line_size() == 128 ? 1 : 0);
+ if (MLX5_CAP_GEN_MAX(dev, dct))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1);
+
+ if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
+ MLX5_SET(cmd_hca_cap,
+ set_hca_cap,
+ num_vhca_ports,
+ MLX5_CAP_GEN_MAX(dev, num_vhca_ports));
+
err = set_caps(dev, set_ctx, set_sz,
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
@@ -1107,7 +1118,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto reclaim_boot_pages;
}
- err = mlx5_cmd_init_hca(dev);
+ err = mlx5_cmd_init_hca(dev, sw_owner_id);
if (err) {
dev_err(&pdev->dev, "init hca failed\n");
goto err_pagealloc_stop;
@@ -1643,6 +1654,8 @@ static int __init init(void)
{
int err;
+ get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
+
mlx5_core_verify_params();
mlx5_register_debugfs();
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index ff4a0b889a6f..b05868728da7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -86,7 +86,7 @@ enum {
int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
int mlx5_query_board_id(struct mlx5_core_dev *dev);
-int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 889130edb715..02d6c5b5d502 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -98,6 +98,11 @@ static u64 sq_allowed_event_types(void)
return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
}
+static u64 dct_allowed_event_types(void)
+{
+ return BIT(MLX5_EVENT_TYPE_DCT_DRAINED);
+}
+
static bool is_event_type_allowed(int rsc_type, int event_type)
{
switch (rsc_type) {
@@ -107,6 +112,8 @@ static bool is_event_type_allowed(int rsc_type, int event_type)
return BIT(event_type) & rq_allowed_event_types();
case MLX5_EVENT_QUEUE_TYPE_SQ:
return BIT(event_type) & sq_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_DCT:
+ return BIT(event_type) & dct_allowed_event_types();
default:
WARN(1, "Event arrived for unknown resource type");
return false;
@@ -116,6 +123,7 @@ static bool is_event_type_allowed(int rsc_type, int event_type)
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
{
struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
+ struct mlx5_core_dct *dct;
struct mlx5_core_qp *qp;
if (!common)
@@ -134,7 +142,11 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
qp = (struct mlx5_core_qp *)common;
qp->event(qp, event_type);
break;
-
+ case MLX5_RES_DCT:
+ dct = (struct mlx5_core_dct *)common;
+ if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED)
+ complete(&dct->drained);
+ break;
default:
mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
}
@@ -142,9 +154,9 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
mlx5_core_put_rsc(common);
}
-static int create_qprqsq_common(struct mlx5_core_dev *dev,
- struct mlx5_core_qp *qp,
- int rsc_type)
+static int create_resource_common(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp,
+ int rsc_type)
{
struct mlx5_qp_table *table = &dev->priv.qp_table;
int err;
@@ -165,8 +177,8 @@ static int create_qprqsq_common(struct mlx5_core_dev *dev,
return 0;
}
-static void destroy_qprqsq_common(struct mlx5_core_dev *dev,
- struct mlx5_core_qp *qp)
+static void destroy_resource_common(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp)
{
struct mlx5_qp_table *table = &dev->priv.qp_table;
unsigned long flags;
@@ -179,6 +191,40 @@ static void destroy_qprqsq_common(struct mlx5_core_dev *dev,
wait_for_completion(&qp->common.free);
}
+int mlx5_core_create_dct(struct mlx5_core_dev *dev,
+ struct mlx5_core_dct *dct,
+ u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0};
+ u32 din[MLX5_ST_SZ_DW(destroy_dct_in)] = {0};
+ u32 dout[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
+ struct mlx5_core_qp *qp = &dct->mqp;
+ int err;
+
+ init_completion(&dct->drained);
+ MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
+
+ err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+ if (err) {
+ mlx5_core_warn(dev, "create DCT failed, ret %d\n", err);
+ return err;
+ }
+
+ qp->qpn = MLX5_GET(create_dct_out, out, dctn);
+ err = create_resource_common(dev, qp, MLX5_RES_DCT);
+ if (err)
+ goto err_cmd;
+
+ return 0;
+err_cmd:
+ MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, din, dctn, qp->qpn);
+ mlx5_cmd_exec(dev, (void *)&in, sizeof(din),
+ (void *)&out, sizeof(dout));
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_create_dct);
+
int mlx5_core_create_qp(struct mlx5_core_dev *dev,
struct mlx5_core_qp *qp,
u32 *in, int inlen)
@@ -197,7 +243,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
qp->qpn = MLX5_GET(create_qp_out, out, qpn);
mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
- err = create_qprqsq_common(dev, qp, MLX5_RES_QP);
+ err = create_resource_common(dev, qp, MLX5_RES_QP);
if (err)
goto err_cmd;
@@ -220,6 +266,47 @@ err_cmd:
}
EXPORT_SYMBOL_GPL(mlx5_core_create_qp);
+static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
+ struct mlx5_core_dct *dct)
+{
+ u32 out[MLX5_ST_SZ_DW(drain_dct_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(drain_dct_in)] = {0};
+ struct mlx5_core_qp *qp = &dct->mqp;
+
+ MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT);
+ MLX5_SET(drain_dct_in, in, dctn, qp->qpn);
+ return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
+ (void *)&out, sizeof(out));
+}
+
+int mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
+ struct mlx5_core_dct *dct)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0};
+ struct mlx5_core_qp *qp = &dct->mqp;
+ int err;
+
+ err = mlx5_core_drain_dct(dev, dct);
+ if (err) {
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ goto destroy;
+ } else {
+ mlx5_core_warn(dev, "failed drain DCT 0x%x with error 0x%x\n", qp->qpn, err);
+ return err;
+ }
+ }
+ wait_for_completion(&dct->drained);
+destroy:
+ destroy_resource_common(dev, &dct->mqp);
+ MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
+ err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
+ (void *)&out, sizeof(out));
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct);
+
int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
struct mlx5_core_qp *qp)
{
@@ -229,7 +316,7 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
mlx5_debug_qp_remove(dev, qp);
- destroy_qprqsq_common(dev, qp);
+ destroy_resource_common(dev, qp);
MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
@@ -405,6 +492,20 @@ int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
}
EXPORT_SYMBOL_GPL(mlx5_core_qp_query);
+int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_dct_in)] = {0};
+ struct mlx5_core_qp *qp = &dct->mqp;
+
+ MLX5_SET(query_dct_in, in, opcode, MLX5_CMD_OP_QUERY_DCT);
+ MLX5_SET(query_dct_in, in, dctn, qp->qpn);
+
+ return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
+ (void *)out, outlen);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_dct_query);
+
int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn)
{
u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0};
@@ -441,7 +542,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
return err;
rq->qpn = rqn;
- err = create_qprqsq_common(dev, rq, MLX5_RES_RQ);
+ err = create_resource_common(dev, rq, MLX5_RES_RQ);
if (err)
goto err_destroy_rq;
@@ -457,7 +558,7 @@ EXPORT_SYMBOL(mlx5_core_create_rq_tracked);
void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
struct mlx5_core_qp *rq)
{
- destroy_qprqsq_common(dev, rq);
+ destroy_resource_common(dev, rq);
mlx5_core_destroy_rq(dev, rq->qpn);
}
EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
@@ -473,7 +574,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
return err;
sq->qpn = sqn;
- err = create_qprqsq_common(dev, sq, MLX5_RES_SQ);
+ err = create_resource_common(dev, sq, MLX5_RES_SQ);
if (err)
goto err_destroy_sq;
@@ -489,7 +590,7 @@ EXPORT_SYMBOL(mlx5_core_create_sq_tracked);
void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
struct mlx5_core_qp *sq)
{
- destroy_qprqsq_common(dev, sq);
+ destroy_resource_common(dev, sq);
mlx5_core_destroy_sq(dev, sq->qpn);
}
EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index a1296a62497d..dfe36cf6fbea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -36,6 +36,9 @@
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
+/* Mutex to hold while enabling or disabling RoCE */
+static DEFINE_MUTEX(mlx5_roce_en_lock);
+
static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
u16 vport, u32 *out, int outlen)
{
@@ -998,17 +1001,35 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
{
- if (atomic_inc_return(&mdev->roce.roce_en) != 1)
- return 0;
- return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
+ int err = 0;
+
+ mutex_lock(&mlx5_roce_en_lock);
+ if (!mdev->roce.roce_en)
+ err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
+
+ if (!err)
+ mdev->roce.roce_en++;
+ mutex_unlock(&mlx5_roce_en_lock);
+
+ return err;
}
EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
{
- if (atomic_dec_return(&mdev->roce.roce_en) != 0)
- return 0;
- return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
+ int err = 0;
+
+ mutex_lock(&mlx5_roce_en_lock);
+ if (mdev->roce.roce_en) {
+ mdev->roce.roce_en--;
+ if (mdev->roce.roce_en == 0)
+ err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
+
+ if (err)
+ mdev->roce.roce_en++;
+ }
+ mutex_unlock(&mlx5_roce_en_lock);
+ return err;
}
EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
@@ -1110,3 +1131,61 @@ ex:
return err;
}
EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context);
+
+int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
+ struct mlx5_core_dev *port_mdev)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ err = mlx5_nic_vport_enable_roce(port_mdev);
+ if (err)
+ goto free;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliated_vhca_id,
+ MLX5_CAP_GEN(master_mdev, vhca_id));
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliation_criteria,
+ MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria));
+
+ err = mlx5_modify_nic_vport_context(port_mdev, in, inlen);
+ if (err)
+ mlx5_nic_vport_disable_roce(port_mdev);
+
+free:
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_affiliate_multiport);
+
+int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliated_vhca_id, 0);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliation_criteria, 0);
+
+ err = mlx5_modify_nic_vport_context(port_mdev, in, inlen);
+ if (!err)
+ mlx5_nic_vport_disable_roce(port_mdev);
+
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 409ffb14298a..e5258ee4e38b 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -79,6 +79,11 @@
<< __mlx5_dw_bit_off(typ, fld))); \
} while (0)
+#define MLX5_ARRAY_SET(typ, p, fld, idx, v) do { \
+ BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 32); \
+ MLX5_SET(typ, p, fld[idx], v); \
+} while (0)
+
#define MLX5_SET_TO_ONES(typ, p, fld) do { \
BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \
*((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \
@@ -244,6 +249,8 @@ enum {
MLX5_NON_FP_BFREGS_PER_UAR,
MLX5_UARS_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
MLX5_NON_FP_BFREGS_IN_PAGE = MLX5_NON_FP_BFREGS_PER_UAR * MLX5_UARS_IN_PAGE,
+ MLX5_MIN_DYN_BFREGS = 512,
+ MLX5_MAX_DYN_BFREGS = 1024,
};
enum {
@@ -284,6 +291,7 @@ enum {
MLX5_EVENT_QUEUE_TYPE_QP = 0,
MLX5_EVENT_QUEUE_TYPE_RQ = 1,
MLX5_EVENT_QUEUE_TYPE_SQ = 2,
+ MLX5_EVENT_QUEUE_TYPE_DCT = 6,
};
enum mlx5_event {
@@ -319,6 +327,8 @@ enum mlx5_event {
MLX5_EVENT_TYPE_PAGE_FAULT = 0xc,
MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd,
+ MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c,
+
MLX5_EVENT_TYPE_FPGA_ERROR = 0x20,
};
@@ -611,6 +621,11 @@ struct mlx5_eqe_pps {
u8 rsvd2[12];
} __packed;
+struct mlx5_eqe_dct {
+ __be32 reserved[6];
+ __be32 dctn;
+};
+
union ev_data {
__be32 raw[7];
struct mlx5_eqe_cmd cmd;
@@ -626,6 +641,7 @@ union ev_data {
struct mlx5_eqe_vport_change vport_change;
struct mlx5_eqe_port_module port_module;
struct mlx5_eqe_pps pps;
+ struct mlx5_eqe_dct dct;
} __packed;
struct mlx5_eqe {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a0610427e168..fb7e8b205eb9 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -155,6 +155,13 @@ enum mlx5_dcbx_oper_mode {
MLX5E_DCBX_PARAM_VER_OPER_AUTO = 0x3,
};
+enum mlx5_dct_atomic_mode {
+ MLX5_ATOMIC_MODE_DCT_OFF = 20,
+ MLX5_ATOMIC_MODE_DCT_NONE = 0 << MLX5_ATOMIC_MODE_DCT_OFF,
+ MLX5_ATOMIC_MODE_DCT_IB_COMP = 1 << MLX5_ATOMIC_MODE_DCT_OFF,
+ MLX5_ATOMIC_MODE_DCT_CX = 2 << MLX5_ATOMIC_MODE_DCT_OFF,
+};
+
enum {
MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0,
MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1,
@@ -231,6 +238,9 @@ struct mlx5_bfreg_info {
u32 ver;
bool lib_uar_4k;
u32 num_sys_pages;
+ u32 num_static_sys_pages;
+ u32 total_num_bfregs;
+ u32 num_dyn_bfregs;
};
struct mlx5_cmd_first {
@@ -430,6 +440,7 @@ enum mlx5_res_type {
MLX5_RES_SRQ = 3,
MLX5_RES_XSRQ = 4,
MLX5_RES_XRQ = 5,
+ MLX5_RES_DCT = MLX5_EVENT_QUEUE_TYPE_DCT,
};
struct mlx5_core_rsc_common {
@@ -788,6 +799,7 @@ struct mlx5_clock {
u32 nominal_c_mult;
unsigned long overflow_period;
struct delayed_work overflow_work;
+ struct mlx5_core_dev *mdev;
struct ptp_clock *ptp;
struct ptp_clock_info ptp_info;
struct mlx5_pps pps_info;
@@ -826,7 +838,7 @@ struct mlx5_core_dev {
struct mlx5e_resources mlx5e_res;
struct {
struct mlx5_rsvd_gids reserved_gids;
- atomic_t roce_en;
+ u32 roce_en;
} roce;
#ifdef CONFIG_MLX5_FPGA
struct mlx5_fpga_device *fpga;
@@ -835,6 +847,8 @@ struct mlx5_core_dev {
struct cpu_rmap *rmap;
#endif
struct mlx5_clock clock;
+ struct mlx5_ib_clock_info *clock_info;
+ struct page *clock_info_page;
};
struct mlx5_db {
@@ -1103,7 +1117,7 @@ void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev);
int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
u8 roce_version, u8 roce_l3_type, const u8 *gid,
- const u8 *mac, bool vlan, u16 vlan_id);
+ const u8 *mac, bool vlan, u16 vlan_id, u8 port_num);
static inline int fw_initializing(struct mlx5_core_dev *dev)
{
@@ -1225,6 +1239,31 @@ static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev)
return !!(dev->priv.rl_table.max_size);
}
+static inline int mlx5_core_is_mp_slave(struct mlx5_core_dev *dev)
+{
+ return MLX5_CAP_GEN(dev, affiliate_nic_vport_criteria) &&
+ MLX5_CAP_GEN(dev, num_vhca_ports) <= 1;
+}
+
+static inline int mlx5_core_is_mp_master(struct mlx5_core_dev *dev)
+{
+ return MLX5_CAP_GEN(dev, num_vhca_ports) > 1;
+}
+
+static inline int mlx5_core_mp_enabled(struct mlx5_core_dev *dev)
+{
+ return mlx5_core_is_mp_slave(dev) ||
+ mlx5_core_is_mp_master(dev);
+}
+
+static inline int mlx5_core_native_port_num(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_mp_enabled(dev))
+ return 1;
+
+ return MLX5_CAP_GEN(dev, native_port_num);
+}
+
enum {
MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
};
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1391a82da98e..7ac7bd76c7af 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -502,7 +502,7 @@ struct mlx5_ifc_ads_bits {
u8 dei_cfi[0x1];
u8 eth_prio[0x3];
u8 sl[0x4];
- u8 port[0x8];
+ u8 vhca_port_num[0x8];
u8 rmac_47_32[0x10];
u8 rmac_31_0[0x20];
@@ -794,7 +794,10 @@ enum {
};
struct mlx5_ifc_cmd_hca_cap_bits {
- u8 reserved_at_0[0x80];
+ u8 reserved_at_0[0x30];
+ u8 vhca_id[0x10];
+
+ u8 reserved_at_40[0x40];
u8 log_max_srq_sz[0x8];
u8 log_max_qp_sz[0x8];
@@ -1067,7 +1070,12 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_5f8[0x3];
u8 log_max_xrq[0x5];
- u8 reserved_at_600[0x200];
+ u8 affiliate_nic_vport_criteria[0x8];
+ u8 native_port_num[0x8];
+ u8 num_vhca_ports[0x8];
+ u8 reserved_at_618[0x6];
+ u8 sw_owner_id[0x1];
+ u8 reserved_at_61f[0x1e1];
};
enum mlx5_flow_destination_type {
@@ -2616,7 +2624,12 @@ struct mlx5_ifc_nic_vport_context_bits {
u8 event_on_mc_address_change[0x1];
u8 event_on_uc_address_change[0x1];
- u8 reserved_at_40[0xf0];
+ u8 reserved_at_40[0xc];
+
+ u8 affiliation_criteria[0x4];
+ u8 affiliated_vhca_id[0x10];
+
+ u8 reserved_at_60[0xd0];
u8 mtu[0x10];
@@ -3259,7 +3272,8 @@ struct mlx5_ifc_set_roce_address_in_bits {
u8 op_mod[0x10];
u8 roce_address_index[0x10];
- u8 reserved_at_50[0x10];
+ u8 reserved_at_50[0xc];
+ u8 vhca_port_num[0x4];
u8 reserved_at_60[0x20];
@@ -3879,7 +3893,8 @@ struct mlx5_ifc_query_roce_address_in_bits {
u8 op_mod[0x10];
u8 roce_address_index[0x10];
- u8 reserved_at_50[0x10];
+ u8 reserved_at_50[0xc];
+ u8 vhca_port_num[0x4];
u8 reserved_at_60[0x20];
};
@@ -5311,7 +5326,9 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits {
};
struct mlx5_ifc_modify_nic_vport_field_select_bits {
- u8 reserved_at_0[0x14];
+ u8 reserved_at_0[0x12];
+ u8 affiliation[0x1];
+ u8 reserved_at_e[0x1];
u8 disable_uc_local_lb[0x1];
u8 disable_mc_local_lb[0x1];
u8 node_guid[0x1];
@@ -5532,6 +5549,7 @@ struct mlx5_ifc_init_hca_in_bits {
u8 op_mod[0x10];
u8 reserved_at_40[0x40];
+ u8 sw_owner_id[4][0x20];
};
struct mlx5_ifc_init2rtr_qp_out_bits {
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 62af7512dabb..4778d41085d4 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -473,6 +473,11 @@ struct mlx5_core_qp {
int pid;
};
+struct mlx5_core_dct {
+ struct mlx5_core_qp mqp;
+ struct completion drained;
+};
+
struct mlx5_qp_path {
u8 fl_free_ar;
u8 rsvd3;
@@ -549,6 +554,9 @@ static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev,
return radix_tree_lookup(&dev->priv.mkey_table.tree, key);
}
+int mlx5_core_create_dct(struct mlx5_core_dev *dev,
+ struct mlx5_core_dct *qp,
+ u32 *in, int inlen);
int mlx5_core_create_qp(struct mlx5_core_dev *dev,
struct mlx5_core_qp *qp,
u32 *in,
@@ -558,8 +566,12 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
struct mlx5_core_qp *qp);
int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
struct mlx5_core_qp *qp);
+int mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
+ struct mlx5_core_dct *dct);
int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
u32 *out, int outlen);
+int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct,
+ u32 *out, int outlen);
int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev,
u32 timeout_usec);
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index aaa0bb9e7655..64e193e87394 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -116,4 +116,8 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
struct mlx5_hca_vport_context *req);
int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable);
int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status);
+
+int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
+ struct mlx5_core_dev *port_mdev);
+int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev);
#endif /* __MLX5_VPORT_H__ */
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 18c564f60e93..d656809f1217 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -94,7 +94,7 @@ struct rdma_dev_addr {
* The dev_addr->net field must be initialized.
*/
int rdma_translate_ip(const struct sockaddr *addr,
- struct rdma_dev_addr *dev_addr, u16 *vlan_id);
+ struct rdma_dev_addr *dev_addr);
/**
* rdma_resolve_ip - Resolve source and destination IP addresses to
@@ -131,10 +131,9 @@ void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
int rdma_addr_size(struct sockaddr *addr);
-int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
- u8 *smac, u16 *vlan_id, int *if_index,
+ u8 *dmac, const struct net_device *ndev,
int *hoplimit);
static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
@@ -198,34 +197,15 @@ static inline void rdma_gid2ip(struct sockaddr *out, const union ib_gid *gid)
}
}
-static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr,
- union ib_gid *gid)
-{
- struct net_device *dev;
- struct in_device *ip4;
-
- dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
- if (dev) {
- ip4 = in_dev_get(dev);
- if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address)
- ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address,
- (struct in6_addr *)gid);
-
- if (ip4)
- in_dev_put(ip4);
-
- dev_put(dev);
- }
-}
-
+/*
+ * rdma_get/set_sgid/dgid() APIs are applicable to IB, and iWarp.
+ * They are not applicable to RoCE.
+ * RoCE GIDs are derived from the IP addresses.
+ */
static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
{
- if (dev_addr->transport == RDMA_TRANSPORT_IB &&
- dev_addr->dev_type != ARPHRD_INFINIBAND)
- iboe_addr_get_sgid(dev_addr, gid);
- else
- memcpy(gid, dev_addr->src_dev_addr +
- rdma_addr_gid_offset(dev_addr), sizeof *gid);
+ memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr),
+ sizeof(*gid));
}
static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 1f7f604db5aa..811cfcfcbe3d 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -549,12 +549,12 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct rdma_ah_attr *ah_attr);
/**
- * ib_init_ah_from_path - Initialize address handle attributes based on an SA
- * path record.
+ * ib_init_ah_attr_from_path - Initialize address handle attributes based on
+ * an SA path record.
*/
-int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
- struct sa_path_rec *rec,
- struct rdma_ah_attr *ah_attr);
+int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr);
/**
* ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index fd84cda5ed7c..5263c86fd103 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -63,6 +63,7 @@
#include <linux/uaccess.h>
#include <linux/cgroup_rdma.h>
#include <uapi/rdma/ib_user_verbs.h>
+#include <rdma/restrack.h>
#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
@@ -300,11 +301,6 @@ struct ib_tm_caps {
u32 max_sge;
};
-enum ib_cq_creation_flags {
- IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0,
- IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1,
-};
-
struct ib_cq_init_attr {
unsigned int cqe;
int comp_vector;
@@ -983,9 +979,9 @@ struct ib_wc {
u32 invalidate_rkey;
} ex;
u32 src_qp;
+ u32 slid;
int wc_flags;
u16 pkey_index;
- u32 slid;
u8 sl;
u8 dlid_path_bits;
u8 port_num; /* valid only for DR SMPs on switches */
@@ -1082,6 +1078,7 @@ enum ib_qp_type {
IB_QPT_XRC_INI = 9,
IB_QPT_XRC_TGT,
IB_QPT_MAX,
+ IB_QPT_DRIVER = 0xFF,
/* Reserve a range for qp types internal to the low level driver.
* These qp types will not be visible at the IB core layer, so the
* IB_QPT_MAX usages should not be affected in the core layer
@@ -1529,6 +1526,7 @@ struct ib_pd {
* Implementation details of the RDMA core, don't use in drivers:
*/
struct ib_mr *__internal_mr;
+ struct rdma_restrack_entry res;
};
struct ib_xrcd {
@@ -1538,6 +1536,10 @@ struct ib_xrcd {
struct mutex tgt_qp_mutex;
struct list_head tgt_qp_list;
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
};
struct ib_ah {
@@ -1569,6 +1571,10 @@ struct ib_cq {
struct irq_poll iop;
struct work_struct work;
};
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
};
struct ib_srq {
@@ -1745,6 +1751,11 @@ struct ib_qp {
struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_qp_security *qp_sec;
u8 port;
+
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
};
struct ib_mr {
@@ -2351,6 +2362,10 @@ struct ib_device {
#endif
u32 index;
+ /*
+ * Implementation details of the RDMA core, don't use in drivers
+ */
+ struct rdma_restrack_root res;
/**
* The following mandatory functions are used only at device
@@ -2836,8 +2851,7 @@ int ib_modify_port(struct ib_device *device,
struct ib_port_modify *port_modify);
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- enum ib_gid_type gid_type, struct net_device *ndev,
- u8 *port_num, u16 *index);
+ struct net_device *ndev, u8 *port_num, u16 *index);
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index);
@@ -2858,7 +2872,7 @@ enum ib_pd_flags {
struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
const char *caller);
#define ib_alloc_pd(device, flags) \
- __ib_alloc_pd((device), (flags), __func__)
+ __ib_alloc_pd((device), (flags), KBUILD_MODNAME)
void ib_dealloc_pd(struct ib_pd *pd);
/**
@@ -2905,7 +2919,7 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
/**
- * ib_init_ah_from_wc - Initializes address handle attributes from a
+ * ib_init_ah_attr_from_wc - Initializes address handle attributes from a
* work completion.
* @device: Device on which the received message arrived.
* @port_num: Port on which the received message arrived.
@@ -2915,9 +2929,9 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
* @ah_attr: Returned attributes that can be used when creating an address
* handle for replying to the message.
*/
-int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
- const struct ib_wc *wc, const struct ib_grh *grh,
- struct rdma_ah_attr *ah_attr);
+int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
+ const struct ib_wc *wc, const struct ib_grh *grh,
+ struct rdma_ah_attr *ah_attr);
/**
* ib_create_ah_from_wc - Creates an address handle associated with the
@@ -3135,8 +3149,12 @@ static inline int ib_post_recv(struct ib_qp *qp,
return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
}
-struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx);
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector,
+ enum ib_poll_context poll_ctx, const char *caller);
+#define ib_alloc_cq(device, priv, nr_cqe, comp_vect, poll_ctx) \
+ __ib_alloc_cq((device), (priv), (nr_cqe), (comp_vect), (poll_ctx), KBUILD_MODNAME)
+
void ib_free_cq(struct ib_cq *cq);
int ib_process_cq_direct(struct ib_cq *cq, int budget);
@@ -3560,8 +3578,11 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
/**
* ib_alloc_xrcd - Allocates an XRC domain.
* @device: The device on which to allocate the XRC domain.
+ * @caller: Module name for kernel consumers
*/
-struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
+struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
+#define ib_alloc_xrcd(device) \
+ __ib_alloc_xrcd((device), KBUILD_MODNAME)
/**
* ib_dealloc_xrcd - Deallocates an XRC domain.
@@ -3793,8 +3814,7 @@ static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr,
static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev,
u32 port_num)
{
- if ((rdma_protocol_roce(dev, port_num)) ||
- (rdma_protocol_iwarp(dev, port_num)))
+ if (rdma_protocol_roce(dev, port_num))
return RDMA_AH_ATTR_TYPE_ROCE;
else if ((rdma_protocol_ib(dev, port_num)) &&
(rdma_cap_opa_ah(dev, port_num)))
@@ -3850,4 +3870,12 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
}
+/**
+ * rdma_roce_rescan_device - Rescan all of the network devices in the system
+ * and add their gids, as needed, to the relevant RoCE devices.
+ *
+ * @device: the rdma device
+ */
+void rdma_roce_rescan_device(struct ib_device *ibdev);
+
#endif /* IB_VERBS_H */
diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h
index f68fca296631..2bbb7a67e643 100644
--- a/include/rdma/opa_addr.h
+++ b/include/rdma/opa_addr.h
@@ -114,4 +114,20 @@ static inline u32 opa_get_mcast_base(u32 nr_top_bits)
return (be32_to_cpu(OPA_LID_PERMISSIVE) << (32 - nr_top_bits));
}
+/* Check for a valid unicast LID for non-SM traffic types */
+static inline bool rdma_is_valid_unicast_lid(struct rdma_ah_attr *attr)
+{
+ if (attr->type == RDMA_AH_ATTR_TYPE_IB) {
+ if (!rdma_ah_get_dlid(attr) ||
+ rdma_ah_get_dlid(attr) >=
+ be32_to_cpu(IB_MULTICAST_LID_BASE))
+ return false;
+ } else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) {
+ if (!rdma_ah_get_dlid(attr) ||
+ rdma_ah_get_dlid(attr) >=
+ opa_get_mcast_base(OPA_MCAST_NR))
+ return false;
+ }
+ return true;
+}
#endif /* OPA_ADDR_H */
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 3d2eed3c4e75..6538a5cc27b6 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -413,4 +413,23 @@ bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason);
const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
struct rdma_cm_event *ev, u8 *data_len);
+/**
+ * rdma_read_gids - Return the SGID and DGID used for establishing
+ * connection. This can be used after rdma_resolve_addr()
+ * on client side. This can be use on new connection
+ * on server side. This is applicable to IB, RoCE, iWarp.
+ * If cm_id is not bound yet to the RDMA device, it doesn't
+ * copy and SGID or DGID to the given pointers.
+ * @id: Communication identifier whose GIDs are queried.
+ * @sgid: Pointer to SGID where SGID will be returned. It is optional.
+ * @dgid: Pointer to DGID where DGID will be returned. It is optional.
+ * Note: This API should not be used by any new ULPs or new code.
+ * Instead, users interested in querying GIDs should refer to path record
+ * of the rdma_cm_id to query the GIDs.
+ * This API is provided for compatibility for existing users.
+ */
+
+void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
+ union ib_gid *dgid);
+
#endif /* RDMA_CM_H */
diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h
index 6947a6ba2557..6a69d71a21a5 100644
--- a/include/rdma/rdma_cm_ib.h
+++ b/include/rdma/rdma_cm_ib.h
@@ -36,17 +36,17 @@
#include <rdma/rdma_cm.h>
/**
- * rdma_set_ib_paths - Manually sets the path records used to establish a
+ * rdma_set_ib_path - Manually sets the path record used to establish a
* connection.
* @id: Connection identifier associated with the request.
* @path_rec: Reference to the path record
*
* This call permits a user to specify routing information for rdma_cm_id's
- * bound to Infiniband devices. It is called on the client side of a
+ * bound to InfiniBand devices. It is called on the client side of a
* connection and replaces the call to rdma_resolve_route.
*/
-int rdma_set_ib_paths(struct rdma_cm_id *id,
- struct sa_path_rec *path_rec, int num_paths);
+int rdma_set_ib_path(struct rdma_cm_id *id,
+ struct sa_path_rec *path_rec);
/* Global qkey for UDP QPs and multicast groups. */
#define RDMA_UDP_QKEY 0x01234567
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 1ba84a78f1c5..4118324a0310 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -228,13 +228,6 @@ struct rvt_driver_provided {
int (*port_callback)(struct ib_device *, u8, struct kobject *);
/*
- * Returns a string to represent the device for which is being
- * registered. This is primarily used for error and debug messages on
- * the console.
- */
- const char * (*get_card_name)(struct rvt_dev_info *rdi);
-
- /*
* Returns a pointer to the undelying hardware's PCI device. This is
* used to display information as to what hardware is being referenced
* in an output message
@@ -419,6 +412,30 @@ struct rvt_dev_info {
};
+/**
+ * rvt_set_ibdev_name - Craft an IB device name from client info
+ * @rdi: pointer to the client rvt_dev_info structure
+ * @name: client specific name
+ * @unit: client specific unit number.
+ */
+static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
+ const char *fmt, const char *name,
+ const int unit)
+{
+ snprintf(rdi->ibdev.name, sizeof(rdi->ibdev.name), fmt, name, unit);
+}
+
+/**
+ * rvt_get_ibdev_name - return the IB name
+ * @rdi: rdmavt device
+ *
+ * Return the registered name of the device.
+ */
+static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi)
+{
+ return rdi->ibdev.name;
+}
+
static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct rvt_pd, ibpd);
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
new file mode 100644
index 000000000000..c2d81167c858
--- /dev/null
+++ b/include/rdma/restrack.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_RESTRACK_H_
+#define _RDMA_RESTRACK_H_
+
+#include <linux/typecheck.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/kref.h>
+#include <linux/completion.h>
+
+/**
+ * enum rdma_restrack_type - HW objects to track
+ */
+enum rdma_restrack_type {
+ /**
+ * @RDMA_RESTRACK_PD: Protection domain (PD)
+ */
+ RDMA_RESTRACK_PD,
+ /**
+ * @RDMA_RESTRACK_CQ: Completion queue (CQ)
+ */
+ RDMA_RESTRACK_CQ,
+ /**
+ * @RDMA_RESTRACK_QP: Queue pair (QP)
+ */
+ RDMA_RESTRACK_QP,
+ /**
+ * @RDMA_RESTRACK_XRCD: XRC domain (XRCD)
+ */
+ RDMA_RESTRACK_XRCD,
+ /**
+ * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
+ */
+ RDMA_RESTRACK_MAX
+};
+
+#define RDMA_RESTRACK_HASH_BITS 8
+/**
+ * struct rdma_restrack_root - main resource tracking management
+ * entity, per-device
+ */
+struct rdma_restrack_root {
+ /*
+ * @rwsem: Read/write lock to protect lists
+ */
+ struct rw_semaphore rwsem;
+ /**
+ * @hash: global database for all resources per-device
+ */
+ DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS);
+};
+
+/**
+ * struct rdma_restrack_entry - metadata per-entry
+ */
+struct rdma_restrack_entry {
+ /**
+ * @valid: validity indicator
+ *
+ * The entries are filled during rdma_restrack_add,
+ * can be attempted to be free during rdma_restrack_del.
+ *
+ * As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI
+ */
+ bool valid;
+ /*
+ * @kref: Protect destroy of the resource
+ */
+ struct kref kref;
+ /*
+ * @comp: Signal that all consumers of resource are completed their work
+ */
+ struct completion comp;
+ /**
+ * @task: owner of resource tracking entity
+ *
+ * There are two types of entities: created by user and created
+ * by kernel.
+ *
+ * This is relevant for the entities created by users.
+ * For the entities created by kernel, this pointer will be NULL.
+ */
+ struct task_struct *task;
+ /**
+ * @kern_name: name of owner for the kernel created entities.
+ */
+ const char *kern_name;
+ /**
+ * @node: hash table entry
+ */
+ struct hlist_node node;
+ /**
+ * @type: various objects in restrack database
+ */
+ enum rdma_restrack_type type;
+};
+
+/**
+ * rdma_restrack_init() - initialize resource tracking
+ * @res: resource tracking root
+ */
+void rdma_restrack_init(struct rdma_restrack_root *res);
+
+/**
+ * rdma_restrack_clean() - clean resource tracking
+ * @res: resource tracking root
+ */
+void rdma_restrack_clean(struct rdma_restrack_root *res);
+
+/**
+ * rdma_restrack_count() - the current usage of specific object
+ * @res: resource entry
+ * @type: actual type of object to operate
+ * @ns: PID namespace
+ */
+int rdma_restrack_count(struct rdma_restrack_root *res,
+ enum rdma_restrack_type type,
+ struct pid_namespace *ns);
+
+/**
+ * rdma_restrack_add() - add object to the reource tracking database
+ * @res: resource entry
+ */
+void rdma_restrack_add(struct rdma_restrack_entry *res);
+
+/**
+ * rdma_restrack_del() - delete object from the reource tracking database
+ * @res: resource entry
+ * @type: actual type of object to operate
+ */
+void rdma_restrack_del(struct rdma_restrack_entry *res);
+
+/**
+ * rdma_is_kernel_res() - check the owner of resource
+ * @res: resource entry
+ */
+static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res)
+{
+ return !res->task;
+}
+
+/**
+ * rdma_restrack_get() - grab to protect resource from release
+ * @res: resource entry
+ */
+int __must_check rdma_restrack_get(struct rdma_restrack_entry *res);
+
+/**
+ * rdma_restrack_put() - relase resource
+ * @res: resource entry
+ */
+int rdma_restrack_put(struct rdma_restrack_entry *res);
+#endif /* _RDMA_RESTRACK_H_ */
diff --git a/include/scsi/srp.h b/include/scsi/srp.h
index 5be834de491a..c16a3c9a4d9b 100644
--- a/include/scsi/srp.h
+++ b/include/scsi/srp.h
@@ -129,6 +129,23 @@ struct srp_login_req {
u8 target_port_id[16];
};
+/**
+ * struct srp_login_req_rdma - RDMA/CM login parameters.
+ *
+ * RDMA/CM over InfiniBand can only carry 92 - 36 = 56 bytes of private
+ * data. The %srp_login_req_rdma structure contains the same information as
+ * %srp_login_req but with the reserved data removed.
+ */
+struct srp_login_req_rdma {
+ u64 tag;
+ __be16 req_buf_fmt;
+ u8 req_flags;
+ u8 opcode;
+ __be32 req_it_iu_len;
+ u8 initiator_port_id[16];
+ u8 target_port_id[16];
+};
+
/*
* The SRP spec defines the size of the LOGIN_RSP structure to be 52
* bytes, so it needs to be packed to avoid having it padded to 56
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index 398a514ee446..db54115be044 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -82,6 +82,15 @@ struct bnxt_re_qp_resp {
__u32 rsvd;
};
+struct bnxt_re_srq_req {
+ __u64 srqva;
+ __u64 srq_handle;
+};
+
+struct bnxt_re_srq_resp {
+ __u32 srqid;
+};
+
enum bnxt_re_shpg_offt {
BNXT_RE_BEG_RESV_OFFT = 0x00,
BNXT_RE_AVID_OFFT = 0x10,
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 7e11bb8651b6..04d0e67b1312 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -402,13 +402,18 @@ struct ib_uverbs_create_cq {
__u64 driver_data[0];
};
+enum ib_uverbs_ex_create_cq_flags {
+ IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0,
+ IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1,
+};
+
struct ib_uverbs_ex_create_cq {
__u64 user_handle;
__u32 cqe;
__u32 comp_vector;
__s32 comp_channel;
__u32 comp_mask;
- __u32 flags;
+ __u32 flags; /* bitmask of ib_uverbs_ex_create_cq_flags */
__u32 reserved;
};
@@ -449,7 +454,7 @@ struct ib_uverbs_wc {
__u32 vendor_err;
__u32 byte_len;
union {
- __u32 imm_data;
+ __be32 imm_data;
__u32 invalidate_rkey;
} ex;
__u32 qp_num;
@@ -765,7 +770,7 @@ struct ib_uverbs_send_wr {
__u32 opcode;
__u32 send_flags;
union {
- __u32 imm_data;
+ __be32 imm_data;
__u32 invalidate_rkey;
} ex;
union {
diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h
index 224b52b6279c..7f9c37346613 100644
--- a/include/uapi/rdma/mlx4-abi.h
+++ b/include/uapi/rdma/mlx4-abi.h
@@ -97,8 +97,8 @@ struct mlx4_ib_create_srq_resp {
};
struct mlx4_ib_create_qp_rss {
- __u64 rx_hash_fields_mask;
- __u8 rx_hash_function;
+ __u64 rx_hash_fields_mask; /* Use enum mlx4_ib_rx_hash_fields */
+ __u8 rx_hash_function; /* Use enum mlx4_ib_rx_hash_function_flags */
__u8 reserved[7];
__u8 rx_hash_key[40];
__u32 comp_mask;
@@ -152,7 +152,8 @@ enum mlx4_ib_rx_hash_fields {
MLX4_IB_RX_HASH_SRC_PORT_TCP = 1 << 4,
MLX4_IB_RX_HASH_DST_PORT_TCP = 1 << 5,
MLX4_IB_RX_HASH_SRC_PORT_UDP = 1 << 6,
- MLX4_IB_RX_HASH_DST_PORT_UDP = 1 << 7
+ MLX4_IB_RX_HASH_DST_PORT_UDP = 1 << 7,
+ MLX4_IB_RX_HASH_INNER = 1ULL << 31,
};
#endif /* MLX4_ABI_USER_H */
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index a33e0517d3fd..1111aa4e7c1e 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -41,6 +41,9 @@ enum {
MLX5_QP_FLAG_SIGNATURE = 1 << 0,
MLX5_QP_FLAG_SCATTER_CQE = 1 << 1,
MLX5_QP_FLAG_TUNNEL_OFFLOADS = 1 << 2,
+ MLX5_QP_FLAG_BFREG_INDEX = 1 << 3,
+ MLX5_QP_FLAG_TYPE_DCT = 1 << 4,
+ MLX5_QP_FLAG_TYPE_DCI = 1 << 5,
};
enum {
@@ -121,10 +124,12 @@ struct mlx5_ib_alloc_ucontext_resp {
__u8 cqe_version;
__u8 cmds_supp_uhw;
__u8 eth_min_inline;
- __u8 reserved2;
+ __u8 clock_info_versions;
__u64 hca_core_clock_offset;
__u32 log_uar_size;
__u32 num_uars_per_page;
+ __u32 num_dyn_bfregs;
+ __u32 reserved3;
};
struct mlx5_ib_alloc_pd_resp {
@@ -280,8 +285,11 @@ struct mlx5_ib_create_qp {
__u32 rq_wqe_shift;
__u32 flags;
__u32 uidx;
- __u32 reserved0;
- __u64 sq_buf_addr;
+ __u32 bfreg_index;
+ union {
+ __u64 sq_buf_addr;
+ __u64 access_key;
+ };
};
/* RX Hash function flags */
@@ -307,7 +315,7 @@ enum mlx5_rx_hash_fields {
MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6,
MLX5_RX_HASH_DST_PORT_UDP = 1 << 7,
/* Save bits for future fields */
- MLX5_RX_HASH_INNER = 1 << 31
+ MLX5_RX_HASH_INNER = (1UL << 31),
};
struct mlx5_ib_create_qp_rss {
@@ -354,6 +362,11 @@ struct mlx5_ib_create_ah_resp {
__u8 reserved[6];
};
+struct mlx5_ib_modify_qp_resp {
+ __u32 response_length;
+ __u32 dctn;
+};
+
struct mlx5_ib_create_wq_resp {
__u32 response_length;
__u32 reserved;
@@ -368,4 +381,36 @@ struct mlx5_ib_modify_wq {
__u32 comp_mask;
__u32 reserved;
};
+
+struct mlx5_ib_clock_info {
+ __u32 sign;
+ __u32 resv;
+ __u64 nsec;
+ __u64 cycles;
+ __u64 frac;
+ __u32 mult;
+ __u32 shift;
+ __u64 mask;
+ __u64 overflow_period;
+};
+
+enum mlx5_ib_mmap_cmd {
+ MLX5_IB_MMAP_REGULAR_PAGE = 0,
+ MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
+ MLX5_IB_MMAP_WC_PAGE = 2,
+ MLX5_IB_MMAP_NC_PAGE = 3,
+ /* 5 is chosen in order to be compatible with old versions of libmlx5 */
+ MLX5_IB_MMAP_CORE_CLOCK = 5,
+ MLX5_IB_MMAP_ALLOC_WC = 6,
+ MLX5_IB_MMAP_CLOCK_INFO = 7,
+};
+
+enum {
+ MLX5_IB_CLOCK_INFO_KERNEL_UPDATING = 1,
+};
+
+/* Bit indexes for the mlx5_alloc_ucontext_resp.clock_info_versions bitmap */
+enum {
+ MLX5_IB_CLOCK_INFO_V1 = 0,
+};
#endif /* MLX5_ABI_USER_H */
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index cc002e316d09..17e59bec169e 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -236,6 +236,10 @@ enum rdma_nldev_command {
RDMA_NLDEV_CMD_PORT_NEW,
RDMA_NLDEV_CMD_PORT_DEL,
+ RDMA_NLDEV_CMD_RES_GET, /* can dump */
+
+ RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */
+
RDMA_NLDEV_NUM_OPS
};
@@ -303,6 +307,51 @@ enum rdma_nldev_attr {
RDMA_NLDEV_ATTR_DEV_NODE_TYPE, /* u8 */
+ RDMA_NLDEV_ATTR_RES_SUMMARY, /* nested table */
+ RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY, /* nested table */
+ RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, /* string */
+ RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, /* u64 */
+
+ RDMA_NLDEV_ATTR_RES_QP, /* nested table */
+ RDMA_NLDEV_ATTR_RES_QP_ENTRY, /* nested table */
+ /*
+ * Local QPN
+ */
+ RDMA_NLDEV_ATTR_RES_LQPN, /* u32 */
+ /*
+ * Remote QPN,
+ * Applicable for RC and UC only IBTA 11.2.5.3 QUERY QUEUE PAIR
+ */
+ RDMA_NLDEV_ATTR_RES_RQPN, /* u32 */
+ /*
+ * Receive Queue PSN,
+ * Applicable for RC and UC only 11.2.5.3 QUERY QUEUE PAIR
+ */
+ RDMA_NLDEV_ATTR_RES_RQ_PSN, /* u32 */
+ /*
+ * Send Queue PSN
+ */
+ RDMA_NLDEV_ATTR_RES_SQ_PSN, /* u32 */
+ RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE, /* u8 */
+ /*
+ * QP types as visible to RDMA/core, the reserved QPT
+ * are not exported through this interface.
+ */
+ RDMA_NLDEV_ATTR_RES_TYPE, /* u8 */
+ RDMA_NLDEV_ATTR_RES_STATE, /* u8 */
+ /*
+ * Process ID which created object,
+ * in case of kernel origin, PID won't exist.
+ */
+ RDMA_NLDEV_ATTR_RES_PID, /* u32 */
+ /*
+ * The name of process created following resource.
+ * It will exist only for kernel objects.
+ * For user created objects, the user is supposed
+ * to read /proc/PID/comm file.
+ */
+ RDMA_NLDEV_ATTR_RES_KERN_NAME, /* string */
+
RDMA_NLDEV_ATTR_MAX
};
#endif /* _UAPI_RDMA_NETLINK_H */
diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h
index aaa352f2f110..02ca0d0f1eb7 100644
--- a/include/uapi/rdma/vmw_pvrdma-abi.h
+++ b/include/uapi/rdma/vmw_pvrdma-abi.h
@@ -52,12 +52,14 @@
#define PVRDMA_UVERBS_ABI_VERSION 3 /* ABI Version. */
#define PVRDMA_UAR_HANDLE_MASK 0x00FFFFFF /* Bottom 24 bits. */
#define PVRDMA_UAR_QP_OFFSET 0 /* QP doorbell. */
-#define PVRDMA_UAR_QP_SEND BIT(30) /* Send bit. */
-#define PVRDMA_UAR_QP_RECV BIT(31) /* Recv bit. */
+#define PVRDMA_UAR_QP_SEND (1 << 30) /* Send bit. */
+#define PVRDMA_UAR_QP_RECV (1 << 31) /* Recv bit. */
#define PVRDMA_UAR_CQ_OFFSET 4 /* CQ doorbell. */
-#define PVRDMA_UAR_CQ_ARM_SOL BIT(29) /* Arm solicited bit. */
-#define PVRDMA_UAR_CQ_ARM BIT(30) /* Arm bit. */
-#define PVRDMA_UAR_CQ_POLL BIT(31) /* Poll bit. */
+#define PVRDMA_UAR_CQ_ARM_SOL (1 << 29) /* Arm solicited bit. */
+#define PVRDMA_UAR_CQ_ARM (1 << 30) /* Arm bit. */
+#define PVRDMA_UAR_CQ_POLL (1 << 31) /* Poll bit. */
+#define PVRDMA_UAR_SRQ_OFFSET 8 /* SRQ doorbell. */
+#define PVRDMA_UAR_SRQ_RECV (1 << 30) /* Recv bit. */
enum pvrdma_wr_opcode {
PVRDMA_WR_RDMA_WRITE,
diff --git a/lib/kobject.c b/lib/kobject.c
index 763d70a18941..06b849eee0ca 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -1039,6 +1039,7 @@ void *kobj_ns_grab_current(enum kobj_ns_type type)
return ns;
}
+EXPORT_SYMBOL_GPL(kobj_ns_grab_current);
const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk)
{
@@ -1074,3 +1075,4 @@ void kobj_ns_drop(enum kobj_ns_type type, void *ns)
kobj_ns_ops_tbl[type]->drop_ns(ns);
spin_unlock(&kobj_ns_type_lock);
}
+EXPORT_SYMBOL_GPL(kobj_ns_drop);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 36dd2099048a..b2a5067b4afe 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -301,13 +301,11 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
if (rds_conn_state(conn) == RDS_CONN_UP) {
struct rds_ib_device *rds_ibdev;
- struct rdma_dev_addr *dev_addr;
ic = conn->c_transport_data;
- dev_addr = &ic->i_cm_id->route.addr.dev_addr;
- rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
- rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
+ rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid,
+ (union ib_gid *)&iinfo->dst_gid);
rds_ibdev = ic->rds_ibdev;
iinfo->max_send_wr = ic->i_send_ring.w_nr;