From 0fd27a88c2e4f548937fd7d93fc6e65c4ad7c278 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 18 Jan 2017 14:10:30 +0200 Subject: IB/mlx5: Fix out-of-bound access When initializing the buffer to create an SRQ in the kernel, the number of pages allocated was smaller than the number actually used by the subsequent mlx5_fill_page_array(). Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Cc: # v3.10+ Signed-off-by: Leon Romanovsky Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/srq.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 6f4397ee1ed6..7cb145f9a6db 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -165,8 +165,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, int err; int i; struct mlx5_wqe_srq_next_seg *next; - int page_shift; - int npages; err = mlx5_db_alloc(dev->mdev, &srq->db); if (err) { @@ -179,7 +177,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, err = -ENOMEM; goto err_db; } - page_shift = srq->buf.page_shift; srq->head = 0; srq->tail = srq->msrq.max - 1; @@ -191,10 +188,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, cpu_to_be16((i + 1) & (srq->msrq.max - 1)); } - npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT)); - mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n", - buf_size, page_shift, srq->buf.npages, npages); - in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages); + mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift); + in->pas = mlx5_vzalloc(sizeof(*in->pas) * srq->buf.npages); if (!in->pas) { err = -ENOMEM; goto err_buf; @@ -208,7 +203,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, } srq->wq_sig = !!srq_signature; - in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type == IB_SRQT_XRC) in->user_index = MLX5_IB_DEFAULT_UIDX; -- cgit v1.2.3 From 12bbf1ea7e3b35892dbb8636b978160bc9576b61 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 18 Jan 2017 14:10:31 +0200 Subject: IB/mlx5: Return error for unsupported signature type In case of an unsupported signature type, we returned a positive value, while the better approach is to return -EINVAL. In addition, this change enriches the error print to report the actual supplied signature type.
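A minimal sketch of the convention at play (hypothetical helper names, not the driver's actual code): helpers on the post-send path return 0 or a negative errno, and a non-zero return is handed back to the consumer unchanged, so a bare "1" would escape as a meaningless error code:

	/* Illustrative only: the return value of build_psv_seg() is propagated
	 * verbatim to the caller, which expects 0 or a negative errno.
	 */
	static int build_psv_seg(int sig_type)
	{
		switch (sig_type) {
		case 0:			/* a supported signature type */
			return 0;
		default:
			pr_err("Bad signature type (%d) is given.\n", sig_type);
			return -EINVAL;	/* was "return 1" before this change */
		}
	}

	static int post_one_wr(int sig_type)
	{
		int err = build_psv_seg(sig_type);

		if (err)
			return err;	/* a positive value here would reach the
					 * consumer as a bogus "error" code */
		/* ... build the rest of the WQE ... */
		return 0;
	}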
Fixes: e6631814fb3a ("IB/mlx5: Support IB_WR_REG_SIG_MR") Cc: Sagi Grimberg Reported-by: Dan Carpenter Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6a83fb32599d..90210745366a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3637,8 +3637,9 @@ static int set_psv_wr(struct ib_sig_domain *domain, psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); break; default: - pr_err("Bad signature type given.\n"); - return 1; + pr_err("Bad signature type (%d) is given.\n", + domain->sig_type); + return -EINVAL; } *seg += sizeof(*psv_seg); -- cgit v1.2.3 From 45bded2c216da6010184ac5ebe88c27f73439009 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 18 Jan 2017 14:10:32 +0200 Subject: IB/mlx5: Verify that Q counters are supported Make sure that the Q counters are supported by the FW before trying to allocate/deallocte them, this will avoid driver load failure when they aren't supported by the FW. Fixes: 0837e86a7a34 ('IB/mlx5: Add per port counters') Cc: # v4.7+ Signed-off-by: Kamal Heib Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a191b9327b0c..0187f1d7234a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3313,9 +3313,11 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (err) goto err_rsrc; - err = mlx5_ib_alloc_q_counters(dev); - if (err) - goto err_odp; + if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { + err = mlx5_ib_alloc_q_counters(dev); + if (err) + goto err_odp; + } dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); if (!dev->mdev->priv.uar) @@ -3364,7 +3366,8 @@ err_uar_page: mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); err_q_cnt: - mlx5_ib_dealloc_q_counters(dev); + if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) + mlx5_ib_dealloc_q_counters(dev); err_odp: mlx5_ib_odp_remove_one(dev); @@ -3397,7 +3400,8 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); mlx5_free_bfreg(dev->mdev, &dev->bfreg); mlx5_put_uars_page(dev->mdev, mdev->priv.uar); - mlx5_ib_dealloc_q_counters(dev); + if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) + mlx5_ib_dealloc_q_counters(dev); destroy_umrc_res(dev); mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); -- cgit v1.2.3 From c43f1112c068f3b4b20a0a9d461c341d9caeb376 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 18 Jan 2017 14:10:33 +0200 Subject: IB/mlx5: Add additional checks before processing MADs Check the has_smi bit in vport context and class version of MADs before allowing MADs processing to take place. MAD_IFC SMI commands can be executed only if smi bit is set. 
Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Maor Gottlieb Signed-off-by: Parvi Kaustubhi Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mad.c | 12 ++++++++++++ drivers/infiniband/hw/mlx5/main.c | 33 +++++++++++++++++++++++++++++++++ include/linux/mlx5/driver.h | 1 + 3 files changed, 46 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 39e58489dcc2..af962e7fdc3a 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -42,12 +42,24 @@ enum { MLX5_IB_VENDOR_CLASS2 = 0xa }; +static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num, + struct ib_mad *in_mad) +{ + if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED && + in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + return true; + return dev->mdev->port_caps[port_num - 1].has_smi; +} + int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad) { u8 op_modifier = 0; + if (!can_do_mad_ifc(dev, port, (struct ib_mad *)in_mad)) + return -EPERM; + /* Key check traps can't be generated unless we have in_wc to * tell us where to send the trap. */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0187f1d7234a..1dea4073d83f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2533,6 +2533,35 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, ibdev->ib_active = false; } +static int set_has_smi_cap(struct mlx5_ib_dev *dev) +{ + struct mlx5_hca_vport_context vport_ctx; + int err; + int port; + + for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) { + dev->mdev->port_caps[port - 1].has_smi = false; + if (MLX5_CAP_GEN(dev->mdev, port_type) == + MLX5_CAP_PORT_TYPE_IB) { + if (MLX5_CAP_GEN(dev->mdev, ib_virt)) { + err = mlx5_query_hca_vport_context(dev->mdev, 0, + port, 0, + &vport_ctx); + if (err) { + mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n", + port, err); + return err; + } + dev->mdev->port_caps[port - 1].has_smi = + vport_ctx.has_smi; + } else { + dev->mdev->port_caps[port - 1].has_smi = true; + } + } + } + return 0; +} + static void get_ext_port_caps(struct mlx5_ib_dev *dev) { int port; @@ -2557,6 +2586,10 @@ static int get_port_caps(struct mlx5_ib_dev *dev) if (!dprops) goto out; + err = set_has_smi_cap(dev); + if (err) + goto out; + err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); if (err) { mlx5_ib_warn(dev, "query_device failed %d\n", err); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3a309f6a4a15..b8d69aeb1784 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -289,6 +289,7 @@ struct mlx5_port_caps { int gid_table_len; int pkey_table_len; u8 ext_port_cap; + bool has_smi; }; struct mlx5_cmd_mailbox { -- cgit v1.2.3 From 1e0e50b6177bbc83e8cf673a29b2842c769f90f4 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 18 Jan 2017 14:10:34 +0200 Subject: IB/mlx5: Avoid SMP MADs from VFs According to the device specification, we need to check that the has_smi bit is set in vport context before allowing send SMP MADs from VF. 
Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Maor Gottlieb Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 90210745366a..9af9b953ce30 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3979,6 +3979,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_QPT_SMI: + if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) { + mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n"); + err = -EPERM; + *bad_wr = wr; + goto out; + } case MLX5_IB_QPT_HW_GSI: set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); -- cgit v1.2.3 From ed88451e1f2d400fd6a743d0a481631cf9f97550 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Wed, 18 Jan 2017 14:10:35 +0200 Subject: IB/mlx5: Assign DSCP for R-RoCE QPs Address Path For Routable RoCE QPs, the DSCP should be set in the QP's address path. The DSCP's value is derived from the traffic class. Fixes: 2811ba51b049 ("IB/mlx5: Add RoCE fields to Address Vector") Cc: Achiad Shochat Signed-off-by: Majd Dibbiny Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 21 +++++++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 ++ drivers/infiniband/hw/mlx5/qp.c | 7 +++++++ 3 files changed, 30 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1dea4073d83f..6a81f0273f45 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -325,6 +325,27 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); } +int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, + int index, enum ib_gid_type *gid_type) +{ + struct ib_gid_attr attr; + union ib_gid gid; + int ret; + + ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr); + if (ret) + return ret; + + if (!attr.ndev) + return -ENODEV; + + dev_put(attr.ndev); + + *gid_type = attr.gid_type; + + return 0; +} + static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e1a4b93dce6b..dda01d7e8847 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -872,6 +872,8 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, int index); +int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, + int index, enum ib_gid_type *gid_type); /* GSI QP helper functions */ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9af9b953ce30..e22d9572ae8f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2198,6 +2198,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, { enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port); int err; + enum ib_gid_type gid_type; if (attr_mask & IB_QP_PKEY_INDEX) path->pkey_index = cpu_to_be16(alt ? 
attr->alt_pkey_index : @@ -2216,10 +2217,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (ll == IB_LINK_LAYER_ETHERNET) { if (!(ah->ah_flags & IB_AH_GRH)) return -EINVAL; + err = mlx5_get_roce_gid_type(dev, port, ah->grh.sgid_index, + &gid_type); + if (err) + return err; memcpy(path->rmac, ah->dmac, sizeof(ah->dmac)); path->udp_sport = mlx5_get_roce_udp_sport(dev, port, ah->grh.sgid_index); path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4; + if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) + path->ecn_dscp = (ah->grh.traffic_class >> 2) & 0x3f; } else { path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; path->fl_free_ar |= -- cgit v1.2.3 From 5abb0da9cd438483d80517b1d5f0a62a2f818426 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 18 Jan 2017 14:10:36 +0200 Subject: IB/mlx5: Remove deprecated module parameter Commit 9603b61de1ee ("mlx5: Move pci device handling from mlx5_ib to mlx5_core") moved prof_sel module parameter from mlx5_ib to mlx5_core and marked it as deprecated in 2014. Three years after deprecation, it is time to remove the deprecated module parameter. Signed-off-by: Leon Romanovsky Reviewed-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6a81f0273f45..fc02f5f05b38 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -64,10 +64,6 @@ MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRIVER_VERSION); -static int deprecated_prof_sel = 2; -module_param_named(prof_sel, deprecated_prof_sel, int, 0444); -MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core"); - static char mlx5_version[] = DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; @@ -3479,9 +3475,6 @@ static int __init mlx5_ib_init(void) { int err; - if (deprecated_prof_sel != 2) - pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n"); - err = mlx5_register_interface(&mlx5_ib_interface); return err; -- cgit v1.2.3 From e04a01837729663572baf7164100e28265de7ed8 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 18 Jan 2017 14:59:47 +0200 Subject: net/mlx5: Consolidate flow rules regardless their flow tag Flow rules with same match criteria and value should be mapped to the same flow table entry regardless the flow tag identifier. Flow tag is part of flow table entry context and not of the destination, therefore we should return error when we try to add destination to flow table entry with different flow tag. 
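As an illustration of the new behaviour, a minimal sketch (assuming an already-initialized flow table "ft", match specification "spec" and destination "dst"; error handling trimmed):

	struct mlx5_flow_act flow_act = {
		.action   = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
		.flow_tag = 1,
	};
	struct mlx5_flow_handle *rule1, *rule2;

	rule1 = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1);

	/* Same match criteria and value but a different flow tag: both requests
	 * map to the same flow table entry, and since the tag is part of the
	 * FTE context (not of the destination), the second one now fails with
	 * -EEXIST instead of silently keeping the first tag.
	 */
	flow_act.flow_tag = 2;
	rule2 = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1);
	if (IS_ERR(rule2))
		pr_debug("conflicting flow tag rejected: %ld\n", PTR_ERR(rule2));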
Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 0ac7a2fc916c..2f4eb99a50fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1232,10 +1232,18 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, fs_for_each_fte(fte, fg) { nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); if (compare_match_value(&fg->mask, match_value, &fte->val) && - (flow_act->action & fte->action) && - flow_act->flow_tag == fte->flow_tag) { + (flow_act->action & fte->action)) { int old_action = fte->action; + if (fte->flow_tag != flow_act->flow_tag) { + mlx5_core_warn(get_dev(&fte->node), + "FTE flow tag %u already exists with different flow tag %u\n", + fte->flow_tag, + flow_act->flow_tag); + handle = ERR_PTR(-EEXIST); + goto unlock_fte; + } + fte->action |= flow_act->action; handle = add_rule_fte(fte, fg, dest, dest_num, old_action != flow_act->action); -- cgit v1.2.3 From 94e03f11ad1f8c947b69fa187412ff04783b2a96 Mon Sep 17 00:00:00 2001 From: Moses Reuben Date: Wed, 18 Jan 2017 14:59:49 +0200 Subject: IB/uverbs: Add support for flow tag The struct ib_uverbs_flow_spec_action_tag associates a tag_id with the flow defined by any number of other flow_spec entries which can reference L2, L3, and L4 packet contents. Use of ib_uverbs_flow_spec_action_tag allows the consumer to identify the set of rules which where matched by the packet by examining the tag_id in the CQE. Signed-off-by: Moses Reuben Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 35 +++++++++++++++++++++++++++++++++-- include/uapi/rdma/ib_user_verbs.h | 13 +++++++++++++ 3 files changed, 47 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 455034ac994e..e1bedf0bac04 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -228,6 +228,7 @@ struct ib_uverbs_flow_spec { struct ib_uverbs_flow_spec_ipv4 ipv4; struct ib_uverbs_flow_spec_tcp_udp tcp_udp; struct ib_uverbs_flow_spec_ipv6 ipv6; + struct ib_uverbs_flow_spec_action_tag flow_tag; }; }; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 700782203483..0834dce0a490 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3143,6 +3143,25 @@ out_put: return ret ? 
ret : in_len; } +static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec, + union ib_flow_spec *ib_spec) +{ + ib_spec->type = kern_spec->type; + switch (ib_spec->type) { + case IB_FLOW_SPEC_ACTION_TAG: + if (kern_spec->flow_tag.size != + sizeof(struct ib_uverbs_flow_spec_action_tag)) + return -EINVAL; + + ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag); + ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id; + break; + default: + return -EINVAL; + } + return 0; +} + static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec) { /* Returns user space filter size, includes padding */ @@ -3167,8 +3186,8 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size, return kern_filter_size; } -static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, - union ib_flow_spec *ib_spec) +static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, + union ib_flow_spec *ib_spec) { ssize_t actual_filter_sz; ssize_t kern_filter_sz; @@ -3263,6 +3282,18 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, return 0; } +static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, + union ib_flow_spec *ib_spec) +{ + if (kern_spec->reserved) + return -EINVAL; + + if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG) + return kern_spec_to_ib_spec_action(kern_spec, ib_spec); + else + return kern_spec_to_ib_spec_filter(kern_spec, ib_spec); +} + int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index dfdfe4e92d31..b458fea590b6 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -929,6 +929,19 @@ struct ib_uverbs_flow_spec_ipv6 { struct ib_uverbs_flow_ipv6_filter mask; }; +struct ib_uverbs_flow_spec_action_tag { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + __u32 tag_id; + __u32 reserved1; +}; + struct ib_uverbs_flow_tunnel_filter { __be32 tunnel_id; }; -- cgit v1.2.3 From 2ac693f9953a2787f942abcbea5d1dc22a310932 Mon Sep 17 00:00:00 2001 From: Moses Reuben Date: Wed, 18 Jan 2017 14:59:50 +0200 Subject: IB/mlx5: Add flow tag support Set flow tag in flow table entry, when IB_FLOW_SPEC_ACTION_TAG is part of the flow specifications. Flow tag doesn't support multicast flows, so it's passing to hardware only when used. 
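For instance, a consumer can append an action-tag specification after its filter specs so that receive completions for packets matching the rule carry the chosen tag (a sketch; "buf" is assumed to point just past the preceding specs inside the flow attribute, and the struct layout is the one introduced by this series):

	struct ib_flow_spec_action_tag *tag = buf;

	tag->type   = IB_FLOW_SPEC_ACTION_TAG;
	tag->size   = sizeof(*tag);
	tag->tag_id = 0x1234;	/* mlx5 accepts tag values below BIT(24) */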
Signed-off-by: Moses Reuben Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index fc02f5f05b38..2e85f73c3235 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1669,6 +1669,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) #define LAST_IPV6_FIELD traffic_class #define LAST_TCP_UDP_FIELD src_port #define LAST_TUNNEL_FIELD tunnel_id +#define LAST_FLOW_TAG_FIELD tag_id /* Field is the last supported field */ #define FIELDS_NOT_SUPPORTED(filter, field)\ @@ -1679,7 +1680,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) sizeof(filter.field)) static int parse_flow_attr(u32 *match_c, u32 *match_v, - const union ib_flow_spec *ib_spec) + const union ib_flow_spec *ib_spec, u32 *tag_id) { void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); @@ -1871,6 +1872,15 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, ntohl(ib_spec->tunnel.val.tunnel_id)); break; + case IB_FLOW_SPEC_ACTION_TAG: + if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag, + LAST_FLOW_TAG_FIELD)) + return -EOPNOTSUPP; + if (ib_spec->flow_tag.tag_id >= BIT(24)) + return -EINVAL; + + *tag_id = ib_spec->flow_tag.tag_id; + break; default: return -EINVAL; } @@ -2054,6 +2064,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_flow_spec *spec; const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); unsigned int spec_index; + u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; int err = 0; if (!is_valid_attr(flow_attr)) @@ -2070,7 +2081,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { err = parse_flow_attr(spec->match_criteria, - spec->match_value, ib_flow); + spec->match_value, ib_flow, &flow_tag); if (err < 0) goto free; @@ -2080,7 +2091,16 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; - flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + + if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG && + (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { + mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", + flow_tag, flow_attr->type); + err = -EINVAL; + goto free; + } + flow_act.flow_tag = flow_tag; handler->rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1); -- cgit v1.2.3 From 1ffd3a26f890222e49c4f4782763e2063026b611 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 18 Jan 2017 14:59:51 +0200 Subject: IB/mlx5: Replace ENOTSUPP usage with EOPNOTSUPP Flow steering is supposed to return EOPNOTSUPP error for unsupported fields and not ENOTSUPP error. 
Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2e85f73c3235..0c12c1d6735d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1704,7 +1704,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { case IB_FLOW_SPEC_ETH: if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) - return -ENOTSUPP; + return -EOPNOTSUPP; ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dmac_47_16), @@ -1752,7 +1752,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, break; case IB_FLOW_SPEC_IPV4: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) - return -ENOTSUPP; + return -EOPNOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, 0xffff); @@ -1784,7 +1784,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, break; case IB_FLOW_SPEC_IPV6: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) - return -ENOTSUPP; + return -EOPNOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, 0xffff); @@ -1825,7 +1825,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, case IB_FLOW_SPEC_TCP: if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD)) - return -ENOTSUPP; + return -EOPNOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 0xff); @@ -1845,7 +1845,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, case IB_FLOW_SPEC_UDP: if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD)) - return -ENOTSUPP; + return -EOPNOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 0xff); @@ -1865,7 +1865,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, case IB_FLOW_SPEC_VXLAN_TUNNEL: if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, LAST_TUNNEL_FIELD)) - return -ENOTSUPP; + return -EOPNOTSUPP; MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, ntohl(ib_spec->tunnel.mask.tunnel_id)); -- cgit v1.2.3 From 7c16f47779498650e9f11a395f8d63accedf35a3 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 18 Jan 2017 15:25:09 +0200 Subject: IB/mlx5: Expose Q counters groups only if they are supported by FW This patch modify the Q counters implementation, so each one of the three Q counters groups will be exposed by the driver only if they are supported by the firmware. 
Signed-off-by: Kamal Heib Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 158 ++++++++++++++++++++++++++--------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 9 +- drivers/infiniband/hw/mlx5/qp.c | 4 +- 3 files changed, 128 insertions(+), 43 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0c12c1d6735d..f8fe98d22965 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3072,13 +3072,102 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } +struct mlx5_ib_q_counter { + const char *name; + size_t offset; +}; + +#define INIT_Q_COUNTER(_name) \ + { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} + +static const struct mlx5_ib_q_counter basic_q_cnts[] = { + INIT_Q_COUNTER(rx_write_requests), + INIT_Q_COUNTER(rx_read_requests), + INIT_Q_COUNTER(rx_atomic_requests), + INIT_Q_COUNTER(out_of_buffer), +}; + +static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = { + INIT_Q_COUNTER(out_of_sequence), +}; + +static const struct mlx5_ib_q_counter retrans_q_cnts[] = { + INIT_Q_COUNTER(duplicate_request), + INIT_Q_COUNTER(rnr_nak_retry_err), + INIT_Q_COUNTER(packet_seq_err), + INIT_Q_COUNTER(implied_nak_seq_err), + INIT_Q_COUNTER(local_ack_timeout_err), +}; + static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) { unsigned int i; - for (i = 0; i < dev->num_ports; i++) + for (i = 0; i < dev->num_ports; i++) { mlx5_core_dealloc_q_counter(dev->mdev, - dev->port[i].q_cnt_id); + dev->port[i].q_cnts.set_id); + kfree(dev->port[i].q_cnts.names); + kfree(dev->port[i].q_cnts.offsets); + } +} + +static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev, + const char ***names, + size_t **offsets, + u32 *num) +{ + u32 num_counters; + + num_counters = ARRAY_SIZE(basic_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) + num_counters += ARRAY_SIZE(out_of_seq_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) + num_counters += ARRAY_SIZE(retrans_q_cnts); + + *names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL); + if (!*names) + return -ENOMEM; + + *offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL); + if (!*offsets) + goto err_names; + + *num = num_counters; + + return 0; + +err_names: + kfree(*names); + return -ENOMEM; +} + +static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev, + const char **names, + size_t *offsets) +{ + int i; + int j = 0; + + for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { + names[j] = basic_q_cnts[i].name; + offsets[j] = basic_q_cnts[i].offset; + } + + if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { + for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { + names[j] = out_of_seq_q_cnts[i].name; + offsets[j] = out_of_seq_q_cnts[i].offset; + } + } + + if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { + for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { + names[j] = retrans_q_cnts[i].name; + offsets[j] = retrans_q_cnts[i].offset; + } + } } static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) @@ -3087,14 +3176,26 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) int ret; for (i = 0; i < dev->num_ports; i++) { + struct mlx5_ib_port *port = &dev->port[i]; + ret = mlx5_core_alloc_q_counter(dev->mdev, - &dev->port[i].q_cnt_id); + &port->q_cnts.set_id); if (ret) { mlx5_ib_warn(dev, "couldn't allocate queue counter for port %d, err %d\n", i + 1, ret); 
goto dealloc_counters; } + + ret = __mlx5_ib_alloc_q_counters(dev, + &port->q_cnts.names, + &port->q_cnts.offsets, + &port->q_cnts.num_counters); + if (ret) + goto dealloc_counters; + + mlx5_ib_fill_q_counters(dev, port->q_cnts.names, + port->q_cnts.offsets); } return 0; @@ -3102,62 +3203,39 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) dealloc_counters: while (--i >= 0) mlx5_core_dealloc_q_counter(dev->mdev, - dev->port[i].q_cnt_id); + dev->port[i].q_cnts.set_id); return ret; } -static const char * const names[] = { - "rx_write_requests", - "rx_read_requests", - "rx_atomic_requests", - "out_of_buffer", - "out_of_sequence", - "duplicate_request", - "rnr_nak_retry_err", - "packet_seq_err", - "implied_nak_seq_err", - "local_ack_timeout_err", -}; - -static const size_t stats_offsets[] = { - MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests), - MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests), - MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests), - MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer), - MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence), - MLX5_BYTE_OFF(query_q_counter_out, duplicate_request), - MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err), - MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err), - MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err), - MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err), -}; - static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, u8 port_num) { - BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets)); + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_port *port = &dev->port[port_num - 1]; /* We support only per port stats */ if (port_num == 0) return NULL; - return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names), + return rdma_alloc_hw_stats_struct(port->q_cnts.names, + port->q_cnts.num_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, - u8 port, int index) + u8 port_num, int index) { struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_port *port = &dev->port[port_num - 1]; int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); void *out; __be32 val; int ret; int i; - if (!port || !stats) + if (!stats) return -ENOSYS; out = mlx5_vzalloc(outlen); @@ -3165,18 +3243,19 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, return -ENOMEM; ret = mlx5_core_query_q_counter(dev->mdev, - dev->port[port - 1].q_cnt_id, 0, + port->q_cnts.set_id, 0, out, outlen); if (ret) goto free; - for (i = 0; i < ARRAY_SIZE(names); i++) { - val = *(__be32 *)(out + stats_offsets[i]); + for (i = 0; i < port->q_cnts.num_counters; i++) { + val = *(__be32 *)(out + port->q_cnts.offsets[i]); stats->value[i] = (u64)be32_to_cpu(val); } + free: kvfree(out); - return ARRAY_SIZE(names); + return port->q_cnts.num_counters; } static void *mlx5_ib_add(struct mlx5_core_dev *mdev) @@ -3328,8 +3407,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); } - if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) && - MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { + if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats; dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats; } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index dda01d7e8847..66090835de27 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -579,8 +579,15 @@ struct mlx5_ib_resources { struct 
mutex mutex; }; +struct mlx5_ib_q_counters { + const char **names; + size_t *offsets; + u32 num_counters; + u16 set_id; +}; + struct mlx5_ib_port { - u16 q_cnt_id; + struct mlx5_ib_q_counters q_cnts; }; struct mlx5_roce { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index e22d9572ae8f..5c7d655655bb 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2784,7 +2784,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, qp->port) - 1; mibport = &dev->port[port_num]; context->qp_counter_set_usr_page |= - cpu_to_be32((u32)(mibport->q_cnt_id) << 24); + cpu_to_be32((u32)(mibport->q_cnts.set_id) << 24); } if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) @@ -2812,7 +2812,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, raw_qp_param.operation = op; if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { - raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id; + raw_qp_param.rq_q_ctr_id = mibport->q_cnts.set_id; raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; } -- cgit v1.2.3 From 23a6964e3adb0796e1633562a574839b92360cb6 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Wed, 18 Jan 2017 15:25:10 +0200 Subject: IB/mlx5: Add port counter support for Receive WQs Counters weren't updated due to Receive WQs' traffic since the counter-id was not associated with the RQ. Added support for associating the q-counter-id with the Receive WQ. The attachment is done only when changing WQ's state from RESET to READY in modify-WQ command. FW support is required for the above, without this support Receive WQ counters will not count. Signed-off-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 12 +++++++++++- include/linux/mlx5/mlx5_ifc.h | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5c7d655655bb..f395ee9d2fea 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2429,7 +2429,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev, if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) { if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) { MLX5_SET64(modify_rq_in, in, modify_bitmask, - MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID); + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID); MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id); } else pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n", @@ -4910,6 +4910,16 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state); MLX5_SET(rqc, rqc, state, wq_state); + if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) { + if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) { + MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID); + MLX5_SET(rqc, rqc, counter_set_id, dev->port->q_cnts.set_id); + } else + pr_info_once("%s: Receive WQ counters are not supported on current FW\n", + dev->ib_dev.name); + } + err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen); kvfree(in); if (!err) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 37327f6ba9cb..2d197d8a7025 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4937,7 +4937,7 @@ struct mlx5_ifc_modify_rq_out_bits { enum { 
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1, - MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID = 1ULL << 3, }; struct mlx5_ifc_modify_rq_in_bits { -- cgit v1.2.3 From 5f23d4265f8ee4d7b76356992931abec7888d372 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Wed, 18 Jan 2017 15:39:58 +0200 Subject: IB/uverbs: Expose vlan offloads capabilities Expose raw packet capabilities to user space as part of query device. Signed-off-by: Noa Osherovich Reviewed-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 6 ++++++ include/uapi/rdma/ib_user_verbs.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 0834dce0a490..e4ae0c62df36 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -4354,6 +4354,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, resp.max_wq_type_rq = attr.max_wq_type_rq; resp.response_length += sizeof(resp.max_wq_type_rq); + + if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps)) + goto end; + + resp.raw_packet_caps = attr.raw_packet_caps; + resp.response_length += sizeof(resp.raw_packet_caps); end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index b458fea590b6..0db9e646edd3 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -247,7 +247,7 @@ struct ib_uverbs_ex_query_device_resp { __u64 device_cap_flags_ex; struct ib_uverbs_rss_caps rss_caps; __u32 max_wq_type_rq; - __u32 reserved; + __u32 raw_packet_caps; }; struct ib_uverbs_query_port { -- cgit v1.2.3 From af1cb95d2e34133e0cf7f48d6045da888414b867 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Wed, 18 Jan 2017 15:39:59 +0200 Subject: IB/uverbs: Enable WQ creation and modification with cvlan offload Enable user space application via WQ creation and modification to turn on and off cvlan offload. 
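A minimal sketch of the kernel-verbs side of such a modification (assuming "wq" is an existing WQ on a device that reports IB_RAW_PACKET_CAP_CVLAN_STRIPPING):

	struct ib_wq_attr wq_attr = {
		.flags_mask = IB_WQ_FLAGS_CVLAN_STRIPPING,
		.flags      = IB_WQ_FLAGS_CVLAN_STRIPPING,	/* 0 here turns stripping off */
	};
	int err = ib_modify_wq(wq, &wq_attr, IB_WQ_FLAGS);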
Signed-off-by: Noa Osherovich Reviewed-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 9 ++++++++- include/uapi/rdma/ib_user_verbs.h | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e4ae0c62df36..0eb204380bff 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3356,6 +3356,9 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, wq_init_attr.wq_context = file; wq_init_attr.wq_type = cmd.wq_type; wq_init_attr.event_handler = ib_uverbs_wq_event_handler; + if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) + + sizeof(cmd.create_flags))) + wq_init_attr.create_flags = cmd.create_flags; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); wq = pd->device->create_wq(pd, &wq_init_attr, uhw); @@ -3511,7 +3514,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, if (!cmd.attr_mask) return -EINVAL; - if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE)) + if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS)) return -EINVAL; wq = idr_read_wq(cmd.wq_handle, file->ucontext); @@ -3520,6 +3523,10 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, wq_attr.curr_wq_state = cmd.curr_wq_state; wq_attr.wq_state = cmd.wq_state; + if (cmd.attr_mask & IB_WQ_FLAGS) { + wq_attr.flags = cmd.flags; + wq_attr.flags_mask = cmd.flags_mask; + } ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); put_wq_read(wq); return ret; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 0db9e646edd3..f8723580ffed 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -1061,6 +1061,8 @@ struct ib_uverbs_ex_create_wq { __u32 cq_handle; __u32 max_wr; __u32 max_sge; + __u32 create_flags; /* Use enum ib_wq_flags */ + __u32 reserved; }; struct ib_uverbs_ex_create_wq_resp { @@ -1089,6 +1091,8 @@ struct ib_uverbs_ex_modify_wq { __u32 wq_handle; __u32 wq_state; __u32 curr_wq_state; + __u32 flags; /* Use enum ib_wq_flags */ + __u32 flags_mask; /* Use enum ib_wq_flags */ }; /* Prevent memory allocation rather than max expected size */ -- cgit v1.2.3 From 9e1b161f3b8f14f2459fa20c26e41e40e049d90e Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Wed, 18 Jan 2017 15:40:00 +0200 Subject: IB/uverbs: Enable QP creation with cvlan offload Enable user applications to create a QP with cvlan stripping offload. 
Signed-off-by: Noa Osherovich Reviewed-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 0eb204380bff..b4b395a054ac 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1891,7 +1891,8 @@ static int create_qp(struct ib_uverbs_file *file, IB_QP_CREATE_CROSS_CHANNEL | IB_QP_CREATE_MANAGED_SEND | IB_QP_CREATE_MANAGED_RECV | - IB_QP_CREATE_SCATTER_FCS)) { + IB_QP_CREATE_SCATTER_FCS | + IB_QP_CREATE_CVLAN_STRIPPING)) { ret = -EINVAL; goto err_put; } -- cgit v1.2.3 From e8161334403ed34a9214bcce517d50bbe3e02b97 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Wed, 18 Jan 2017 15:40:01 +0200 Subject: IB/mlx5: Expose vlan offloads capabilities Check device's capabilities and report which raw packet capabilities are supported. Signed-off-by: Noa Osherovich Reviewed-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f8fe98d22965..fe37da2be26f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -581,8 +581,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) { - if (MLX5_CAP_ETH(mdev, csum_cap)) + if (MLX5_CAP_ETH(mdev, csum_cap)) { + /* Legacy bit to support old userspace libraries */ props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; + props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM; + } + + if (MLX5_CAP_ETH(dev->mdev, vlan_cap)) + props->raw_packet_caps |= + IB_RAW_PACKET_CAP_CVLAN_STRIPPING; if (field_avail(typeof(resp), tso_caps, uhw->outlen)) { max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); @@ -621,8 +628,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && - MLX5_CAP_ETH(dev->mdev, scatter_fcs)) + MLX5_CAP_ETH(dev->mdev, scatter_fcs)) { + /* Legacy bit to support old userspace libraries */ props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS; + props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS; + } if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; -- cgit v1.2.3 From b1f74a8437a38d1c0139175f2ad469d2517a7882 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Wed, 18 Jan 2017 15:40:02 +0200 Subject: IB/mlx5: Enable WQ creation and modification with cvlan offload Allow creating a WQ with cvlan stripping considering device's capabilities. The default value was fixed to disable vlan stripping till was asked explicitly. In addition, allow modification of a WQ to turn on/off this property. 
Signed-off-by: Noa Osherovich Reviewed-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index f395ee9d2fea..59bbe246c92d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4624,9 +4624,20 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size); MLX5_SET(wq, wq, wq_signature, rwq->wq_sig); MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma); + if (init_attr->create_flags & IB_WQ_FLAGS_CVLAN_STRIPPING) { + if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && + MLX5_CAP_ETH(dev->mdev, vlan_cap))) { + mlx5_ib_dbg(dev, "VLAN offloads are not supported\n"); + err = -EOPNOTSUPP; + goto out; + } + } else { + MLX5_SET(rqc, rqc, vsd, 1); + } rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp); +out: kvfree(in); return err; } @@ -4910,6 +4921,22 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state); MLX5_SET(rqc, rqc, state, wq_state); + if (wq_attr_mask & IB_WQ_FLAGS) { + if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) { + if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && + MLX5_CAP_ETH(dev->mdev, vlan_cap))) { + mlx5_ib_dbg(dev, "VLAN offloads are not " + "supported\n"); + err = -EOPNOTSUPP; + goto out; + } + MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD); + MLX5_SET(rqc, rqc, vsd, + (wq_attr->flags & IB_WQ_FLAGS_CVLAN_STRIPPING) ? 0 : 1); + } + } + if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) { if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) { MLX5_SET64(modify_rq_in, in, modify_bitmask, @@ -4921,9 +4948,10 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, } err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen); - kvfree(in); if (!err) rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state; +out: + kvfree(in); return err; } -- cgit v1.2.3 From e4cc4fa7cca9ac7d7c3abea7d6c90db1c519d6c6 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Wed, 18 Jan 2017 15:40:03 +0200 Subject: IB/mlx5: Enable QP creation with cvlan offload Enable creating a RAW Ethernet QP with cvlan stripping offload when it's supported by the hardware. 
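A minimal usage sketch (assuming "pd" and "cq" already exist; on mlx5 the flag is honoured only for IB_QPT_RAW_PACKET QPs on devices exposing eth_net_offloads and vlan_cap, otherwise -EOPNOTSUPP is returned):

	struct ib_qp_init_attr init_attr = {
		.qp_type      = IB_QPT_RAW_PACKET,
		.send_cq      = cq,
		.recv_cq      = cq,
		.cap          = { .max_send_wr = 1,  .max_send_sge = 1,
				  .max_recv_wr = 16, .max_recv_sge = 1 },
		.create_flags = IB_QP_CREATE_CVLAN_STRIPPING,
	};
	struct ib_qp *qp = ib_create_qp(pd, &init_attr);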
Signed-off-by: Noa Osherovich Reviewed-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 ++++++ drivers/infiniband/hw/mlx5/qp.c | 13 ++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 66090835de27..13bef19649f2 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -220,6 +220,10 @@ struct wr_list { u16 next; }; +enum mlx5_ib_rq_flags { + MLX5_IB_RQ_CVLAN_STRIPPING = 1 << 0, +}; + struct mlx5_ib_wq { u64 *wrid; u32 *wr_data; @@ -308,6 +312,7 @@ struct mlx5_ib_rq { struct mlx5_db *doorbell; u32 tirn; u8 state; + u32 flags; }; struct mlx5_ib_sq { @@ -392,6 +397,7 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_SQPN_QP1 = 1 << 6, MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, MLX5_IB_QP_RSS = 1 << 8, + MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9, }; struct mlx5_umr_wr { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 59bbe246c92d..0b2b17d267a8 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1141,7 +1141,8 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, return -ENOMEM; rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); - MLX5_SET(rqc, rqc, vsd, 1); + if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING)) + MLX5_SET(rqc, rqc, vsd, 1); MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, flush_in_error_en, 1); @@ -1238,6 +1239,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (qp->rq.wqe_cnt) { rq->base.container_mibqp = qp; + if (qp->flags & MLX5_IB_QP_CVLAN_STRIPPING) + rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; err = create_raw_packet_qp_rq(dev, rq, in); if (err) goto err_destroy_sq; @@ -1559,6 +1562,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; + if (init_attr->create_flags & IB_QP_CREATE_CVLAN_STRIPPING) { + if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && + MLX5_CAP_ETH(dev->mdev, vlan_cap)) || + (init_attr->qp_type != IB_QPT_RAW_PACKET)) + return -EOPNOTSUPP; + qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING; + } + if (pd && pd->uobject) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { mlx5_ib_dbg(dev, "copy failed\n"); -- cgit v1.2.3 From 4be6da1e5b8dde8b163c540fca5745644a6a9e00 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Wed, 18 Jan 2017 15:40:04 +0200 Subject: IB/mlx5: Support creation of a WQ with scatter FCS offload Add support for creation of a WQ with scatter FCS capability, if this capability is supported by the hardware. 
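A minimal usage sketch (assuming "pd" and "cq" already exist and the device reports IB_RAW_PACKET_CAP_SCATTER_FCS; otherwise mlx5 rejects the creation with -EOPNOTSUPP):

	struct ib_wq_init_attr wq_attr = {
		.wq_type      = IB_WQT_RQ,
		.max_wr       = 64,
		.max_sge      = 1,
		.cq           = cq,
		.create_flags = IB_WQ_FLAGS_SCATTER_FCS,
	};
	struct ib_wq *wq = ib_create_wq(pd, &wq_attr);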
Signed-off-by: Noa Osherovich Reviewed-by: Majd Dibbiny Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0b2b17d267a8..fbfff1269816 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4604,6 +4604,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, struct ib_wq_init_attr *init_attr) { struct mlx5_ib_dev *dev; + int has_net_offloads; __be64 *rq_pas0; void *in; void *rqc; @@ -4635,9 +4636,9 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size); MLX5_SET(wq, wq, wq_signature, rwq->wq_sig); MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma); + has_net_offloads = MLX5_CAP_GEN(dev->mdev, eth_net_offloads); if (init_attr->create_flags & IB_WQ_FLAGS_CVLAN_STRIPPING) { - if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && - MLX5_CAP_ETH(dev->mdev, vlan_cap))) { + if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, vlan_cap))) { mlx5_ib_dbg(dev, "VLAN offloads are not supported\n"); err = -EOPNOTSUPP; goto out; @@ -4645,6 +4646,14 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, } else { MLX5_SET(rqc, rqc, vsd, 1); } + if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) { + if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, scatter_fcs))) { + mlx5_ib_dbg(dev, "Scatter FCS is not supported\n"); + err = -EOPNOTSUPP; + goto out; + } + MLX5_SET(rqc, rqc, scatter_fcs, 1); + } rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp); -- cgit v1.2.3 From d07d1d70ce1ad1c525f51f459ce36ca49ec2bf48 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 18 Jan 2017 16:58:07 +0200 Subject: IB/umem: Update on demand page (ODP) support Currently ODP MR may explicitly register virtual address space area of limited length. This change allows MR to cover entire process virtual address space dynamicaly adding/removing translation entries to device MTT. Add following changes to support implicit MR: * Allow umem to be zero size to back-up implicit MR. * Add new function ib_alloc_odp_umem() to add virtual memory regions to implicit MR dynamically on demand. * Add new function rbt_ib_umem_lookup() to find dynamically added virtual memory regions. * Expose function rbt_ib_umem_for_each_in_range() to other modules and make it safe Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem.c | 3 -- drivers/infiniband/core/umem_odp.c | 87 +++++++++++++++++++++++++++++++---- drivers/infiniband/core/umem_rbtree.c | 21 +++++++-- include/rdma/ib_umem_odp.h | 21 +++++++-- 4 files changed, 113 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 1e62a5f0cb28..9f9630b1bc7b 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -99,9 +99,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (dmasync) dma_attrs |= DMA_ATTR_WRITE_BARRIER; - if (!size) - return ERR_PTR(-EINVAL); - /* * If the combination of the addr and size requested for this memory * region causes an integer overflow, return error. 
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 6b079a31dced..1104d367b8f2 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -239,6 +239,71 @@ static const struct mmu_notifier_ops ib_umem_notifiers = { .invalidate_range_end = ib_umem_notifier_invalidate_range_end, }; +struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, + unsigned long addr, + size_t size) +{ + struct ib_umem *umem; + struct ib_umem_odp *odp_data; + int pages = size >> PAGE_SHIFT; + int ret; + + umem = kzalloc(sizeof(*umem), GFP_KERNEL); + if (!umem) + return ERR_PTR(-ENOMEM); + + umem->context = context; + umem->length = size; + umem->address = addr; + umem->page_size = PAGE_SIZE; + umem->writable = 1; + + odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL); + if (!odp_data) { + ret = -ENOMEM; + goto out_umem; + } + odp_data->umem = umem; + + mutex_init(&odp_data->umem_mutex); + init_completion(&odp_data->notifier_completion); + + odp_data->page_list = vzalloc(pages * sizeof(*odp_data->page_list)); + if (!odp_data->page_list) { + ret = -ENOMEM; + goto out_odp_data; + } + + odp_data->dma_list = vzalloc(pages * sizeof(*odp_data->dma_list)); + if (!odp_data->dma_list) { + ret = -ENOMEM; + goto out_page_list; + } + + down_write(&context->umem_rwsem); + context->odp_mrs_count++; + rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree); + if (likely(!atomic_read(&context->notifier_count))) + odp_data->mn_counters_active = true; + else + list_add(&odp_data->no_private_counters, + &context->no_private_counters); + up_write(&context->umem_rwsem); + + umem->odp_data = odp_data; + + return umem; + +out_page_list: + vfree(odp_data->page_list); +out_odp_data: + kfree(odp_data); +out_umem: + kfree(umem); + return ERR_PTR(ret); +} +EXPORT_SYMBOL(ib_alloc_odp_umem); + int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem) { int ret_val; @@ -270,18 +335,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem) init_completion(&umem->odp_data->notifier_completion); - umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) * + if (ib_umem_num_pages(umem)) { + umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) * sizeof(*umem->odp_data->page_list)); - if (!umem->odp_data->page_list) { - ret_val = -ENOMEM; - goto out_odp_data; - } + if (!umem->odp_data->page_list) { + ret_val = -ENOMEM; + goto out_odp_data; + } - umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) * + umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) * sizeof(*umem->odp_data->dma_list)); - if (!umem->odp_data->dma_list) { - ret_val = -ENOMEM; - goto out_page_list; + if (!umem->odp_data->dma_list) { + ret_val = -ENOMEM; + goto out_page_list; + } } /* @@ -466,6 +533,7 @@ static int ib_umem_odp_map_dma_single_page( } umem->odp_data->dma_list[page_index] = dma_addr | access_mask; umem->odp_data->page_list[page_index] = page; + umem->npages++; stored_page = 1; } else if (umem->odp_data->page_list[page_index] == page) { umem->odp_data->dma_list[page_index] |= access_mask; @@ -665,6 +733,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, put_page(page); umem->odp_data->page_list[idx] = NULL; umem->odp_data->dma_list[idx] = 0; + umem->npages--; } } mutex_unlock(&umem->odp_data->umem_mutex); diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c index 727d788448f5..d176597b4d78 100644 --- a/drivers/infiniband/core/umem_rbtree.c +++ 
b/drivers/infiniband/core/umem_rbtree.c @@ -78,17 +78,32 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root, void *cookie) { int ret_val = 0; - struct umem_odp_node *node; + struct umem_odp_node *node, *next; struct ib_umem_odp *umem; if (unlikely(start == last)) return ret_val; - for (node = rbt_ib_umem_iter_first(root, start, last - 1); node; - node = rbt_ib_umem_iter_next(node, start, last - 1)) { + for (node = rbt_ib_umem_iter_first(root, start, last - 1); + node; node = next) { + next = rbt_ib_umem_iter_next(node, start, last - 1); umem = container_of(node, struct ib_umem_odp, interval_tree); ret_val = cb(umem->umem, start, last, cookie) || ret_val; } return ret_val; } +EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); + +struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root, + u64 addr, u64 length) +{ + struct umem_odp_node *node; + + node = rbt_ib_umem_iter_first(root, addr, addr + length - 1); + if (node) + return container_of(node, struct ib_umem_odp, interval_tree); + return NULL; + +} +EXPORT_SYMBOL(rbt_ib_umem_lookup); diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 3da0b167041b..542cd8b3414c 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -79,11 +79,15 @@ struct ib_umem_odp { struct completion notifier_completion; int dying; + struct work_struct work; }; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem); +struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, + unsigned long addr, + size_t size); void ib_umem_odp_release(struct ib_umem *umem); @@ -117,10 +121,12 @@ typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end, umem_call_back cb, void *cookie); -struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root, - u64 start, u64 last); -struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node, - u64 start, u64 last); +/* + * Find first region intersecting with address range. + * Return NULL if not found + */ +struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root, + u64 addr, u64 length); static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item, unsigned long mmu_seq) @@ -153,6 +159,13 @@ static inline int ib_umem_odp_get(struct ib_ucontext *context, return -EINVAL; } +static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, + unsigned long addr, + size_t size) +{ + return ERR_PTR(-EINVAL); +} + static inline void ib_umem_odp_release(struct ib_umem *umem) {} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ -- cgit v1.2.3 From d9d0674c0f8a950634fd2166a291e93e606a576c Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 18 Jan 2017 16:58:08 +0200 Subject: IB/umem: Indicate that process is being terminated When process is killed while pagefault operation still in progress - function will fail. In this specific case we don't want any warnings in dmesg to avoid log analyzers false alerts. So we need distinct error code for this case. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem_odp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 1104d367b8f2..f2fc0431512d 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -573,7 +573,8 @@ out: * for failure. 
* An -EAGAIN error code is returned when a concurrent mmu notifier prevents * the function from completing its task. - * + * An -ENOENT error code indicates that userspace process is being terminated + * and mm was already destroyed. * @umem: the umem to map and pin * @user_virt: the address from which we need to map. * @bcnt: the minimal number of bytes to pin and map. The mapping might be @@ -621,7 +622,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, owning_mm = get_task_mm(owning_process); if (owning_mm == NULL) { - ret = -EINVAL; + ret = -ENOENT; goto out_put_task; } -- cgit v1.2.3 From 94990b498969b420949a04294618f9509466b896 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 18 Jan 2017 16:58:09 +0200 Subject: IB/mlx5: Add null_mkey access Add mlx5_cmd_null_mkey() function to access null_mkey information from firmware. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/Makefile | 2 +- drivers/infiniband/hw/mlx5/cmd.c | 48 +++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/cmd.h | 40 +++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/hw/mlx5/cmd.c create mode 100644 drivers/infiniband/hw/mlx5/cmd.h (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 7493a83acd28..90ad2adc752f 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c new file mode 100644 index 000000000000..cdc2d3017da7 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "cmd.h" + +int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey) +{ + u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; + int err; + + MLX5_SET(query_special_contexts_in, in, opcode, + MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *null_mkey = MLX5_GET(query_special_contexts_out, out, + null_mkey); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h new file mode 100644 index 000000000000..7ca8a7b6434d --- /dev/null +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_IB_CMD_H +#define MLX5_IB_CMD_H + +#include +#include + +int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); +#endif /* MLX5_IB_CMD_H */ -- cgit v1.2.3 From 49780d42dfc9ec0f4090c32ca59688449da1a1cd Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 18 Jan 2017 16:58:10 +0200 Subject: IB/mlx5: Expose MR cache for mlx5_ib Allow other parts of mlx5_ib to use MR cache mechanism. * Add new functions mlx5_mr_cache_alloc and mlx5_mr_cache_free * Traditional MTT MKey buckets are limited by MAX_UMR_CACHE_ENTRY Additinal buckets may be added above. 
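As a usage illustration (not part of the patch), a consumer inside mlx5_ib could draw a pre-created MKey from one of the new buckets and hand it back when done. MY_CACHE_ENTRY stands for whatever bucket index above MAX_UMR_CACHE_ENTRY such a consumer would define; it is a hypothetical name.

	/* Sketch only: MY_CACHE_ENTRY is a hypothetical bucket index. */
	static int use_cached_mkey(struct mlx5_ib_dev *dev)
	{
		struct mlx5_ib_mr *mr;

		mr = mlx5_mr_cache_alloc(dev, MY_CACHE_ENTRY);
		if (IS_ERR_OR_NULL(mr))
			return mr ? PTR_ERR(mr) : -EINVAL;

		/* ... program the MKey (mr->mmkey.key) via UMR and use it ... */

		/* Unregisters through UMR and puts the MKey back on its bucket. */
		mlx5_mr_cache_free(dev, mr);
		return 0;
	}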
Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 9 +++- drivers/infiniband/hw/mlx5/mr.c | 99 ++++++++++++++++++++++++++---------- include/linux/mlx5/driver.h | 3 +- 3 files changed, 82 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 13bef19649f2..efc44de3c7d7 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -541,6 +541,10 @@ struct mlx5_cache_ent { struct dentry *dir; char name[4]; u32 order; + u32 xlt; + u32 access_mode; + u32 page; + u32 size; u32 cur; u32 miss; @@ -555,6 +559,7 @@ struct mlx5_cache_ent { struct work_struct work; struct delayed_work dwork; int pending; + struct completion compl; }; struct mlx5_mr_cache { @@ -837,7 +842,9 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); -int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); + +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry); +void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 8cf2a67f9fb0..8f5b94d483e4 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -49,6 +49,7 @@ enum { static int clean_mr(struct mlx5_ib_mr *mr); static int use_umr(struct mlx5_ib_dev *dev, int order); +static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { @@ -149,6 +150,9 @@ static void reg_mr_callback(int status, void *context) if (err) pr_err("Error inserting to mkey tree. 
0x%x\n", -err); write_unlock_irqrestore(&table->lock, flags); + + if (!completion_done(&ent->compl)) + complete(&ent->compl); } static int add_keys(struct mlx5_ib_dev *dev, int c, int num) @@ -157,7 +161,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) struct mlx5_cache_ent *ent = &cache->ent[c]; int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; - int npages = 1 << ent->order; void *mkc; u32 *in; int err = 0; @@ -185,11 +188,11 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, access_mode, ent->access_mode); MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2); - MLX5_SET(mkc, mkc, log_page_size, 12); + MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); + MLX5_SET(mkc, mkc, log_page_size, ent->page); spin_lock_irq(&ent->lock); ent->pending++; @@ -447,6 +450,42 @@ static void cache_work_func(struct work_struct *work) __cache_work_func(ent); } +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + struct mlx5_ib_mr *mr; + int err; + + if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { + mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); + return NULL; + } + + ent = &cache->ent[entry]; + while (1) { + spin_lock_irq(&ent->lock); + if (list_empty(&ent->head)) { + spin_unlock_irq(&ent->lock); + + err = add_keys(dev, entry, 1); + if (err) + return ERR_PTR(err); + + wait_for_completion(&ent->compl); + } else { + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, + list); + list_del(&mr->list); + ent->cur--; + spin_unlock_irq(&ent->lock); + if (ent->cur < ent->limit) + queue_work(cache->wq, &ent->work); + return mr; + } + } +} + static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) { struct mlx5_mr_cache *cache = &dev->cache; @@ -456,12 +495,12 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) int i; c = order2idx(dev, order); - if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { + if (c < 0 || c > MAX_UMR_CACHE_ENTRY) { mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); return NULL; } - for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) { + for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) { ent = &cache->ent[i]; mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); @@ -488,7 +527,7 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) return mr; } -static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; @@ -500,6 +539,10 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); return; } + + if (unreg_umr(dev, mr)) + return; + ent = &cache->ent[c]; spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); @@ -602,7 +645,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; - int limit; int err; int i; @@ -615,26 +657,33 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - INIT_LIST_HEAD(&cache->ent[i].head); - 
spin_lock_init(&cache->ent[i].lock); - ent = &cache->ent[i]; INIT_LIST_HEAD(&ent->head); spin_lock_init(&ent->lock); ent->order = i + 2; ent->dev = dev; + ent->limit = 0; - if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && - mlx5_core_is_pf(dev->mdev) && - use_umr(dev, ent->order)) - limit = dev->mdev->profile->mr_cache[i].limit; - else - limit = 0; - + init_completion(&ent->compl); INIT_WORK(&ent->work, cache_work_func); INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); - ent->limit = limit; queue_work(cache->wq, &ent->work); + + if (i > MAX_UMR_CACHE_ENTRY) + continue; + + if (!use_umr(dev, ent->order)) + continue; + + ent->page = PAGE_SHIFT; + ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) / + MLX5_IB_UMR_OCTOWORD; + ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; + if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && + mlx5_core_is_pf(dev->mdev)) + ent->limit = dev->mdev->profile->mr_cache[i].limit; + else + ent->limit = 0; } err = mlx5_mr_cache_debugfs_init(dev); @@ -758,7 +807,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size) static int use_umr(struct mlx5_ib_dev *dev, int order) { if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) - return order < MAX_MR_CACHE_ENTRIES + 2; + return order <= MAX_UMR_CACHE_ENTRY + 2; return order <= MLX5_MAX_UMR_SHIFT; } @@ -871,7 +920,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, MLX5_IB_UPD_XLT_ENABLE); if (err) { - free_cached_mr(dev, mr); + mlx5_mr_cache_free(dev, mr); return ERR_PTR(err); } @@ -1091,6 +1140,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, goto err_2; } mr->mmkey.type = MLX5_MKEY_MR; + mr->desc_size = sizeof(struct mlx5_mtt); mr->umem = umem; mr->dev = dev; mr->live = 1; @@ -1398,12 +1448,7 @@ static int clean_mr(struct mlx5_ib_mr *mr) return err; } } else { - err = unreg_umr(dev, mr); - if (err) { - mlx5_ib_warn(dev, "failed unregister\n"); - return err; - } - free_cached_mr(dev, mr); + mlx5_mr_cache_free(dev, mr); } if (!umred) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b8d69aeb1784..2534b8a0fd7b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1052,7 +1052,8 @@ enum { }; enum { - MAX_MR_CACHE_ENTRIES = 21, + MAX_UMR_CACHE_ENTRY = 20, + MAX_MR_CACHE_ENTRIES }; enum { -- cgit v1.2.3 From 81713d3788d2e6bc005f15ee1c59d0eb06050a6b Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 18 Jan 2017 16:58:11 +0200 Subject: IB/mlx5: Add implicit MR support Add implicit MR, covering entire user address space. The MR is implemented as an indirect KSM MR consisting of 1GB direct MRs. Pages and direct MRs are added/removed to MR by ODP. 
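The geometry implied by the constants added in odp.c below works out as follows; the concrete numbers assume 4 KiB pages and a 47-bit user address space, which are illustrative values rather than requirements.

	/*
	 * Worked example, assuming PAGE_SHIFT == 12 and TASK_SIZE == 1ULL << 47:
	 *
	 *   MLX5_IMR_MTT_BITS    = 30 - 12        = 18
	 *   MLX5_IMR_MTT_ENTRIES = 1 << 18        = 262144 pages per leaf MR
	 *   MLX5_IMR_MTT_SIZE    = 1 << 30        = 1 GiB covered by each leaf
	 *   get_order(TASK_SIZE) = 47 - 12        = 35
	 *   mlx5_imr_ksm_entries = 1 << (35 - 18) = 131072 KSM slots
	 *
	 * 131072 slots x 1 GiB per slot = 128 TiB = the whole user address
	 * space, so a single indirect KSM MKey spans every possible VA while
	 * leaf MTT MRs (and their pages) are instantiated only where page
	 * faults actually land; untouched slots point at the null MKey.
	 */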
Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 2 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 20 +- drivers/infiniband/hw/mlx5/mr.c | 33 ++- drivers/infiniband/hw/mlx5/odp.c | 505 ++++++++++++++++++++++++++++++++--- include/linux/mlx5/driver.h | 2 + 5 files changed, 513 insertions(+), 49 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index fe37da2be26f..eb8719ca500e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3583,6 +3583,8 @@ static int __init mlx5_ib_init(void) { int err; + mlx5_ib_odp_init(); + err = mlx5_register_interface(&mlx5_ib_interface); return err; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index efc44de3c7d7..3cd064b5f0bf 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -202,6 +202,7 @@ struct mlx5_ib_flow_db { #define MLX5_IB_UPD_XLT_ADDR BIT(3) #define MLX5_IB_UPD_XLT_PD BIT(4) #define MLX5_IB_UPD_XLT_ACCESS BIT(5) +#define MLX5_IB_UPD_XLT_INDIRECT BIT(6) /* Private QP creation flags to be passed in ib_qp_init_attr.create_flags. * @@ -503,6 +504,10 @@ struct mlx5_ib_mr { int live; void *descs_alloc; int access_flags; /* Needed for rereg MR */ + + struct mlx5_ib_mr *parent; + atomic_t num_leaf_free; + wait_queue_head_t q_leaf_free; }; struct mlx5_ib_mw { @@ -637,6 +642,7 @@ struct mlx5_ib_dev { * being used by a page fault handler. */ struct srcu_struct mr_srcu; + u32 null_mkey; #endif struct mlx5_ib_flow_db flow_db; /* protect resources needed as part of reset flow */ @@ -789,6 +795,9 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags); +struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, + int access_flags); +void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); @@ -868,6 +877,9 @@ int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, unsigned long end); +void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); +void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, + size_t nentries, struct mlx5_ib_mr *mr, int flags); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -875,9 +887,13 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) } static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; } -static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {} +static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } -static inline void mlx5_ib_odp_cleanup(void) {} +static inline void mlx5_ib_odp_cleanup(void) {} +static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {} +static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, + size_t nentries, struct mlx5_ib_mr *mr, + int flags) {} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 
8f5b94d483e4..3c1f483d003f 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -469,7 +469,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) spin_unlock_irq(&ent->lock); err = add_keys(dev, entry, 1); - if (err) + if (err && err != -EAGAIN) return ERR_PTR(err); wait_for_completion(&ent->compl); @@ -669,8 +669,10 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); queue_work(cache->wq, &ent->work); - if (i > MAX_UMR_CACHE_ENTRY) + if (i > MAX_UMR_CACHE_ENTRY) { + mlx5_odp_init_mr_cache_entry(ent); continue; + } if (!use_umr(dev, ent->order)) continue; @@ -935,6 +937,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages, { struct mlx5_ib_dev *dev = mr->dev; struct ib_umem *umem = mr->umem; + if (flags & MLX5_IB_UPD_XLT_INDIRECT) { + mlx5_odp_populate_klm(xlt, idx, npages, mr, flags); + return npages; + } npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx); @@ -968,7 +974,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, struct mlx5_umr_wr wr; struct ib_sge sg; int err = 0; - int desc_size = sizeof(struct mlx5_mtt); + int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) + ? sizeof(struct mlx5_klm) + : sizeof(struct mlx5_mtt); const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; const int page_mask = page_align - 1; size_t pages_mapped = 0; @@ -1186,6 +1194,18 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (!start && length == U64_MAX) { + if (!(access_flags & IB_ACCESS_ON_DEMAND) || + !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) + return ERR_PTR(-EINVAL); + + mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags); + return &mr->ibmr; + } +#endif + err = mr_umem_get(pd, start, length, access_flags, &umem, &npages, &page_shift, &ncont, &order); @@ -1471,8 +1491,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr) /* Wait for all running page-fault handlers to finish. */ synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ - mlx5_ib_invalidate_range(umem, ib_umem_start(umem), - ib_umem_end(umem)); + if (umem->odp_data->page_list) + mlx5_ib_invalidate_range(umem, ib_umem_start(umem), + ib_umem_end(umem)); + else + mlx5_ib_free_implicit_mr(mr); /* * We kill the umem before the MR for ODP, * so that there will not be any invalidations in diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index e5bc267aca73..d7b12f0750e2 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -34,6 +34,7 @@ #include #include "mlx5_ib.h" +#include "cmd.h" #define MAX_PREFETCH_LEN (4*1024*1024U) @@ -41,6 +42,140 @@ * a pagefault. 
*/ #define MMU_NOTIFIER_TIMEOUT 1000 +#define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT) +#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT) +#define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS) +#define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT) +#define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1)) + +#define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT + +static u64 mlx5_imr_ksm_entries; + +static int check_parent(struct ib_umem_odp *odp, + struct mlx5_ib_mr *parent) +{ + struct mlx5_ib_mr *mr = odp->private; + + return mr && mr->parent == parent; +} + +static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp) +{ + struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent; + struct ib_ucontext *ctx = odp->umem->context; + struct rb_node *rb; + + down_read(&ctx->umem_rwsem); + while (1) { + rb = rb_next(&odp->interval_tree.rb); + if (!rb) + goto not_found; + odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); + if (check_parent(odp, parent)) + goto end; + } +not_found: + odp = NULL; +end: + up_read(&ctx->umem_rwsem); + return odp; +} + +static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx, + u64 start, u64 length, + struct mlx5_ib_mr *parent) +{ + struct ib_umem_odp *odp; + struct rb_node *rb; + + down_read(&ctx->umem_rwsem); + odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length); + if (!odp) + goto end; + + while (1) { + if (check_parent(odp, parent)) + goto end; + rb = rb_next(&odp->interval_tree.rb); + if (!rb) + goto not_found; + odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); + if (ib_umem_start(odp->umem) > start + length) + goto not_found; + } +not_found: + odp = NULL; +end: + up_read(&ctx->umem_rwsem); + return odp; +} + +void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, + size_t nentries, struct mlx5_ib_mr *mr, int flags) +{ + struct ib_pd *pd = mr->ibmr.pd; + struct ib_ucontext *ctx = pd->uobject->context; + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem_odp *odp; + unsigned long va; + int i; + + if (flags & MLX5_IB_UPD_XLT_ZAP) { + for (i = 0; i < nentries; i++, pklm++) { + pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); + pklm->key = cpu_to_be32(dev->null_mkey); + pklm->va = 0; + } + return; + } + + odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE, + nentries * MLX5_IMR_MTT_SIZE, mr); + + for (i = 0; i < nentries; i++, pklm++) { + pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); + va = (offset + i) * MLX5_IMR_MTT_SIZE; + if (odp && odp->umem->address == va) { + struct mlx5_ib_mr *mtt = odp->private; + + pklm->key = cpu_to_be32(mtt->ibmr.lkey); + odp = odp_next(odp); + } else { + pklm->key = cpu_to_be32(dev->null_mkey); + } + mlx5_ib_dbg(dev, "[%d] va %lx key %x\n", + i, va, be32_to_cpu(pklm->key)); + } +} + +static void mr_leaf_free_action(struct work_struct *work) +{ + struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); + int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT; + struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; + + mr->parent = NULL; + synchronize_srcu(&mr->dev->mr_srcu); + + if (!READ_ONCE(odp->dying)) { + mr->parent = imr; + if (atomic_dec_and_test(&imr->num_leaf_free)) + wake_up(&imr->q_leaf_free); + return; + } + + ib_umem_release(odp->umem); + if (imr->live) + mlx5_ib_update_xlt(imr, idx, 1, 0, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ATOMIC); + mlx5_mr_cache_free(mr->dev, mr); + + if (atomic_dec_and_test(&imr->num_leaf_free)) + wake_up(&imr->q_leaf_free); +} + void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, unsigned long end) { @@ 
-111,6 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, */ ib_umem_odp_unmap_dma_pages(umem, start, end); + + if (unlikely(!umem->npages && mr->parent && + !umem->odp_data->dying)) { + WRITE_ONCE(umem->odp_data->dying, 1); + atomic_inc(&mr->parent->num_leaf_free); + schedule_work(&umem->odp_data->work); + } } void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) @@ -147,6 +289,11 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic)) caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC; + if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) && + MLX5_CAP_GEN(dev->mdev, null_mkey) && + MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) + caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT; + return; } @@ -184,6 +331,197 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, wq_num); } +static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, + struct ib_umem *umem, + bool ksm, int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_mr *mr; + int err; + + mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY : + MLX5_IMR_MTT_CACHE_ENTRY); + + if (IS_ERR(mr)) + return mr; + + mr->ibmr.pd = pd; + + mr->dev = dev; + mr->access_flags = access_flags; + mr->mmkey.iova = 0; + mr->umem = umem; + + if (ksm) { + err = mlx5_ib_update_xlt(mr, 0, + mlx5_imr_ksm_entries, + MLX5_KSM_PAGE_SHIFT, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE); + + } else { + err = mlx5_ib_update_xlt(mr, 0, + MLX5_IMR_MTT_ENTRIES, + PAGE_SHIFT, + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE | + MLX5_IB_UPD_XLT_ATOMIC); + } + + if (err) + goto fail; + + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; + + mr->live = 1; + + mlx5_ib_dbg(dev, "key %x dev %p mr %p\n", + mr->mmkey.key, dev->mdev, mr); + + return mr; + +fail: + mlx5_ib_err(dev, "Failed to register MKEY %d\n", err); + mlx5_mr_cache_free(dev, mr); + + return ERR_PTR(err); +} + +static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr, + u64 io_virt, size_t bcnt) +{ + struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context; + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device); + struct ib_umem_odp *odp, *result = NULL; + u64 addr = io_virt & MLX5_IMR_MTT_MASK; + int nentries = 0, start_idx = 0, ret; + struct mlx5_ib_mr *mtt; + struct ib_umem *umem; + + mutex_lock(&mr->umem->odp_data->umem_mutex); + odp = odp_lookup(ctx, addr, 1, mr); + + mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n", + io_virt, bcnt, addr, odp); + +next_mr: + if (likely(odp)) { + if (nentries) + nentries++; + } else { + umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE); + if (IS_ERR(umem)) { + mutex_unlock(&mr->umem->odp_data->umem_mutex); + return ERR_CAST(umem); + } + + mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags); + if (IS_ERR(mtt)) { + mutex_unlock(&mr->umem->odp_data->umem_mutex); + ib_umem_release(umem); + return ERR_CAST(mtt); + } + + odp = umem->odp_data; + odp->private = mtt; + mtt->umem = umem; + mtt->mmkey.iova = addr; + mtt->parent = mr; + INIT_WORK(&odp->work, mr_leaf_free_action); + + if (!nentries) + start_idx = addr >> MLX5_IMR_MTT_SHIFT; + nentries++; + } + + odp->dying = 0; + + /* Return first odp if region not covered by single one */ + if (likely(!result)) + result = odp; + + addr += MLX5_IMR_MTT_SIZE; + if (unlikely(addr < io_virt + bcnt)) { + odp = odp_next(odp); + if (odp && odp->umem->address != addr) + odp = NULL; + goto 
next_mr; + } + + if (unlikely(nentries)) { + ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ATOMIC); + if (ret) { + mlx5_ib_err(dev, "Failed to update PAS\n"); + result = ERR_PTR(ret); + } + } + + mutex_unlock(&mr->umem->odp_data->umem_mutex); + return result; +} + +struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, + int access_flags) +{ + struct ib_ucontext *ctx = pd->ibpd.uobject->context; + struct mlx5_ib_mr *imr; + struct ib_umem *umem; + + umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0); + if (IS_ERR(umem)) + return ERR_CAST(umem); + + imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags); + if (IS_ERR(imr)) { + ib_umem_release(umem); + return ERR_CAST(imr); + } + + imr->umem = umem; + init_waitqueue_head(&imr->q_leaf_free); + atomic_set(&imr->num_leaf_free, 0); + + return imr; +} + +static int mr_leaf_free(struct ib_umem *umem, u64 start, + u64 end, void *cookie) +{ + struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie; + + if (mr->parent != imr) + return 0; + + ib_umem_odp_unmap_dma_pages(umem, + ib_umem_start(umem), + ib_umem_end(umem)); + + if (umem->odp_data->dying) + return 0; + + WRITE_ONCE(umem->odp_data->dying, 1); + atomic_inc(&imr->num_leaf_free); + schedule_work(&umem->odp_data->work); + + return 0; +} + +void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) +{ + struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context; + + down_read(&ctx->umem_rwsem); + rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX, + mr_leaf_free, imr); + up_read(&ctx->umem_rwsem); + + wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); +} + /* * Handle a single data segment in a page-fault WQE or RDMA region. * @@ -195,47 +533,43 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, * -EFAULT when there's an error mapping the requested pages. The caller will * abort the page fault handling. */ -static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev, +static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key, u64 io_virt, size_t bcnt, u32 *bytes_committed, u32 *bytes_mapped) { int srcu_key; - unsigned int current_seq; + unsigned int current_seq = 0; u64 start_idx; int npages = 0, ret = 0; struct mlx5_ib_mr *mr; u64 access_mask = ODP_READ_ALLOWED_BIT; + struct ib_umem_odp *odp; + int implicit = 0; + size_t size; - srcu_key = srcu_read_lock(&mib_dev->mr_srcu); - mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key); + srcu_key = srcu_read_lock(&dev->mr_srcu); + mr = mlx5_ib_odp_find_mr_lkey(dev, key); /* * If we didn't find the MR, it means the MR was closed while we were * handling the ODP event. In this case we return -EFAULT so that the * QP will be closed. */ if (!mr || !mr->ibmr.pd) { - pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n", - key); + mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n", + key); ret = -EFAULT; goto srcu_unlock; } if (!mr->umem->odp_data) { - pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", - key); + mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", + key); if (bytes_mapped) *bytes_mapped += (bcnt - *bytes_committed); goto srcu_unlock; } - current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq); - /* - * Ensure the sequence number is valid for some time before we call - * gup. 
- */ - smp_rmb(); - /* * Avoid branches - this code will perform correctly * in all iterations (in iteration 2 and above, @@ -244,63 +578,109 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev, io_virt += *bytes_committed; bcnt -= *bytes_committed; + if (!mr->umem->odp_data->page_list) { + odp = implicit_mr_get_data(mr, io_virt, bcnt); + + if (IS_ERR(odp)) { + ret = PTR_ERR(odp); + goto srcu_unlock; + } + mr = odp->private; + implicit = 1; + + } else { + odp = mr->umem->odp_data; + } + +next_mr: + current_seq = READ_ONCE(odp->notifiers_seq); + /* + * Ensure the sequence number is valid for some time before we call + * gup. + */ + smp_rmb(); + + size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt); start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT; if (mr->umem->writable) access_mask |= ODP_WRITE_ALLOWED_BIT; - npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt, - access_mask, current_seq); - if (npages < 0) { - ret = npages; + + ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size, + access_mask, current_seq); + + if (ret < 0) goto srcu_unlock; - } - if (npages > 0) { - mutex_lock(&mr->umem->odp_data->umem_mutex); + if (ret > 0) { + int np = ret; + + mutex_lock(&odp->umem_mutex); if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) { /* * No need to check whether the MTTs really belong to * this MR, since ib_umem_odp_map_dma_pages already * checks this. */ - ret = mlx5_ib_update_xlt(mr, start_idx, npages, + ret = mlx5_ib_update_xlt(mr, start_idx, np, PAGE_SHIFT, MLX5_IB_UPD_XLT_ATOMIC); } else { ret = -EAGAIN; } - mutex_unlock(&mr->umem->odp_data->umem_mutex); + mutex_unlock(&odp->umem_mutex); if (ret < 0) { if (ret != -EAGAIN) - pr_err("Failed to update mkey page tables\n"); + mlx5_ib_err(dev, "Failed to update mkey page tables\n"); goto srcu_unlock; } if (bytes_mapped) { - u32 new_mappings = npages * PAGE_SIZE - + u32 new_mappings = np * PAGE_SIZE - (io_virt - round_down(io_virt, PAGE_SIZE)); - *bytes_mapped += min_t(u32, new_mappings, bcnt); + *bytes_mapped += min_t(u32, new_mappings, size); } + + npages += np; + } + + bcnt -= size; + if (unlikely(bcnt)) { + struct ib_umem_odp *next; + + io_virt += size; + next = odp_next(odp); + if (unlikely(!next || next->umem->address != io_virt)) { + mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n", + io_virt, next); + ret = -EAGAIN; + goto srcu_unlock_no_wait; + } + odp = next; + mr = odp->private; + goto next_mr; } srcu_unlock: if (ret == -EAGAIN) { - if (!mr->umem->odp_data->dying) { - struct ib_umem_odp *odp_data = mr->umem->odp_data; + if (implicit || !odp->dying) { unsigned long timeout = msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT); if (!wait_for_completion_timeout( - &odp_data->notifier_completion, + &odp->notifier_completion, timeout)) { - pr_warn("timeout waiting for mmu notifier completion\n"); + mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n", + current_seq, odp->notifiers_seq); } } else { /* The MR is being killed, kill the QP as well. */ ret = -EFAULT; } } - srcu_read_unlock(&mib_dev->mr_srcu, srcu_key); + +srcu_unlock_no_wait: + srcu_read_unlock(&dev->mr_srcu, srcu_key); *bytes_committed = 0; return ret ? ret : npages; } @@ -618,8 +998,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, goto resolve_page_fault; } else if (ret < 0 || total_wqe_bytes > bytes_mapped) { if (ret != -ENOENT) - mlx5_ib_err(dev, "Error getting user pages for page fault. Error: %d\n", - ret); + mlx5_ib_err(dev, "PAGE FAULT error: %d. QP 0x%x. 
type: 0x%x\n", + ret, pfault->wqe.wq_num, pfault->type); goto resolve_page_fault; } @@ -627,7 +1007,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, resolve_page_fault: mlx5_ib_page_fault_resume(dev, pfault, resume_with_error); mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n", - pfault->token, resume_with_error, + pfault->wqe.wq_num, resume_with_error, pfault->type); free_page((unsigned long)buffer); } @@ -700,10 +1080,9 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, rkey, address, prefetch_len, &bytes_committed, NULL); - if (ret < 0) { + if (ret < 0 && ret != -EAGAIN) { mlx5_ib_warn(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n", - ret, pfault->token, address, - prefetch_len); + ret, pfault->token, address, prefetch_len); } } } @@ -728,19 +1107,61 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, } } -int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) +void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) +{ + if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) + return; + + switch (ent->order - 2) { + case MLX5_IMR_MTT_CACHE_ENTRY: + ent->page = PAGE_SHIFT; + ent->xlt = MLX5_IMR_MTT_ENTRIES * + sizeof(struct mlx5_mtt) / + MLX5_IB_UMR_OCTOWORD; + ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; + ent->limit = 0; + break; + + case MLX5_IMR_KSM_CACHE_ENTRY: + ent->page = MLX5_KSM_PAGE_SHIFT; + ent->xlt = mlx5_imr_ksm_entries * + sizeof(struct mlx5_klm) / + MLX5_IB_UMR_OCTOWORD; + ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM; + ent->limit = 0; + break; + } +} + +int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) { int ret; - ret = init_srcu_struct(&ibdev->mr_srcu); + ret = init_srcu_struct(&dev->mr_srcu); if (ret) return ret; + if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) { + ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey); + if (ret) { + mlx5_ib_err(dev, "Error getting null_mkey %d\n", ret); + return ret; + } + } + return 0; } -void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) +void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *dev) +{ + cleanup_srcu_struct(&dev->mr_srcu); +} + +int mlx5_ib_odp_init(void) { - cleanup_srcu_struct(&ibdev->mr_srcu); + mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) - + MLX5_IMR_MTT_BITS); + + return 0; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2534b8a0fd7b..886ff2b00500 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1053,6 +1053,8 @@ enum { enum { MAX_UMR_CACHE_ENTRY = 20, + MLX5_IMR_MTT_CACHE_ENTRY, + MLX5_IMR_KSM_CACHE_ENTRY, MAX_MR_CACHE_ENTRIES }; -- cgit v1.2.3 From 72cd57178f3447b612f136c7a215418b5a20bf3d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 24 Jan 2017 13:02:36 +0200 Subject: IB/mlx5: Support raw packet protocol Mark support for the new raw packet protocol on Eth ports. 
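The practical effect, sketched below for illustration (not patch code), is that an Ethernet port now reports the raw packet protocol in its immutable capabilities even when the RoCE L3 type capabilities are absent, so callers can rely on the existing core helper:

	static bool port_allows_raw_packet(struct ib_device *ibdev, u8 port_num)
	{
		/*
		 * True for mlx5 Ethernet ports after this change; previously
		 * get_core_cap_flags() could return 0 and this test failed.
		 */
		return rdma_protocol_raw_packet(ibdev, port_num);
	}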
Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index eb8719ca500e..c79a5c9f1f9a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2938,11 +2938,13 @@ static u32 get_core_cap_flags(struct ib_device *ibdev) if (ll == IB_LINK_LAYER_INFINIBAND) return RDMA_CORE_PORT_IBA_IB; + ret = RDMA_CORE_PORT_RAW_PACKET; + if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) - return 0; + return ret; if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP)) - return 0; + return ret; if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP) ret |= RDMA_CORE_PORT_IBA_ROCE; -- cgit v1.2.3 From bc63f9d5580696a49d27056293e3ccd48b8b917e Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 24 Jan 2017 13:02:37 +0200 Subject: IB/mlx4: Support raw packet protocol Mark support for the new raw packet protocol on Eth ports. Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 7031a8dd4d14..e0c1ef7696e8 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2546,16 +2546,19 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) { immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; } else { if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE | RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET; + if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE | + RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP)) + immutable->max_mad_size = IB_MGMT_MAD_SIZE; } - immutable->max_mad_size = IB_MGMT_MAD_SIZE; - return 0; } -- cgit v1.2.3 From ce1e055fb9aba56c80d84f63edfa102691f63d41 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 24 Jan 2017 13:02:38 +0200 Subject: IB: Add protocol for USNIC Add protocol definition for the proprietary USNIC driver.
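A small illustration (not patch code) of what the new bit buys: RDMA_CORE_PORT_USNIC carries only the protocol bit, so a usNIC port answers the new protocol query while the IB management capability queries stay false.

	static void usnic_port_caps_example(struct ib_device *ibdev, u8 port_num)
	{
		WARN_ON(!rdma_protocol_usnic(ibdev, port_num)); /* protocol is reported */
		WARN_ON(rdma_cap_ib_mad(ibdev, port_num));      /* but no MAD capability implied */
	}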
Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Reviewed-by: Christian Benvenuti Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_ib_main.c | 1 + include/rdma/ib_verbs.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 0a89a955550b..dde0b23f6559 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -325,6 +325,7 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num, if (err) return err; + immutable->core_cap_flags = RDMA_CORE_PORT_USNIC; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index f311c2593a85..07399023352b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -490,6 +490,7 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( #define RDMA_CORE_CAP_PROT_IWARP 0x00400000 #define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000 #define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000 +#define RDMA_CORE_CAP_PROT_USNIC 0x02000000 #define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \ | RDMA_CORE_CAP_IB_MAD \ @@ -515,6 +516,8 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( #define RDMA_CORE_PORT_RAW_PACKET (RDMA_CORE_CAP_PROT_RAW_PACKET) +#define RDMA_CORE_PORT_USNIC (RDMA_CORE_CAP_PROT_USNIC) + struct ib_port_attr { u64 subnet_prefix; enum ib_port_state state; @@ -2350,6 +2353,11 @@ static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 p return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET; } +static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_USNIC; +} + /** * rdma_cap_ib_mad - Check if the port of a device supports Infiniband * Management Datagrams. -- cgit v1.2.3 From c4550c63b363914071d63a36e5bedcfe22057940 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 24 Jan 2017 13:02:39 +0200 Subject: IB: Query ports via the core instead of direct into the driver Change the drivers to call ib_query_port in their get port immutable handler instead of their own query port handler. Doing this required to set the core cap flags of this device before the ib_query_port call is made, since the IB core might need these caps to serve the port query. Drivers are ensured by the IB core that the port attributes passed to the port query verb implementation are zero, and hence we removed the zeroing from the drivers. This patch doesn't add any new functionality. 
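The per-driver pattern that results is uniform; below is a composite sketch for a hypothetical driver "foo" that mirrors the hunks in this patch but is not itself part of it (the iWARP capability flag is just an example value).

	static int foo_port_immutable(struct ib_device *ibdev, u8 port_num,
				      struct ib_port_immutable *immutable)
	{
		struct ib_port_attr attr;
		int err;

		/* Set the protocol caps first; ib_query_port() may consult them. */
		immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;

		/* The core zeroes attr before invoking the driver's query_port,
		 * so driver query_port implementations drop their memset().
		 */
		err = ib_query_port(ibdev, port_num, &attr);
		if (err)
			return err;

		immutable->pkey_tbl_len = attr.pkey_tbl_len;
		immutable->gid_tbl_len = attr.gid_tbl_len;
		return 0;
	}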
Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Acked-by: Adit Ranadive Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 7 ++++--- drivers/infiniband/hw/cxgb4/provider.c | 8 ++++---- drivers/infiniband/hw/hfi1/verbs.c | 1 + drivers/infiniband/hw/hns/hns_roce_main.c | 7 ++++--- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 8 ++++---- drivers/infiniband/hw/mlx4/alias_GUID.c | 1 + drivers/infiniband/hw/mlx4/main.c | 18 +++++++++--------- drivers/infiniband/hw/mlx4/sysfs.c | 1 + drivers/infiniband/hw/mlx5/mad.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 11 +++++++---- drivers/infiniband/hw/mthca/mthca_provider.c | 9 +++++---- drivers/infiniband/hw/nes/nes_verbs.c | 5 +++-- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 9 +++++---- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 1 + drivers/infiniband/hw/qedr/verbs.c | 9 +++++---- drivers/infiniband/hw/qib/qib_verbs.c | 1 + drivers/infiniband/hw/usnic/usnic_ib_main.c | 5 +++-- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 5 +++-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 4 ++-- drivers/infiniband/sw/rdmavt/vt.c | 7 ++++--- drivers/infiniband/sw/rxe/rxe_verbs.c | 6 ++++-- 22 files changed, 73 insertions(+), 54 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 9d5fe1853da4..9e39252a570a 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1133,7 +1133,7 @@ static int iwch_query_port(struct ib_device *ibdev, dev = to_iwch_dev(ibdev); netdev = dev->rdev.port_info.lldevs[port-1]; - memset(props, 0, sizeof(struct ib_port_attr)); + /* props being zeroed by the caller, avoid zeroing it here */ props->max_mtu = IB_MTU_4096; if (netdev->mtu >= 4096) props->active_mtu = IB_MTU_4096; @@ -1338,13 +1338,14 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_attr attr; int err; - err = iwch_query_port(ibdev, port_num, &attr); + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + err = ib_query_port(ibdev, port_num, &attr); if (err) return err; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; return 0; } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 49b51b7e0fd7..5b00b50a484f 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -358,8 +358,7 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, dev = to_c4iw_dev(ibdev); netdev = dev->rdev.lldi.ports[port-1]; - - memset(props, 0, sizeof(struct ib_port_attr)); + /* props being zeroed by the caller, avoid zeroing it here */ props->max_mtu = IB_MTU_4096; if (netdev->mtu >= 4096) props->active_mtu = IB_MTU_4096; @@ -505,13 +504,14 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_attr attr; int err; - err = c4iw_query_port(ibdev, port_num, &attr); + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + err = ib_query_port(ibdev, port_num, &attr); if (err) return err; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; return 0; } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 95ed4d6da510..f2d4e0428725 100644 --- 
a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1384,6 +1384,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; u16 lid = ppd->lid; + /* props being zeroed by the caller, avoid zeroing it here */ props->lid = lid ? lid : 0; props->lmc = ppd->lmc; /* OPA logical states match IB logical states */ diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 4953d9cb83a7..7be8158f15ee 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -249,7 +249,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, assert(port_num > 0); port = port_num - 1; - memset(props, 0, sizeof(*props)); + /* props being zeroed by the caller, avoid zeroing it here */ props->max_mtu = hr_dev->caps.max_mtu; props->gid_tbl_len = hr_dev->caps.gid_table_len[port]; @@ -400,14 +400,15 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num, struct ib_port_attr attr; int ret; - ret = hns_roce_query_port(ib_dev, port_num, &attr); + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + + ret = ib_query_port(ib_dev, port_num, &attr); if (ret) return ret; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 29e97df9e1a7..3e830486417b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -97,8 +97,7 @@ static int i40iw_query_port(struct ib_device *ibdev, struct i40iw_device *iwdev = to_iwdev(ibdev); struct net_device *netdev = iwdev->netdev; - memset(props, 0, sizeof(*props)); - + /* props being zeroed by the caller, avoid zeroing it here */ props->max_mtu = IB_MTU_4096; if (netdev->mtu >= 4096) props->active_mtu = IB_MTU_4096; @@ -2506,14 +2505,15 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_attr attr; int err; - err = i40iw_query_port(ibdev, port_num, &attr); + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + err = ib_query_port(ibdev, port_num, &attr); if (err) return err; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; return 0; } diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 06020c54db20..ea24230ea0d4 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -499,6 +499,7 @@ static int set_guid_rec(struct ib_device *ibdev, struct list_head *head = &dev->sriov.alias_guid.ports_guid[port - 1].cb_list; + memset(&attr, 0, sizeof(attr)); err = __mlx4_ib_query_port(ibdev, port, &attr, 1); if (err) { pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n", diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e0c1ef7696e8..ba6af84cc236 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -741,7 +741,7 @@ int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, { int err; - memset(props, 0, sizeof *props); + /* props being zeroed by the caller, avoid zeroing it here */ err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? 
ib_link_query_port(ibdev, port, props, netw_view) : @@ -1014,7 +1014,7 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, mutex_lock(&mdev->cap_mask_mutex); - err = mlx4_ib_query_port(ibdev, port, &attr); + err = ib_query_port(ibdev, port, &attr); if (err) goto out; @@ -2537,13 +2537,6 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, struct mlx4_ib_dev *mdev = to_mdev(ibdev); int err; - err = mlx4_ib_query_port(ibdev, port_num, &attr); - if (err) - return err; - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) { immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; immutable->max_mad_size = IB_MGMT_MAD_SIZE; @@ -2559,6 +2552,13 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, immutable->max_mad_size = IB_MGMT_MAD_SIZE; } + err = ib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + return 0; } diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 69fb5ba94d0f..0ba5ba7540c8 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -226,6 +226,7 @@ static int add_port_entries(struct mlx4_ib_dev *device, int port_num) int ret = 0 ; struct ib_port_attr attr; + memset(&attr, 0, sizeof(attr)); /* get the physical gid and pkey table sizes.*/ ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1); if (ret) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index af962e7fdc3a..8dacb49eabd9 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -527,7 +527,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, if (!in_mad || !out_mad) goto out; - memset(props, 0, sizeof(*props)); + /* props being zeroed by the caller, avoid zeroing it here */ init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c79a5c9f1f9a..8fd35ebb13e3 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -170,7 +170,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, enum ib_mtu ndev_ib_mtu; u16 qkey_viol_cntr; - memset(props, 0, sizeof(*props)); + /* props being zeroed by the caller, avoid zeroing it here */ props->port_cap_flags |= IB_PORT_CM_SUP; props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; @@ -857,7 +857,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, goto out; } - memset(props, 0, sizeof(*props)); + /* props being zeroed by the caller, avoid zeroing it here */ err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep); if (err) @@ -1005,7 +1005,7 @@ static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, mutex_lock(&dev->cap_mask_mutex); - err = mlx5_ib_query_port(ibdev, port, &attr); + err = ib_query_port(ibdev, port, &attr); if (err) goto out; @@ -2644,6 +2644,7 @@ static int get_port_caps(struct mlx5_ib_dev *dev) } for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) { + memset(pprops, 0, sizeof(*pprops)); err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); if (err) { mlx5_ib_warn(dev, "query_port %d failed %d\n", @@ -2963,7 +2964,9 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 
port_num); int err; - err = mlx5_ib_query_port(ibdev, port_num, &attr); + immutable->core_cap_flags = get_core_cap_flags(ibdev); + + err = ib_query_port(ibdev, port_num, &attr); if (err) return err; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index d31708742ba5..ce163184e742 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -146,7 +146,7 @@ static int mthca_query_port(struct ib_device *ibdev, if (!in_mad || !out_mad) goto out; - memset(props, 0, sizeof *props); + /* props being zeroed by the caller, avoid zeroing it here */ init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; @@ -212,7 +212,7 @@ static int mthca_modify_port(struct ib_device *ibdev, if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex)) return -ERESTARTSYS; - err = mthca_query_port(ibdev, port, &attr); + err = ib_query_port(ibdev, port, &attr); if (err) goto out; @@ -1166,13 +1166,14 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_attr attr; int err; - err = mthca_query_port(ibdev, port_num, &attr); + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + + err = ib_query_port(ibdev, port_num, &attr); if (err) return err; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index aff9fb14768b..b7179f4ac3a6 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -475,7 +475,7 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr struct nes_vnic *nesvnic = to_nesvnic(ibdev); struct net_device *netdev = nesvnic->netdev; - memset(props, 0, sizeof(*props)); + /* props being zeroed by the caller, avoid zeroing it here */ props->max_mtu = IB_MTU_4096; @@ -3670,13 +3670,14 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_attr attr; int err; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + err = nes_query_port(ibdev, port_num, &attr); if (err) return err; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; return 0; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 896071502739..3e43bdc81e7a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -93,15 +93,16 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num, int err; dev = get_ocrdma_dev(ibdev); - err = ocrdma_query_port(ibdev, port_num, &attr); + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + if (ocrdma_is_udp_encap_supported(dev)) + immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; + + err = ib_query_port(ibdev, port_num, &attr); if (err) return err; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; - if (ocrdma_is_udp_encap_supported(dev)) - immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 6af44f8db3d5..013d15c7e593 100644 --- 
a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -210,6 +210,7 @@ int ocrdma_query_port(struct ib_device *ibdev, struct ocrdma_dev *dev; struct net_device *netdev; + /* props being zeroed by the caller, avoid zeroing it here */ dev = get_ocrdma_dev(ibdev); if (port > 1) { pr_err("%s(%d) invalid_port=0x%x\n", __func__, diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 57c8de208077..91a16d38d1fc 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -238,8 +238,8 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) } rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx); - memset(attr, 0, sizeof(*attr)); + /* *attr being zeroed by the caller, avoid zeroing it here */ if (rdma_port->port_state == QED_RDMA_PORT_UP) { attr->state = IB_PORT_ACTIVE; attr->phys_state = 5; @@ -3549,14 +3549,15 @@ int qedr_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_attr attr; int err; - err = qedr_query_port(ibdev, port_num, &attr); + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE | + RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + + err = ib_query_port(ibdev, port_num, &attr); if (err) return err; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE | - RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 4b54c0ddd08a..9161574601f6 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1303,6 +1303,7 @@ static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num, enum ib_mtu mtu; u16 lid = ppd->lid; + /* props being zeroed by the caller, avoid zeroing it here */ props->lid = lid ? 
lid : be16_to_cpu(IB_LID_PERMISSIVE); props->lmc = ppd->lmc; props->state = dd->f_iblink_state(ppd->lastibcstat); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index dde0b23f6559..4f5a45db08e1 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -321,11 +321,12 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_attr attr; int err; - err = usnic_ib_query_port(ibdev, port_num, &attr); + immutable->core_cap_flags = RDMA_CORE_PORT_USNIC; + + err = ib_query_port(ibdev, port_num, &attr); if (err) return err; - immutable->core_cap_flags = RDMA_CORE_PORT_USNIC; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 74819a7951e2..0ba274ff7be6 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -330,7 +330,7 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, mutex_lock(&us_ibdev->usdev_lock); __ethtool_get_link_ksettings(us_ibdev->netdev, &cmd); - memset(props, 0, sizeof(*props)); + /* props being zeroed by the caller, avoid zeroing it here */ props->lid = 0; props->lmc = 1; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 231a1ce1f4be..b8b0081de478 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -132,13 +132,14 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_attr attr; int err; - err = pvrdma_query_port(ibdev, port_num, &attr); + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + + err = ib_query_port(ibdev, port_num, &attr); if (err) return err; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 54891370d18a..b3a982be8006 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -135,7 +135,7 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port, return err; } - memset(props, 0, sizeof(*props)); + /* props being zeroed by the caller, avoid zeroing it here */ props->state = pvrdma_port_state_to_ib(resp->attrs.state); props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu); @@ -275,7 +275,7 @@ int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, } mutex_lock(&vdev->port_mutex); - ret = pvrdma_query_port(ibdev, port, &attr); + ret = ib_query_port(ibdev, port, &attr); if (ret) goto out; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index d430c2f7cec4..1165639a914b 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -165,7 +165,7 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num, return -EINVAL; rvp = rdi->ports[port_index]; - memset(props, 0, sizeof(*props)); + /* props being zeroed by the caller, avoid zeroing it here */ props->sm_lid = rvp->sm_lid; props->sm_sl = rvp->sm_sl; props->port_cap_flags = rvp->port_cap_flags; @@ -326,13 +326,14 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, if (port_index < 0) return 
-EINVAL; - err = rvt_query_port(ibdev, port_num, &attr); + immutable->core_cap_flags = rdi->dparms.core_cap_flags; + + err = ib_query_port(ibdev, port_num, &attr); if (err) return err; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = rdi->dparms.core_cap_flags; immutable->max_mad_size = rdi->dparms.max_mad_size; return 0; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index beb7021ff18a..371ef3bac8d4 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -86,6 +86,7 @@ static int rxe_query_port(struct ib_device *dev, port = &rxe->port; + /* *attr being zeroed by the caller, avoid zeroing it here */ *attr = port->attr; mutex_lock(&rxe->usdev_lock); @@ -261,13 +262,14 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num, int err; struct ib_port_attr attr; - err = rxe_query_port(dev, port_num, &attr); + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + + err = ib_query_port(dev, port_num, &attr); if (err) return err; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; -- cgit v1.2.3 From 850b74151433966cff84801867e2a210a1c830bf Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 25 Jan 2017 20:26:18 +0200 Subject: IB/mlx4: Remove unused variable from function declaration Remove unused netw_view parameter from eth_link_query_port() function. Reported-by: Shiraz Saleem Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index ba6af84cc236..211cbbe9ccd1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -678,7 +678,7 @@ static u8 state_to_phys_state(enum ib_port_state state) } static int eth_link_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props, int netw_view) + struct ib_port_attr *props) { struct mlx4_ib_dev *mdev = to_mdev(ibdev); @@ -745,7 +745,7 @@ int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? ib_link_query_port(ibdev, port, props, netw_view) : - eth_link_query_port(ibdev, port, props, netw_view); + eth_link_query_port(ibdev, port, props); return err; } -- cgit v1.2.3 From d8030b0de03ec6bdf3665e1d915fa0f02e5ec89d Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 9 Feb 2017 19:31:47 +0200 Subject: IB/mlx5: Fix blue flame buffer size calculation A blue flame register is comprised of two buffers of equal size. 
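The practical consequence of the two-buffer layout is that the usable blue flame buffer is half of the register size the device reports. A minimal standalone sketch of that arithmetic (the log_bf_reg_size value here is a made-up example, not something queried from real hardware, and this is not driver code):

#include <stdio.h>

int main(void)
{
	/* Hypothetical capability value: log_bf_reg_size = 9, i.e. a
	 * 512-byte blue flame register (assumed for illustration only).
	 */
	unsigned int log_bf_reg_size = 9;
	unsigned int reg_size = 1u << log_bf_reg_size;	/* 512 bytes */

	/* The register holds two equally sized (odd/even) buffers, so
	 * each usable blue flame buffer is half of the register.
	 */
	unsigned int buf_size = reg_size / 2;		/* 256 bytes */

	printf("register %u bytes -> buffer %u bytes\n", reg_size, buf_size);
	return 0;
}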
Fixes: 5fe9dec0d0454 ("IB/mlx5: Use blue flame register allocator in mlx5_ib") Signed-off-by: Eli Cohen Reviewed-by: Noa Osherovich Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index fbfff1269816..69a1604a887c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -905,7 +905,10 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, else qp->bf.bfreg = &dev->bfreg; - qp->bf.buf_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); + /* We need to divide by two since each register is comprised of + * two buffers of identical size, namely odd and even + */ + qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2; uar_index = qp->bf.bfreg->index; err = calc_sq_size(dev, init_attr, qp); -- cgit v1.2.3 From a748d60df32ec5da31626a140be1795eefa04282 Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Tue, 14 Feb 2017 07:24:53 +0200 Subject: IB/mlx4: Take source GID by index from HW GID table Previously, we used the HW GID index to search for the source GID in the software GID cache. In some cases, for example when the MAC address of the network interface is changed, the GID cache keeps the old IPv6 link-local GID at the end of the table. When returning the old MAC address, the software GID cache tries to add the new IPv6 link-local GID and, since it identifies that the GID already exists, does not add it. Thus a mismatch occurs between the HW and the SW GID tables, which resulted in traffic being sent with the wrong source GID. This commit fixes the issue by taking both the GID and its type directly from the HW GID table.
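A minimal sketch of the failure mode, using toy table contents rather than the driver's real data structures: once the SW cache has collapsed a duplicate that the HW table still keeps in its own slot, the same numeric index names different GIDs in the two tables, which is why the lookup now goes straight to the HW-indexed table:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Toy contents, purely illustrative: the HW table keeps every
	 * entry in the slot the HW assigned, while the SW cache skipped
	 * a duplicate and therefore drifted out of sync.
	 */
	const char *hw_table[] = { "GID-A", "GID-A", "GID-B" };
	const char *sw_cache[] = { "GID-A", "GID-B", "(empty)" };
	int hw_index = 2;	/* index carried in the address vector */

	printf("HW table[%d] = %s\n", hw_index, hw_table[hw_index]);
	printf("SW cache[%d] = %s\n", hw_index, sw_cache[hw_index]);

	if (strcmp(hw_table[hw_index], sw_cache[hw_index]))
		printf("looking up the HW index in the SW cache picks the wrong source GID\n");
	return 0;
}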
The problem can be reproduced with the following scenario: Client: # ifconfig ens6 2.2.2.5 # ifconfig ens6 inet6 add 2001:0db8:0:f101::5/64 # ifconfig ens6 hw ether f4:52:14:61:a0:71 # ifconfig ens6 inet6 del 2001:0db8:0:f101::5/64 # ifconfig ens6 inet6 add 2001:0db8:0:f101::5/64 # ucmatose -f ipv6 -b 2001:0db8:0:f101::5 -s 2001:0db8:0:f101::6 -p 20156 Server: # ucmatose -f ipv6 -b 2001:0db8:0:f101::6 -p 20156 Fixes: 4c3eb3ca1396 ('IB/mlx4: Add VLAN support for IBoE') Signed-off-by: Talat Batheesh Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 56 +++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 24 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index c068add8838b..4f50b96fe605 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2424,11 +2424,31 @@ static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num) return vl; } +static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num, + int index, union ib_gid *gid, + enum ib_gid_type *gid_type) +{ + struct mlx4_ib_iboe *iboe = &ibdev->iboe; + struct mlx4_port_gid_table *port_gid_table; + unsigned long flags; + + port_gid_table = &iboe->gids[port_num - 1]; + spin_lock_irqsave(&iboe->lock, flags); + memcpy(gid, &port_gid_table->gids[index].gid, sizeof(*gid)); + *gid_type = port_gid_table->gids[index].gid_type; + spin_unlock_irqrestore(&iboe->lock, flags); + if (!memcmp(gid, &zgid, sizeof(*gid))) + return -ENOENT; + + return 0; +} + #define MLX4_ROCEV2_QP1_SPORT 0xC000 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct ib_device *ib_dev = sqp->qp.ibqp.device; + struct mlx4_ib_dev *ibdev = to_mdev(ib_dev); struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_ctrl_seg *ctrl = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; @@ -2454,8 +2474,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); if (is_eth) { - struct ib_gid_attr gid_attr; - + enum ib_gid_type gid_type; if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { /* When multi-function is enabled, the ib_core gid * indexes don't necessarily match the hw ones, so @@ -2466,18 +2485,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, if (err) return err; } else { - err = ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, &sgid, - &gid_attr); - if (!err) { - if (gid_attr.ndev) - dev_put(gid_attr.ndev); - if (!memcmp(&sgid, &zgid, sizeof(sgid))) - err = -ENOENT; - } + err = fill_gid_by_hw_index(ibdev, sqp->qp.port, + ah->av.ib.gid_index, + &sgid, &gid_type); if (!err) { - is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; + is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; if (is_udp) { if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) ip_version = 4; @@ -2955,21 +2967,17 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (sqp->roce_v2_gsi) { struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah); - struct ib_gid_attr gid_attr; + enum ib_gid_type gid_type; union ib_gid gid; - if (!ib_get_cached_gid(ibqp->device, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, &gid, - &gid_attr)) { - if (gid_attr.ndev) - dev_put(gid_attr.ndev); - qp = (gid_attr.gid_type == 
IB_GID_TYPE_ROCE_UDP_ENCAP) ? - to_mqp(sqp->roce_v2_gsi) : qp; - } else { + if (!fill_gid_by_hw_index(mdev, sqp->qp.port, + ah->av.ib.gid_index, + &gid, &gid_type)) + qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? + to_mqp(sqp->roce_v2_gsi) : qp; + else pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n", ah->av.ib.gid_index); - } } } -- cgit v1.2.3 From cdbe33d0f82d68ff74f05502a4c26e65ec7e90bb Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 14 Feb 2017 07:25:38 +0200 Subject: IB/mlx5: Fix configuration of port capabilities When the "ib_virt" cap is set, configuration of port capabilities needs to be done through mlx5_core_modify_hca_vport_context. Since modify_hca_vport_context accepts a mask and a value, there is no need to read the port capabilities and calculate the new cap values, so we can avoid taking the mutex when ib_virt is set. Signed-off-by: Eli Cohen Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8fd35ebb13e3..4a043cf35b9a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -995,6 +995,31 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, return err; } +static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask, + u32 value) +{ + struct mlx5_hca_vport_context ctx = {}; + int err; + + err = mlx5_query_hca_vport_context(dev->mdev, 0, + port_num, 0, &ctx); + if (err) + return err; + + if (~ctx.cap_mask1_perm & mask) { + mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n", + mask, ctx.cap_mask1_perm); + return -EINVAL; + } + + ctx.cap_mask1 = value; + ctx.cap_mask1_perm = mask; + err = mlx5_core_modify_hca_vport_context(dev->mdev, 0, + port_num, 0, &ctx); + + return err; +} + static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { @@ -1002,6 +1027,16 @@ static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_attr attr; u32 tmp; int err; + u32 change_mask; + u32 value; + bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) == + IB_LINK_LAYER_INFINIBAND); + + if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) { + change_mask = props->clr_port_cap_mask | props->set_port_cap_mask; + value = ~props->clr_port_cap_mask | props->set_port_cap_mask; + return set_port_caps_atomic(dev, port, change_mask, value); + } mutex_lock(&dev->cap_mask_mutex); -- cgit v1.2.3
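A rough illustration of the mask/value semantics that set_port_caps_atomic() relies on. The read-modify-write at the end is an assumption about how the device applies cap_mask1/cap_mask1_perm (only bits named in the mask may change); the capability bit values themselves are made up:

#include <stdio.h>

int main(void)
{
	/* Hypothetical current capabilities and modify request. */
	unsigned int cur_caps = 0x0000000a;	/* bits 1 and 3 set */
	unsigned int set_mask = 0x00000001;	/* caller wants bit 0 set */
	unsigned int clr_mask = 0x00000008;	/* caller wants bit 3 cleared */

	/* What the patch hands to the device: which bits may change
	 * (the mask) and the value those bits should take.
	 */
	unsigned int mask  = set_mask | clr_mask;
	unsigned int value = ~clr_mask | set_mask;

	/* Assumed device behaviour: only the permitted bits change. */
	unsigned int new_caps = (cur_caps & ~mask) | (value & mask);

	printf("old 0x%08x -> new 0x%08x\n", cur_caps, new_caps);
	/* prints: old 0x0000000a -> new 0x00000003
	 * (bit 0 set, bit 3 cleared, bit 1 untouched)
	 */
	return 0;
}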