From 6ee51a4e866bbb0921180b457ed16cd172859346 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 12 Mar 2014 12:00:37 +0200 Subject: mlx4: Adjust QP1 multiplexing for RoCE/SRIOV This requires the following modifications: 1. Fix build_mlx4_header to properly fill in the ETH fields 2. Adjust mux and demux QP1 flow to support RoCE. This commit still assumes only one GID per slave for RoCE. The commit enabling multiple GIDs is a subsequent commit, and is done separately because of its complexity. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/cm.c | 8 +++++-- drivers/infiniband/hw/mlx4/mad.c | 50 +++++++++++++++++++++++++++++++++++----- drivers/infiniband/hw/mlx4/qp.c | 29 +++++++++++++---------- 3 files changed, 67 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index d1f5f1dd77b0..b8d911543783 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -315,7 +315,7 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id } int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, - struct ib_mad *mad) + struct ib_mad *mad) { u32 pv_cm_id; struct id_map_entry *id; @@ -323,6 +323,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) { union ib_gid gid; + if (!slave) + return 0; + gid = gid_from_req_msg(ibdev, mad); *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id); if (*slave < 0) { @@ -341,7 +344,8 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, return -ENOENT; } - *slave = id->slave_id; + if (slave) + *slave = id->slave_id; set_remote_comm_id(mad, id->sl_cm_id); if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index f2a3f48107e7..c2e9879a5a34 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -467,6 +467,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, int ret = 0; u16 tun_pkey_ix; u16 cached_pkey; + u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; if (dest_qpt > IB_QPT_GSI) return -EINVAL; @@ -509,6 +510,12 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, * The driver will set the force loopback bit in post_send */ memset(&attr, 0, sizeof attr); attr.port_num = port; + if (is_eth) { + ret = mlx4_get_roce_gid_from_slave(dev->dev, port, slave, attr.grh.dgid.raw); + if (ret) + return ret; + attr.ah_flags = IB_AH_GRH; + } ah = ib_create_ah(tun_ctx->pd, &attr); if (IS_ERR(ah)) return -ENOMEM; @@ -580,6 +587,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, int err; int slave; u8 *slave_id; + int is_eth = 0; + + if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) + is_eth = 0; + else + is_eth = 1; + + if (is_eth) { + if (!(wc->wc_flags & IB_WC_GRH)) { + mlx4_ib_warn(ibdev, "RoCE grh not present.\n"); + return -EINVAL; + } + if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) { + mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n"); + return -EINVAL; + } + if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) { + mlx4_ib_warn(ibdev, "failed matching grh\n"); + return -ENOENT; + } + if (slave >= dev->dev->caps.sqp_demux) { + mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n", + slave, dev->dev->caps.sqp_demux); + return -ENOENT; + } + + if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad)) + return 0; + + err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); + if (err) + pr_debug("failed sending to slave %d via tunnel qp (%d)\n", + slave, err); + return 0; + } /* Initially assume that this mad is for us */ slave = mlx4_master_func_num(dev->dev); @@ -1260,12 +1302,8 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av)); ah.ibah.device = ctx->ib_dev; mlx4_ib_query_ah(&ah.ibah, &ah_attr); - if ((ah_attr.ah_flags & IB_AH_GRH) && - (ah_attr.grh.sgid_index != slave)) { - mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n", - slave, ah_attr.grh.sgid_index); - return; - } + if (ah_attr.ah_flags & IB_AH_GRH) + ah_attr.grh.sgid_index = slave; mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index d8f4d1fe8494..c6ef2e7e3045 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1842,9 +1842,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, { struct ib_device *ib_dev = sqp->qp.ibqp.device; struct mlx4_wqe_mlx_seg *mlx = wqe; + struct mlx4_wqe_ctrl_seg *ctrl = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); - struct net_device *ndev; union ib_gid sgid; u16 pkey; int send_size; @@ -1868,12 +1868,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, /* When multi-function is enabled, the ib_core gid * indexes don't necessarily match the hw ones, so * we must use our own cache */ - sgid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; - sgid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; + err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sgid.raw[0]); + if (err) + return err; } else { err = ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, @@ -1902,6 +1901,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; + if (is_eth) + memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); + else { if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { /* When multi-function is enabled, the ib_core gid * indexes don't necessarily match the hw ones, so @@ -1917,6 +1919,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); + } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); } @@ -1948,17 +1951,19 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, } if (is_eth) { - u8 *smac; + u8 smac[6]; + struct in6_addr in6; + u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; mlx->sched_prio = cpu_to_be16(pcp); memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); /* FIXME: cache smac value? */ - ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]; - if (!ndev) - return -ENODEV; - smac = ndev->dev_addr; + memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2); + memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4); + memcpy(&in6, sgid.raw, sizeof(in6)); + rdma_get_ll_mac(&in6, smac); memcpy(sqp->ud_header.eth.smac_h, smac, 6); if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); -- cgit v1.2.3 From b6ffaeffaea4d92f05f5ba1ef54df407cb7c8517 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 12 Mar 2014 12:00:39 +0200 Subject: mlx4: In RoCE allow guests to have multiple GIDS The GIDs are statically distributed, as follows: PF: gets 16 GIDs VFs: Remaining GIDS are divided evenly between VFs activated by the driver. If the division is not even, lower-numbered VFs get an extra GID. For an IB interface, the number of gids per guest remains as before: one gid per guest. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/mad.c | 34 +++++- drivers/net/ethernet/mellanox/mlx4/fw.c | 5 +- drivers/net/ethernet/mellanox/mlx4/main.c | 6 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 + drivers/net/ethernet/mellanox/mlx4/port.c | 117 +++++++++++++++++---- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 64 ++++++++--- include/linux/mlx4/device.h | 1 + 7 files changed, 192 insertions(+), 38 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index c2e9879a5a34..c5bca0f0da4a 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -511,9 +511,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, memset(&attr, 0, sizeof attr); attr.port_num = port; if (is_eth) { - ret = mlx4_get_roce_gid_from_slave(dev->dev, port, slave, attr.grh.dgid.raw); - if (ret) - return ret; + memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16); attr.ah_flags = IB_AH_GRH; } ah = ib_create_ah(tun_ctx->pd, &attr); @@ -1216,6 +1214,34 @@ out: return ret; } +static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port) +{ + int gids; + int vfs; + + if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) + return slave; + + gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; + vfs = dev->dev->num_vfs; + + if (slave == 0) + return 0; + if (slave <= gids % vfs) + return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1); + + return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1)); +} + +static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port, + struct ib_ah_attr *ah_attr) +{ + if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) + ah_attr->grh.sgid_index = slave; + else + ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port); +} + static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc) { struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); @@ -1303,7 +1329,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc ah.ibah.device = ctx->ib_dev; mlx4_ib_query_ah(&ah.ibah, &ah_attr); if (ah_attr.ah_flags & IB_AH_GRH) - ah_attr.grh.sgid_index = slave; + fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 9cdf452140da..6e1ee2170a39 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -934,7 +934,10 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); - short_field = 1; /* slave max gids */ + if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH) + short_field = mlx4_get_slave_num_gids(dev, slave); + else + short_field = 1; /* slave max gids */ MLX4_PUT(outbox->buf, short_field, QUERY_PORT_CUR_MAX_GID_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 979ea4364efb..4c441aa83016 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1462,7 +1462,11 @@ static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) int i; for (i = 1; i <= dev->caps.num_ports; i++) { - dev->caps.gid_table_len[i] = 1; + if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) + dev->caps.gid_table_len[i] = + mlx4_get_slave_num_gids(dev, 0); + else + dev->caps.gid_table_len[i] = 1; dev->caps.pkey_table_len[i] = dev->phys_caps.pkey_phys_table_len[i] - 1; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index da829f4ef938..6ba38c98c492 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1287,4 +1287,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); void mlx4_init_quotas(struct mlx4_dev *dev); +int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave); +int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave); + #endif /* MLX4_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 591740b06043..ece328166e94 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -507,6 +507,31 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps) } static struct mlx4_roce_gid_entry zgid_entry; +int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave) +{ + if (slave == 0) + return MLX4_ROCE_PF_GIDS; + if (slave <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % dev->num_vfs)) + return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs) + 1; + return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs; +} + +int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave) +{ + int gids; + int vfs; + + gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; + vfs = dev->num_vfs; + + if (slave == 0) + return 0; + if (slave <= gids % vfs) + return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1); + + return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1)); +} + static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, u8 op_mod, struct mlx4_cmd_mailbox *inbox) { @@ -516,15 +541,18 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, struct mlx4_slave_state *slave_st = &master->slave_state[slave]; struct mlx4_set_port_rqp_calc_context *qpn_context; struct mlx4_set_port_general_context *gen_context; - struct mlx4_roce_gid_entry *gid_entry; + struct mlx4_roce_gid_entry *gid_entry_tbl, *gid_entry_mbox, *gid_entry_mb1; int reset_qkey_viols; int port; int is_eth; + int num_gids; + int base; u32 in_modifier; u32 promisc; u16 mtu, prev_mtu; int err; - int i; + int i, j; + int offset; __be32 agg_cap_mask; __be32 slave_cap_mask; __be32 new_cap_mask; @@ -585,26 +613,65 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, gen_context->mtu = cpu_to_be16(master->max_mtu[port]); break; case MLX4_SET_PORT_GID_TABLE: - gid_entry = (struct mlx4_roce_gid_entry *)(inbox->buf); - /* check that do not have duplicates */ - if (memcmp(gid_entry->raw, zgid_entry.raw, 16)) { - for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { - if (slave != i && - !memcmp(gid_entry->raw, priv->roce_gids[port - 1][i].raw, 16)) { - mlx4_warn(dev, "requested gid entry for slave:%d " - "is a duplicate of slave %d\n", - slave, i); - return -EEXIST; + /* change to MULTIPLE entries: number of guest's gids + * need a FOR-loop here over number of gids the guest has. + * 1. Check no duplicates in gids passed by slave + */ + num_gids = mlx4_get_slave_num_gids(dev, slave); + base = mlx4_get_base_gid_ix(dev, slave); + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (i = 0; i < num_gids; gid_entry_mbox++, i++) { + if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw, + sizeof(zgid_entry))) + continue; + gid_entry_mb1 = gid_entry_mbox + 1; + for (j = i + 1; j < num_gids; gid_entry_mb1++, j++) { + if (!memcmp(gid_entry_mb1->raw, + zgid_entry.raw, sizeof(zgid_entry))) + continue; + if (!memcmp(gid_entry_mb1->raw, gid_entry_mbox->raw, + sizeof(gid_entry_mbox->raw))) { + /* found duplicate */ + return -EINVAL; } } } - /* insert slave GID at proper index */ - memcpy(priv->roce_gids[port - 1][slave].raw, gid_entry->raw, 16); - /* rewrite roce port gids table to FW */ + + /* 2. Check that do not have duplicates in OTHER + * entries in the port GID table + */ for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { - memcpy(gid_entry->raw, priv->roce_gids[port - 1][i].raw, 16); - gid_entry++; + if (i >= base && i < base + num_gids) + continue; /* don't compare to slave's current gids */ + gid_entry_tbl = &priv->roce_gids[port - 1][i]; + if (!memcmp(gid_entry_tbl->raw, zgid_entry.raw, sizeof(zgid_entry))) + continue; + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (j = 0; j < num_gids; gid_entry_mbox++, j++) { + if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw, + sizeof(zgid_entry))) + continue; + if (!memcmp(gid_entry_mbox->raw, gid_entry_tbl->raw, + sizeof(gid_entry_tbl->raw))) { + /* found duplicate */ + mlx4_warn(dev, "requested gid entry for slave:%d " + "is a duplicate of gid at index %d\n", + slave, i); + return -EINVAL; + } + } } + + /* insert slave GIDs with memcpy, starting at slave's base index */ + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (i = 0, offset = base; i < num_gids; gid_entry_mbox++, offset++, i++) + memcpy(priv->roce_gids[port - 1][offset].raw, gid_entry_mbox->raw, 16); + + /* Now, copy roce port gids table to current mailbox for passing to FW */ + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (i = 0; i < MLX4_ROCE_MAX_GIDS; gid_entry_mbox++, i++) + memcpy(gid_entry_mbox->raw, priv->roce_gids[port - 1][i].raw, 16); + break; } return mlx4_cmd(dev, inbox->dma, in_mod, op_mod, @@ -958,6 +1025,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, { struct mlx4_priv *priv = mlx4_priv(dev); int i, found_ix = -1; + int vf_gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; if (!mlx4_is_mfunc(dev)) return -EINVAL; @@ -969,8 +1037,19 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, } } - if (found_ix >= 0) - *slave_id = found_ix; + if (found_ix >= 0) { + if (found_ix < MLX4_ROCE_PF_GIDS) + *slave_id = 0; + else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % dev->num_vfs) * + (vf_gids / dev->num_vfs + 1)) + *slave_id = ((found_ix - MLX4_ROCE_PF_GIDS) / + (vf_gids / dev->num_vfs + 1)) + 1; + else + *slave_id = + ((found_ix - MLX4_ROCE_PF_GIDS - + ((vf_gids % dev->num_vfs) * ((vf_gids / dev->num_vfs + 1)))) / + (vf_gids / dev->num_vfs)) + vf_gids % dev->num_vfs + 1; + } return (found_ix >= 0) ? 0 : -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 57428a0cb9dd..1c3634eab5e1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -219,6 +219,11 @@ struct res_fs_rule { int qpn; }; +static int mlx4_is_eth(struct mlx4_dev *dev, int port) +{ + return dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB ? 0 : 1; +} + static void *res_tracker_lookup(struct rb_root *root, u64 res_id) { struct rb_node *node = root->rb_node; @@ -600,15 +605,34 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, struct mlx4_qp_context *qp_ctx = inbox->buf + 8; enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf); u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; + int port; - if (MLX4_QP_ST_UD == ts) - qp_ctx->pri_path.mgid_index = 0x80 | slave; - - if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) { - if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) - qp_ctx->pri_path.mgid_index = slave & 0x7F; - if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) - qp_ctx->alt_path.mgid_index = slave & 0x7F; + if (MLX4_QP_ST_UD == ts) { + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port)) + qp_ctx->pri_path.mgid_index = mlx4_get_base_gid_ix(dev, slave) | 0x80; + else + qp_ctx->pri_path.mgid_index = slave | 0x80; + + } else if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_XRC == ts || MLX4_QP_ST_UC == ts) { + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port)) { + qp_ctx->pri_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); + qp_ctx->pri_path.mgid_index &= 0x7f; + } else { + qp_ctx->pri_path.mgid_index = slave & 0x7F; + } + } + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { + port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port)) { + qp_ctx->alt_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); + qp_ctx->alt_path.mgid_index &= 0x7f; + } else { + qp_ctx->alt_path.mgid_index = slave & 0x7F; + } + } } } @@ -2734,6 +2758,8 @@ static int verify_qp_parameters(struct mlx4_dev *dev, u32 qp_type; struct mlx4_qp_context *qp_ctx; enum mlx4_qp_optpar optpar; + int port; + int num_gids; qp_ctx = inbox->buf + 8; qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; @@ -2741,6 +2767,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, switch (qp_type) { case MLX4_QP_ST_RC: + case MLX4_QP_ST_XRC: case MLX4_QP_ST_UC: switch (transition) { case QP_TRANS_INIT2RTR: @@ -2749,13 +2776,24 @@ static int verify_qp_parameters(struct mlx4_dev *dev, case QP_TRANS_SQD2SQD: case QP_TRANS_SQD2RTS: if (slave != mlx4_master_func_num(dev)) - /* slaves have only gid index 0 */ - if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) - if (qp_ctx->pri_path.mgid_index) + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) + num_gids = mlx4_get_slave_num_gids(dev, slave); + else + num_gids = 1; + if (qp_ctx->pri_path.mgid_index >= num_gids) return -EINVAL; - if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) - if (qp_ctx->alt_path.mgid_index) + } + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { + port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) + num_gids = mlx4_get_slave_num_gids(dev, slave); + else + num_gids = 1; + if (qp_ctx->alt_path.mgid_index >= num_gids) return -EINVAL; + } break; default: break; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e4853650679d..86e02e5c2c77 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -49,6 +49,7 @@ #define MIN_MSIX_P_PORT 5 #define MLX4_ROCE_MAX_GIDS 128 +#define MLX4_ROCE_PF_GIDS 16 enum { MLX4_FLAG_MSI_X = 1 << 0, -- cgit v1.2.3 From 2f5bb473681b88819a9de28ac3a47e7737815a92 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 12 Mar 2014 12:00:40 +0200 Subject: mlx4: Add ref counting to port MAC table for RoCE The IB side of RoCE requires the MAC table index of the MAC address used by its QPs. To obtain the real MAC index, the IB side registers the MAC (increasing its ref count, and also returning the real MAC index) during the modify-qp sequence. This protects against the ETH side deleting or modifying that MAC table entry while the QP is active. Note that until the modify-qp command returns success, the MAC and VLAN information only has "candidate" status. If the modify-qp succeeds, the "candidate" info is promoted to the operational MAC/VLAN info for the qp. If the modify fails, the candidate MAC/VLAN is unregistered, and the old qp info is preserved. The patch is a bit complex, because there are multiple qp transitions where the primary-path information may be modified: INIT-to-RTR, and SQD-to-SQD. Similarly for the alternate path information. Therefore the code must handle cases where path information has already been entered into the QP context by previous qp transitions. For the MAC address, the success logic is as follows: 1. If there was no previous MAC, simply move the candidate MAC information to the operational information, and reset the candidate MAC info. 2. If there was a previous MAC, unregister it. Then move the MAC information from candidate to operational, and reset the candidate info (as in 1. above). The MAC address failure logic is the same for all cases: - Unregister the candidate MAC, and reset the candidate MAC info. For Vlan registration, the logic is similar. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 19 +- drivers/infiniband/hw/mlx4/qp.c | 285 ++++++++++++++++++--- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 75 +++++- 3 files changed, 329 insertions(+), 50 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index a230683af940..febc8f9bc59a 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -241,6 +241,22 @@ struct mlx4_ib_proxy_sqp_hdr { struct mlx4_rcv_tunnel_hdr tun; } __packed; +struct mlx4_roce_smac_vlan_info { + u64 smac; + int smac_index; + int smac_port; + u64 candidate_smac; + int candidate_smac_index; + int candidate_smac_port; + u16 vid; + int vlan_index; + int vlan_port; + u16 candidate_vid; + int candidate_vlan_index; + int candidate_vlan_port; + int update_vid; +}; + struct mlx4_ib_qp { struct ib_qp ibqp; struct mlx4_qp mqp; @@ -273,8 +289,9 @@ struct mlx4_ib_qp { struct list_head gid_list; struct list_head steering_rules; struct mlx4_ib_buf *sqp_proxy_rcv; + struct mlx4_roce_smac_vlan_info pri; + struct mlx4_roce_smac_vlan_info alt; u64 reg_id; - }; struct mlx4_ib_srq { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index c6ef2e7e3045..11332f074023 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -662,10 +662,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (!sqp) return -ENOMEM; qp = &sqp->qp; + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; } else { qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL); if (!qp) return -ENOMEM; + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; } } else qp = *caller_qp; @@ -940,11 +944,32 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, { struct mlx4_ib_cq *send_cq, *recv_cq; - if (qp->state != IB_QPS_RESET) + if (qp->state != IB_QPS_RESET) { if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) pr_warn("modify QP %06x to RESET failed.\n", qp->mqp.qpn); + if (qp->pri.smac) { + mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); + qp->pri.smac = 0; + } + if (qp->alt.smac) { + mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); + qp->alt.smac = 0; + } + if (qp->pri.vid < 0x1000) { + mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid); + qp->pri.vid = 0xFFFF; + qp->pri.candidate_vid = 0xFFFF; + qp->pri.update_vid = 0; + } + if (qp->alt.vid < 0x1000) { + mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid); + qp->alt.vid = 0xFFFF; + qp->alt.candidate_vid = 0xFFFF; + qp->alt.update_vid = 0; + } + } get_cqs(qp, &send_cq, &recv_cq); @@ -1057,6 +1082,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, qp = kzalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; /* fall through */ case IB_QPT_UD: { @@ -1188,12 +1215,13 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, u64 smac, u16 vlan_tag, struct mlx4_qp_path *path, - u8 port) + struct mlx4_roce_smac_vlan_info *smac_info, u8 port) { int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_ETHERNET; int vidx; int smac_index; + int err; path->grh_mylmc = ah->src_path_bits & 0x7f; @@ -1223,61 +1251,103 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, } if (is_eth) { - path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | - ((port - 1) << 6) | ((ah->sl & 7) << 3); - if (!(ah->ah_flags & IB_AH_GRH)) return -1; - memcpy(path->dmac, ah->dmac, ETH_ALEN); - path->ackto = MLX4_IB_LINK_TYPE_ETH; - /* find the index into MAC table for IBoE */ - if (!is_zero_ether_addr((const u8 *)&smac)) { - if (mlx4_find_cached_mac(dev->dev, port, smac, - &smac_index)) - return -ENOENT; - } else { - smac_index = 0; - } - - path->grh_mylmc &= 0x80 | smac_index; + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 7) << 3); path->feup |= MLX4_FEUP_FORCE_ETH_UP; if (vlan_tag < 0x1000) { - if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx)) - return -ENOENT; - - path->vlan_index = vidx; - path->fl = 1 << 6; + if (smac_info->vid < 0x1000) { + /* both valid vlan ids */ + if (smac_info->vid != vlan_tag) { + /* different VIDs. unreg old and reg new */ + err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx); + if (err) + return err; + smac_info->candidate_vid = vlan_tag; + smac_info->candidate_vlan_index = vidx; + smac_info->candidate_vlan_port = port; + smac_info->update_vid = 1; + path->vlan_index = vidx; + } else { + path->vlan_index = smac_info->vlan_index; + } + } else { + /* no current vlan tag in qp */ + err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx); + if (err) + return err; + smac_info->candidate_vid = vlan_tag; + smac_info->candidate_vlan_index = vidx; + smac_info->candidate_vlan_port = port; + smac_info->update_vid = 1; + path->vlan_index = vidx; + } path->feup |= MLX4_FVL_FORCE_ETH_VLAN; + path->fl = 1 << 6; + } else { + /* have current vlan tag. unregister it at modify-qp success */ + if (smac_info->vid < 0x1000) { + smac_info->candidate_vid = 0xFFFF; + smac_info->update_vid = 1; + } } - } else + + /* get smac_index for RoCE use. + * If no smac was yet assigned, register one. + * If one was already assigned, but the new mac differs, + * unregister the old one and register the new one. + */ + if (!smac_info->smac || smac_info->smac != smac) { + /* register candidate now, unreg if needed, after success */ + smac_index = mlx4_register_mac(dev->dev, port, smac); + if (smac_index >= 0) { + smac_info->candidate_smac_index = smac_index; + smac_info->candidate_smac = smac; + smac_info->candidate_smac_port = port; + } else { + return -EINVAL; + } + } else { + smac_index = smac_info->smac_index; + } + + memcpy(path->dmac, ah->dmac, 6); + path->ackto = MLX4_IB_LINK_TYPE_ETH; + /* put MAC table smac index for IBoE */ + path->grh_mylmc = (u8) (smac_index) | 0x80; + } else { path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((port - 1) << 6) | ((ah->sl & 0xf) << 2); + } return 0; } static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp, enum ib_qp_attr_mask qp_attr_mask, + struct mlx4_ib_qp *mqp, struct mlx4_qp_path *path, u8 port) { return _mlx4_set_path(dev, &qp->ah_attr, mlx4_mac_to_u64((u8 *)qp->smac), (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff, - path, port); + path, &mqp->pri, port); } static int mlx4_set_alt_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp, enum ib_qp_attr_mask qp_attr_mask, + struct mlx4_ib_qp *mqp, struct mlx4_qp_path *path, u8 port) { return _mlx4_set_path(dev, &qp->alt_ah_attr, mlx4_mac_to_u64((u8 *)qp->alt_smac), (qp_attr_mask & IB_QP_ALT_VID) ? qp->alt_vlan_id : 0xffff, - path, port); + path, &mqp->alt, port); } static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) @@ -1292,6 +1362,37 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) } } +static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac, + struct mlx4_qp_context *context) +{ + struct net_device *ndev; + u64 u64_mac; + int smac_index; + + + ndev = dev->iboe.netdevs[qp->port - 1]; + if (ndev) { + smac = ndev->dev_addr; + u64_mac = mlx4_mac_to_u64(smac); + } else { + u64_mac = dev->dev->caps.def_mac[qp->port]; + } + + context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6); + if (!qp->pri.smac) { + smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac); + if (smac_index >= 0) { + qp->pri.candidate_smac_index = smac_index; + qp->pri.candidate_smac = u64_mac; + qp->pri.candidate_smac_port = qp->port; + context->pri_path.grh_mylmc = 0x80 | (u8) smac_index; + } else { + return -ENOENT; + } + } + return 0; +} + static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -1403,7 +1504,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_AV) { - if (mlx4_set_path(dev, attr, attr_mask, &context->pri_path, + if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) goto out; @@ -1426,7 +1527,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, dev->dev->caps.pkey_table_len[attr->alt_port_num]) goto out; - if (mlx4_set_alt_path(dev, attr, attr_mask, &context->alt_path, + if (mlx4_set_alt_path(dev, attr, attr_mask, qp, + &context->alt_path, attr->alt_port_num)) goto out; @@ -1532,6 +1634,20 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->pri_path.fl = 0x80; context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; } + if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) == + IB_LINK_LAYER_ETHERNET) { + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI || + qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) + context->pri_path.feup = 1 << 7; /* don't fsm */ + /* handle smac_index */ + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD || + qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI || + qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) { + err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context); + if (err) + return -EINVAL; + } + } } if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) @@ -1619,28 +1735,113 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, * If we moved a kernel QP to RESET, clean up all old CQ * entries and reinitialize the QP. */ - if (new_state == IB_QPS_RESET && !ibqp->uobject) { - mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, - ibqp->srq ? to_msrq(ibqp->srq): NULL); - if (send_cq != recv_cq) - mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + if (new_state == IB_QPS_RESET) { + if (!ibqp->uobject) { + mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, + ibqp->srq ? to_msrq(ibqp->srq) : NULL); + if (send_cq != recv_cq) + mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + + qp->rq.head = 0; + qp->rq.tail = 0; + qp->sq.head = 0; + qp->sq.tail = 0; + qp->sq_next_wqe = 0; + if (qp->rq.wqe_cnt) + *qp->db.db = 0; - qp->rq.head = 0; - qp->rq.tail = 0; - qp->sq.head = 0; - qp->sq.tail = 0; - qp->sq_next_wqe = 0; - if (qp->rq.wqe_cnt) - *qp->db.db = 0; + if (qp->flags & MLX4_IB_QP_NETIF) + mlx4_ib_steer_qp_reg(dev, qp, 0); + } + if (qp->pri.smac) { + mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); + qp->pri.smac = 0; + } + if (qp->alt.smac) { + mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); + qp->alt.smac = 0; + } + if (qp->pri.vid < 0x1000) { + mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid); + qp->pri.vid = 0xFFFF; + qp->pri.candidate_vid = 0xFFFF; + qp->pri.update_vid = 0; + } - if (qp->flags & MLX4_IB_QP_NETIF) - mlx4_ib_steer_qp_reg(dev, qp, 0); + if (qp->alt.vid < 0x1000) { + mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid); + qp->alt.vid = 0xFFFF; + qp->alt.candidate_vid = 0xFFFF; + qp->alt.update_vid = 0; + } } - out: if (err && steer_qp) mlx4_ib_steer_qp_reg(dev, qp, 0); kfree(context); + if (qp->pri.candidate_smac) { + if (err) { + mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac); + } else { + if (qp->pri.smac) + mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); + qp->pri.smac = qp->pri.candidate_smac; + qp->pri.smac_index = qp->pri.candidate_smac_index; + qp->pri.smac_port = qp->pri.candidate_smac_port; + } + qp->pri.candidate_smac = 0; + qp->pri.candidate_smac_index = 0; + qp->pri.candidate_smac_port = 0; + } + if (qp->alt.candidate_smac) { + if (err) { + mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->alt.candidate_smac); + } else { + if (qp->alt.smac) + mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); + qp->alt.smac = qp->alt.candidate_smac; + qp->alt.smac_index = qp->alt.candidate_smac_index; + qp->alt.smac_port = qp->alt.candidate_smac_port; + } + qp->alt.candidate_smac = 0; + qp->alt.candidate_smac_index = 0; + qp->alt.candidate_smac_port = 0; + } + + if (qp->pri.update_vid) { + if (err) { + if (qp->pri.candidate_vid < 0x1000) + mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port, + qp->pri.candidate_vid); + } else { + if (qp->pri.vid < 0x1000) + mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, + qp->pri.vid); + qp->pri.vid = qp->pri.candidate_vid; + qp->pri.vlan_port = qp->pri.candidate_vlan_port; + qp->pri.vlan_index = qp->pri.candidate_vlan_index; + } + qp->pri.candidate_vid = 0xFFFF; + qp->pri.update_vid = 0; + } + + if (qp->alt.update_vid) { + if (err) { + if (qp->alt.candidate_vid < 0x1000) + mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port, + qp->alt.candidate_vid); + } else { + if (qp->alt.vid < 0x1000) + mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, + qp->alt.vid); + qp->alt.vid = qp->alt.candidate_vid; + qp->alt.vlan_port = qp->alt.candidate_vlan_port; + qp->alt.vlan_index = qp->alt.candidate_vlan_index; + } + qp->alt.candidate_vid = 0xFFFF; + qp->alt.update_vid = 0; + } + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 1c3634eab5e1..706a6d2b538c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -52,6 +52,8 @@ struct mac_res { struct list_head list; u64 mac; + int ref_count; + u8 smac_index; u8 port; }; @@ -1683,11 +1685,39 @@ static int srq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, return err; } -static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port) +static int mac_find_smac_ix_in_slave(struct mlx4_dev *dev, int slave, int port, + u8 smac_index, u64 *mac) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct list_head *mac_list = + &tracker->slave_list[slave].res_list[RES_MAC]; + struct mac_res *res, *tmp; + + list_for_each_entry_safe(res, tmp, mac_list, list) { + if (res->smac_index == smac_index && res->port == (u8) port) { + *mac = res->mac; + return 0; + } + } + return -ENOENT; +} + +static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port, u8 smac_index) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; - struct mac_res *res; + struct list_head *mac_list = + &tracker->slave_list[slave].res_list[RES_MAC]; + struct mac_res *res, *tmp; + + list_for_each_entry_safe(res, tmp, mac_list, list) { + if (res->mac == mac && res->port == (u8) port) { + /* mac found. update ref count */ + ++res->ref_count; + return 0; + } + } if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port)) return -EINVAL; @@ -1698,6 +1728,8 @@ static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port) } res->mac = mac; res->port = (u8) port; + res->smac_index = smac_index; + res->ref_count = 1; list_add_tail(&res->list, &tracker->slave_list[slave].res_list[RES_MAC]); return 0; @@ -1714,9 +1746,11 @@ static void mac_del_from_slave(struct mlx4_dev *dev, int slave, u64 mac, list_for_each_entry_safe(res, tmp, mac_list, list) { if (res->mac == mac && res->port == (u8) port) { - list_del(&res->list); - mlx4_release_resource(dev, slave, RES_MAC, 1, port); - kfree(res); + if (!--res->ref_count) { + list_del(&res->list); + mlx4_release_resource(dev, slave, RES_MAC, 1, port); + kfree(res); + } break; } } @@ -1729,10 +1763,13 @@ static void rem_slave_macs(struct mlx4_dev *dev, int slave) struct list_head *mac_list = &tracker->slave_list[slave].res_list[RES_MAC]; struct mac_res *res, *tmp; + int i; list_for_each_entry_safe(res, tmp, mac_list, list) { list_del(&res->list); - __mlx4_unregister_mac(dev, res->port, res->mac); + /* dereference the mac the num times the slave referenced it */ + for (i = 0; i < res->ref_count; i++) + __mlx4_unregister_mac(dev, res->port, res->mac); mlx4_release_resource(dev, slave, RES_MAC, 1, res->port); kfree(res); } @@ -1744,6 +1781,7 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, int err = -EINVAL; int port; u64 mac; + u8 smac_index; if (op != RES_OP_RESERVE_AND_MAP) return err; @@ -1753,12 +1791,13 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, err = __mlx4_register_mac(dev, port, mac); if (err >= 0) { + smac_index = err; set_param_l(out_param, err); err = 0; } if (!err) { - err = mac_add_to_slave(dev, slave, mac, port); + err = mac_add_to_slave(dev, slave, mac, port, smac_index); if (err) __mlx4_unregister_mac(dev, port, mac); } @@ -3306,6 +3345,25 @@ int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } +static int roce_verify_mac(struct mlx4_dev *dev, int slave, + struct mlx4_qp_context *qpc, + struct mlx4_cmd_mailbox *inbox) +{ + u64 mac; + int port; + u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff; + u8 sched = *(u8 *)(inbox->buf + 64); + u8 smac_ix; + + port = (sched >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port) && (ts != MLX4_QP_ST_MLX)) { + smac_ix = qpc->pri_path.grh_mylmc & 0x7f; + if (mac_find_smac_ix_in_slave(dev, slave, port, smac_ix, &mac)) + return -ENOENT; + } + return 0; +} + int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -3328,6 +3386,9 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, if (err) return err; + if (roce_verify_mac(dev, slave, qpc, inbox)) + return -EINVAL; + update_pkey_index(dev, slave, inbox); update_gid(dev, inbox, (u8)slave); adjust_proxy_tun_qkey(dev, vhcr, qpc); -- cgit v1.2.3 From 5ea8bbfc49291b7e23161fe4de0bf3e4a4e34b18 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 12 Mar 2014 12:00:41 +0200 Subject: mlx4: Implement IP based gids support for RoCE/SRIOV Since there is no connection between the MAC/VLAN and the GID when using IP-based addressing, the proxy QP1 (running on the slave) must pass the source-mac, destination-mac, and vlan_id information separately from the GID. Additionally, the Host must pass the remote source-mac and vlan_id back to the slave, This is achieved as follows: Outgoing MADs: 1. Source MAC: obtained from the CQ completion structure (struct ib_wc, smac field). 2. Destination MAC: obtained from the tunnel header 3. vlan_id: obtained from the tunnel header. Incoming MADs 1. The source (i.e., remote) MAC and vlan_id are passed in the tunnel header to the proxy QP1. VST mode support: For outgoing MADs, the vlan_id obtained from the header is discarded, and the vlan_id specified by the Hypervisor is used instead. For incoming MADs, the incoming vlan_id (in the wc) is discarded, and the "invalid" vlan (0xffff) is substituted when forwarding to the slave. Signed-off-by: Moni Shoua Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/cq.c | 42 +++++++++++++++++++---------- drivers/infiniband/hw/mlx4/mad.c | 45 ++++++++++++++++++++++++++++---- drivers/infiniband/hw/mlx4/mcg.c | 5 ++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 5 +++- drivers/infiniband/hw/mlx4/qp.c | 11 ++++++-- drivers/net/ethernet/mellanox/mlx4/cmd.c | 24 +++++++++++++++++ include/linux/mlx4/cmd.h | 7 +++++ include/linux/mlx4/device.h | 3 ++- 8 files changed, 117 insertions(+), 25 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index cc40f08ca8f1..5f640814cc81 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -564,7 +564,7 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) } static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, - unsigned tail, struct mlx4_cqe *cqe) + unsigned tail, struct mlx4_cqe *cqe, int is_eth) { struct mlx4_ib_proxy_sqp_hdr *hdr; @@ -574,12 +574,20 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct DMA_FROM_DEVICE); hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr); wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index); - wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); - wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF; wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0; wc->dlid_path_bits = 0; + if (is_eth) { + wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid); + memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4); + memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2); + wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); + } else { + wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); + wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); + } + return 0; } @@ -594,6 +602,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_srq *msrq = NULL; int is_send; int is_error; + int is_eth; u32 g_mlpath_rqpn; u16 wqe_ctr; unsigned tail = 0; @@ -778,11 +787,15 @@ repoll: break; } + is_eth = (rdma_port_get_link_layer(wc->qp->device, + (*cur_qp)->port) == + IB_LINK_LAYER_ETHERNET); if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) { if ((*cur_qp)->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) - return use_tunnel_data(*cur_qp, cq, wc, tail, cqe); + return use_tunnel_data(*cur_qp, cq, wc, tail, + cqe, is_eth); } wc->slid = be16_to_cpu(cqe->rlid); @@ -793,20 +806,21 @@ repoll: wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum) ? IB_WC_IP_CSUM_OK : 0; - if (rdma_port_get_link_layer(wc->qp->device, - (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET) + if (is_eth) { wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; - else - wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; - if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) { - wc->vlan_id = be16_to_cpu(cqe->sl_vid) & - MLX4_CQE_VID_MASK; + if (be32_to_cpu(cqe->vlan_my_qpn) & + MLX4_CQE_VLAN_PRESENT_MASK) { + wc->vlan_id = be16_to_cpu(cqe->sl_vid) & + MLX4_CQE_VID_MASK; + } else { + wc->vlan_id = 0xffff; + } + memcpy(wc->smac, cqe->smac, ETH_ALEN); + wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); } else { + wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; wc->vlan_id = 0xffff; } - wc->wc_flags |= IB_WC_WITH_VLAN; - memcpy(wc->smac, cqe->smac, ETH_ALEN); - wc->wc_flags |= IB_WC_WITH_SMAC; } return 0; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index c5bca0f0da4a..2c572aed3f6f 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -545,11 +545,36 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, /* adjust tunnel data */ tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix); - tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); - tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF); tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0; + if (is_eth) { + u16 vlan = 0; + if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan, + NULL)) { + /* VST mode */ + if (vlan != wc->vlan_id) + /* Packet vlan is not the VST-assigned vlan. + * Drop the packet. + */ + goto out; + else + /* Remove the vlan tag before forwarding + * the packet to the VF. + */ + vlan = 0xffff; + } else { + vlan = wc->vlan_id; + } + + tun_mad->hdr.sl_vid = cpu_to_be16(vlan); + memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4); + memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2); + } else { + tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); + tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); + } + ib_dma_sync_single_for_device(&dev->ib_dev, tun_qp->tx_ring[tun_tx_ix].buf.map, sizeof (struct mlx4_rcv_tunnel_mad), @@ -1116,8 +1141,9 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, - enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, - u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad) + enum ib_qp_type dest_qpt, u16 pkey_index, + u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, + u8 *s_mac, struct ib_mad *mad) { struct ib_sge list; struct ib_send_wr wr, *bad_wr; @@ -1206,6 +1232,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, wr.num_sge = 1; wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; + if (s_mac) + memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); + ret = ib_post_send(send_qp, &wr, &bad_wr); out: @@ -1331,13 +1360,19 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc if (ah_attr.ah_flags & IB_AH_GRH) fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); + memcpy(ah_attr.dmac, tunnel->hdr.mac, 6); + ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan); + /* if slave have default vlan use it */ + mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave, + &ah_attr.vlan_id, &ah_attr.sl); + mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? IB_QPT_SMI : IB_QPT_GSI, be16_to_cpu(tunnel->hdr.pkey_index), be32_to_cpu(tunnel->hdr.remote_qpn), be32_to_cpu(tunnel->hdr.qkey), - &ah_attr, &tunnel->mad); + &ah_attr, wc->smac, &tunnel->mad); } static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 25b2cdff00f8..ed327e6c8fdc 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -215,8 +215,9 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad) } mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr); spin_unlock(&dev->sm_lock); - return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port, - IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad); + return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), + ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY, + &ah_attr, NULL, mad); } static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx, diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index febc8f9bc59a..f589522fddfd 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -737,9 +737,12 @@ void mlx4_ib_tunnels_update_work(struct work_struct *work); int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type qpt, struct ib_wc *wc, struct ib_grh *grh, struct ib_mad *mad); + int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, - u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad); + u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, + struct ib_mad *mad); + __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx); int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 11332f074023..aadf7f82e1f3 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2152,7 +2152,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, } if (is_eth) { - u8 smac[6]; + u8 *smac; struct in6_addr in6; u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; @@ -2164,7 +2164,12 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2); memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4); memcpy(&in6, sgid.raw, sizeof(in6)); - rdma_get_ll_mac(&in6, smac); + + if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) + smac = to_mdev(sqp->qp.ibqp.device)-> + iboe.netdevs[sqp->qp.port - 1]->dev_addr; + else /* use the src mac of the tunnel */ + smac = ah->av.eth.s_mac; memcpy(sqp->ud_header.eth.smac_h, smac, 6); if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); @@ -2396,6 +2401,8 @@ static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_ hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index); hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + memcpy(hdr.mac, ah->av.eth.mac, 6); + hdr.vlan = ah->av.eth.vlan; spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 0d02fba94536..2b0b45ece14b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2289,6 +2289,30 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) } EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan); + /* mlx4_get_slave_default_vlan - + * return true if VST ( default vlan) + * if VST, will return vlan & qos (if not NULL) + */ +bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, + u16 *vlan, u8 *qos) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_priv *priv; + + priv = mlx4_priv(dev); + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + + if (MLX4_VGT != vp_oper->state.default_vlan) { + if (vlan) + *vlan = vp_oper->state.default_vlan; + if (qos) + *qos = vp_oper->state.default_qos; + return true; + } + return false; +} +EXPORT_SYMBOL_GPL(mlx4_get_slave_default_vlan); + int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 79a347238168..009985628257 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -240,6 +240,13 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting); int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_info *ivf); int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state); +/* + * mlx4_get_slave_default_vlan - + * return true if VST ( default vlan) + * if VST, will return vlan & qos (if not NULL) + */ +bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, + u16 *vlan, u8 *qos); #define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 86e02e5c2c77..f211b51dc726 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -632,7 +632,8 @@ struct mlx4_eth_av { u8 hop_limit; __be32 sl_tclass_flowlabel; u8 dgid[16]; - u32 reserved4[2]; + u8 s_mac[6]; + u8 reserved4[2]; __be16 vlan; u8 mac[ETH_ALEN]; }; -- cgit v1.2.3 From ceb5433b3a54979216d794e45147d25c24c94999 Mon Sep 17 00:00:00 2001 From: Shani Michaelli Date: Wed, 12 Mar 2014 12:00:42 +0200 Subject: mlx4_ib: Fix SIDR support of for UD QPs under SRIOV/RoCE * Handle CM_SIDR_REQ_ATTR_ID and CM_SIDR_REP_ATTR_ID in multiplex_cm_handler and demux_cm_handler. * Handle Service ID Resolution messages and REQ messages separately, for their formats are different. Signed-off-by: Shani Michaeli Signed-off-by: Matan Barak Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/cm.c | 72 ++++++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index b8d911543783..56a593e0ae5d 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -61,6 +61,11 @@ struct cm_generic_msg { __be32 remote_comm_id; }; +struct cm_sidr_generic_msg { + struct ib_mad_hdr hdr; + __be32 request_id; +}; + struct cm_req_msg { unsigned char unused[0x60]; union ib_gid primary_path_sgid; @@ -69,28 +74,62 @@ struct cm_req_msg { static void set_local_comm_id(struct ib_mad *mad, u32 cm_id) { - struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; - msg->local_comm_id = cpu_to_be32(cm_id); + if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + msg->request_id = cpu_to_be32(cm_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + pr_err("trying to set local_comm_id in SIDR_REP\n"); + return; + } else { + struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; + msg->local_comm_id = cpu_to_be32(cm_id); + } } static u32 get_local_comm_id(struct ib_mad *mad) { - struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; - - return be32_to_cpu(msg->local_comm_id); + if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + return be32_to_cpu(msg->request_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + pr_err("trying to set local_comm_id in SIDR_REP\n"); + return -1; + } else { + struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; + return be32_to_cpu(msg->local_comm_id); + } } static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id) { - struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; - msg->remote_comm_id = cpu_to_be32(cm_id); + if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + msg->request_id = cpu_to_be32(cm_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + pr_err("trying to set remote_comm_id in SIDR_REQ\n"); + return; + } else { + struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; + msg->remote_comm_id = cpu_to_be32(cm_id); + } } static u32 get_remote_comm_id(struct ib_mad *mad) { - struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; - - return be32_to_cpu(msg->remote_comm_id); + if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + return be32_to_cpu(msg->request_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + pr_err("trying to set remote_comm_id in SIDR_REQ\n"); + return -1; + } else { + struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; + return be32_to_cpu(msg->remote_comm_id); + } } static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad) @@ -282,19 +321,21 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id u32 sl_cm_id; int pv_cm_id = -1; - sl_cm_id = get_local_comm_id(mad); - if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || - mad->mad_hdr.attr_id == CM_REP_ATTR_ID) { + mad->mad_hdr.attr_id == CM_REP_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + sl_cm_id = get_local_comm_id(mad); id = id_map_alloc(ibdev, slave_id, sl_cm_id); if (IS_ERR(id)) { mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n", __func__, slave_id, sl_cm_id); return PTR_ERR(id); } - } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) { + } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { return 0; } else { + sl_cm_id = get_local_comm_id(mad); id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id); } @@ -320,7 +361,8 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, u32 pv_cm_id; struct id_map_entry *id; - if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) { + if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { union ib_gid gid; if (!slave) -- cgit v1.2.3 From aa9a2d51a3e70b15a898bec7dde3ce5726fec641 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 12 Mar 2014 12:00:43 +0200 Subject: mlx4: Activate RoCE/SRIOV To activate RoCE/SRIOV, need to remove the following: 1. In mlx4_ib_add, need to remove the error return preventing initialization of a RoCE port under SRIOV. 2. In update_vport_qp_params (in resource_tracker.c) need to remove the error return when a RoCE RC or UD qp is detected. This error return causes the INIT-to-RTR qp transition to fail in the wrapper function under RoCE/SRIOV. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 8 -------- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 7 ------- 2 files changed, 15 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f9c12e92fdd6..1d1750ef000a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1888,14 +1888,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) pr_info_once("%s", mlx4_ib_version); - mlx4_foreach_non_ib_transport_port(i, dev) - num_ports++; - - if (mlx4_is_mfunc(dev) && num_ports) { - dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n"); - return NULL; - } - num_ports = 0; mlx4_foreach_ib_transport_port(i, dev) num_ports++; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 706a6d2b538c..74e490d70184 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -645,7 +645,6 @@ static int update_vport_qp_param(struct mlx4_dev *dev, struct mlx4_qp_context *qpc = inbox->buf + 8; struct mlx4_vport_oper_state *vp_oper; struct mlx4_priv *priv; - u32 qp_type; int port; port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1; @@ -653,12 +652,6 @@ static int update_vport_qp_param(struct mlx4_dev *dev, vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (MLX4_VGT != vp_oper->state.default_vlan) { - qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff; - if (MLX4_QP_ST_RC == qp_type || - (MLX4_QP_ST_UD == qp_type && - !mlx4_is_qp_reserved(dev, qpn))) - return -EINVAL; - /* the reserved QPs (special, proxy, tunnel) * do not operate over vlans */ -- cgit v1.2.3 From 7a2cea2aaae2d5eb5c00c49c52180c7c2c66130a Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 14 Mar 2014 21:52:07 +0530 Subject: cxgb4/iw_cxgb4: Treat CPL_ERR_KEEPALV_NEG_ADVICE as negative advice Based on original work by Anand Priyadarshee . Signed-off-by: Steve Wise Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 24 ++++++++++++------------ drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 1 + 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index d286bdebe2ab..7e98a58aacfd 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1647,6 +1647,15 @@ static inline int act_open_has_tid(int status) status != CPL_ERR_ARP_MISS; } +/* Returns whether a CPL status conveys negative advice. + */ +static int is_neg_adv(unsigned int status) +{ + return status == CPL_ERR_RTX_NEG_ADVICE || + status == CPL_ERR_PERSIST_NEG_ADVICE || + status == CPL_ERR_KEEPALV_NEG_ADVICE; +} + #define ACT_OPEN_RETRY_COUNT 2 static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, @@ -1835,7 +1844,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, status, status2errno(status)); - if (status == CPL_ERR_RTX_NEG_ADVICE) { + if (is_neg_adv(status)) { printk(KERN_WARNING MOD "Connection problems for atid %u\n", atid); return 0; @@ -2265,15 +2274,6 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } -/* - * Returns whether an ABORT_REQ_RSS message is a negative advice. - */ -static int is_neg_adv_abort(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE; -} - static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); @@ -2287,7 +2287,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int tid = GET_TID(req); ep = lookup_tid(t, tid); - if (is_neg_adv_abort(req->status)) { + if (is_neg_adv(req->status)) { PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, ep->hwtid); return 0; @@ -3570,7 +3570,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) kfree_skb(skb); return 0; } - if (is_neg_adv_abort(req->status)) { + if (is_neg_adv(req->status)) { PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, ep->hwtid); kfree_skb(skb); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index cd6874b571ee..f2738c710789 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -116,6 +116,7 @@ enum CPL_error { CPL_ERR_KEEPALIVE_TIMEDOUT = 34, CPL_ERR_RTX_NEG_ADVICE = 35, CPL_ERR_PERSIST_NEG_ADVICE = 36, + CPL_ERR_KEEPALV_NEG_ADVICE = 37, CPL_ERR_ABORT_FAILED = 42, CPL_ERR_IWARP_FLM = 50, }; -- cgit v1.2.3 From 05eb23893c2cf9502a9cec0c32e7f1d1ed2895c8 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 14 Mar 2014 21:52:08 +0530 Subject: cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes The current logic suffers from a slow response time to disable user DB usage, and also fails to avoid DB FIFO drops under heavy load. This commit fixes these deficiencies and makes the avoidance logic more optimal. This is done by more efficiently notifying the ULDs of potential DB problems, and implements a smoother flow control algorithm in iw_cxgb4, which is the ULD that puts the most load on the DB fifo. Design: cxgb4: Direct ULD callback from the DB FULL/DROP interrupt handler. This allows the ULD to stop doing user DB writes as quickly as possible. While user DB usage is disabled, the LLD will accumulate DB write events for its queues. Then once DB usage is reenabled, a single DB write is done for each queue with its accumulated write count. This reduces the load put on the DB fifo when reenabling. iw_cxgb4: Instead of marking each qp to indicate DB writes are disabled, we create a device-global status page that each user process maps. This allows iw_cxgb4 to only set this single bit to disable all DB writes for all user QPs vs traversing the idr of all the active QPs. If the libcxgb4 doesn't support this, then we fall back to the old approach of marking each QP. Thus we allow the new driver to work with an older libcxgb4. When the LLD upcalls iw_cxgb4 indicating DB FULL, we disable all DB writes via the status page and transition the DB state to STOPPED. As user processes see that DB writes are disabled, they call into iw_cxgb4 to submit their DB write events. Since the DB state is in STOPPED, the QP trying to write gets enqueued on a new DB "flow control" list. As subsequent DB writes are submitted for this flow controlled QP, the amount of writes are accumulated for each QP on the flow control list. So all the user QPs that are actively ringing the DB get put on this list and the number of writes they request are accumulated. When the LLD upcalls iw_cxgb4 indicating DB EMPTY, which is in a workq context, we change the DB state to FLOW_CONTROL, and begin resuming all the QPs that are on the flow control list. This logic runs on until the flow control list is empty or we exit FLOW_CONTROL mode (due to a DB DROP upcall, for example). QPs are removed from this list, and their accumulated DB write counts written to the DB FIFO. Sets of QPs, called chunks in the code, are removed at one time. The chunk size is 64. So 64 QPs are resumed at a time, and before the next chunk is resumed, the logic waits (blocks) for the DB FIFO to drain. This prevents resuming to quickly and overflowing the FIFO. Once the flow control list is empty, the db state transitions back to NORMAL and user QPs are again allowed to write directly to the user DB register. The algorithm is designed such that if the DB write load is high enough, then all the DB writes get submitted by the kernel using this flow controlled approach to avoid DB drops. As the load lightens though, we resume to normal DB writes directly by user applications. Signed-off-by: Steve Wise Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/device.c | 177 ++++++++++++++---------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 9 +- drivers/infiniband/hw/cxgb4/provider.c | 43 +++++- drivers/infiniband/hw/cxgb4/qp.c | 140 +++++++++---------- drivers/infiniband/hw/cxgb4/t4.h | 6 + drivers/infiniband/hw/cxgb4/user.h | 5 + drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 87 +++++++----- drivers/net/ethernet/chelsio/cxgb4/sge.c | 8 +- 9 files changed, 286 insertions(+), 190 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 4a033853312e..ba7335fd4ebf 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -64,6 +64,10 @@ struct uld_ctx { static LIST_HEAD(uld_ctx_list); static DEFINE_MUTEX(dev_mutex); +#define DB_FC_RESUME_SIZE 64 +#define DB_FC_RESUME_DELAY 1 +#define DB_FC_DRAIN_THRESH 0 + static struct dentry *c4iw_debugfs_root; struct c4iw_debugfs_data { @@ -282,7 +286,7 @@ static const struct file_operations stag_debugfs_fops = { .llseek = default_llseek, }; -static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"}; +static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"}; static int stats_show(struct seq_file *seq, void *v) { @@ -311,9 +315,10 @@ static int stats_show(struct seq_file *seq, void *v) seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full); seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty); seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop); - seq_printf(seq, " DB State: %s Transitions %llu\n", + seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n", db_state_str[dev->db_state], - dev->rdev.stats.db_state_transitions); + dev->rdev.stats.db_state_transitions, + dev->rdev.stats.db_fc_interruptions); seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full); seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n", dev->rdev.stats.act_ofld_conn_fails); @@ -643,6 +648,12 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err); goto err4; } + rdev->status_page = (struct t4_dev_status_page *) + __get_free_page(GFP_KERNEL); + if (!rdev->status_page) { + pr_err(MOD "error allocating status page\n"); + goto err4; + } return 0; err4: c4iw_rqtpool_destroy(rdev); @@ -656,6 +667,7 @@ err1: static void c4iw_rdev_close(struct c4iw_rdev *rdev) { + free_page((unsigned long)rdev->status_page); c4iw_pblpool_destroy(rdev); c4iw_rqtpool_destroy(rdev); c4iw_destroy_resource(&rdev->resource); @@ -703,18 +715,6 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) pr_info("%s: On-Chip Queues not supported on this device.\n", pci_name(infop->pdev)); - if (!is_t4(infop->adapter_type)) { - if (!allow_db_fc_on_t5) { - db_fc_threshold = 100000; - pr_info("DB Flow Control Disabled.\n"); - } - - if (!allow_db_coalescing_on_t5) { - db_coalescing_threshold = -1; - pr_info("DB Coalescing Disabled.\n"); - } - } - devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp)); if (!devp) { printk(KERN_ERR MOD "Cannot allocate ib device\n"); @@ -749,6 +749,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) spin_lock_init(&devp->lock); mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); + INIT_LIST_HEAD(&devp->db_fc_list); if (c4iw_debugfs_root) { devp->debugfs_root = debugfs_create_dir( @@ -977,13 +978,16 @@ static int disable_qp_db(int id, void *p, void *data) static void stop_queues(struct uld_ctx *ctx) { - spin_lock_irq(&ctx->dev->lock); - if (ctx->dev->db_state == NORMAL) { - ctx->dev->rdev.stats.db_state_transitions++; - ctx->dev->db_state = FLOW_CONTROL; + unsigned long flags; + + spin_lock_irqsave(&ctx->dev->lock, flags); + ctx->dev->rdev.stats.db_state_transitions++; + ctx->dev->db_state = STOPPED; + if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); - } - spin_unlock_irq(&ctx->dev->lock); + else + ctx->dev->rdev.status_page->db_off = 1; + spin_unlock_irqrestore(&ctx->dev->lock, flags); } static int enable_qp_db(int id, void *p, void *data) @@ -994,15 +998,70 @@ static int enable_qp_db(int id, void *p, void *data) return 0; } +static void resume_rc_qp(struct c4iw_qp *qp) +{ + spin_lock(&qp->lock); + t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc); + qp->wq.sq.wq_pidx_inc = 0; + t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc); + qp->wq.rq.wq_pidx_inc = 0; + spin_unlock(&qp->lock); +} + +static void resume_a_chunk(struct uld_ctx *ctx) +{ + int i; + struct c4iw_qp *qp; + + for (i = 0; i < DB_FC_RESUME_SIZE; i++) { + qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp, + db_fc_entry); + list_del_init(&qp->db_fc_entry); + resume_rc_qp(qp); + if (list_empty(&ctx->dev->db_fc_list)) + break; + } +} + static void resume_queues(struct uld_ctx *ctx) { spin_lock_irq(&ctx->dev->lock); - if (ctx->dev->qpcnt <= db_fc_threshold && - ctx->dev->db_state == FLOW_CONTROL) { - ctx->dev->db_state = NORMAL; - ctx->dev->rdev.stats.db_state_transitions++; - idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL); + if (ctx->dev->db_state != STOPPED) + goto out; + ctx->dev->db_state = FLOW_CONTROL; + while (1) { + if (list_empty(&ctx->dev->db_fc_list)) { + WARN_ON(ctx->dev->db_state != FLOW_CONTROL); + ctx->dev->db_state = NORMAL; + ctx->dev->rdev.stats.db_state_transitions++; + if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) { + idr_for_each(&ctx->dev->qpidr, enable_qp_db, + NULL); + } else { + ctx->dev->rdev.status_page->db_off = 0; + } + break; + } else { + if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) + < (ctx->dev->rdev.lldi.dbfifo_int_thresh << + DB_FC_DRAIN_THRESH)) { + resume_a_chunk(ctx); + } + if (!list_empty(&ctx->dev->db_fc_list)) { + spin_unlock_irq(&ctx->dev->lock); + if (DB_FC_RESUME_DELAY) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(DB_FC_RESUME_DELAY); + } + spin_lock_irq(&ctx->dev->lock); + if (ctx->dev->db_state != FLOW_CONTROL) + break; + } + } } +out: + if (ctx->dev->db_state != NORMAL) + ctx->dev->rdev.stats.db_fc_interruptions++; spin_unlock_irq(&ctx->dev->lock); } @@ -1028,12 +1087,12 @@ static int count_qps(int id, void *p, void *data) return 0; } -static void deref_qps(struct qp_list qp_list) +static void deref_qps(struct qp_list *qp_list) { int idx; - for (idx = 0; idx < qp_list.idx; idx++) - c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp); + for (idx = 0; idx < qp_list->idx; idx++) + c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp); } static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) @@ -1044,17 +1103,22 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) for (idx = 0; idx < qp_list->idx; idx++) { struct c4iw_qp *qp = qp_list->qps[idx]; + spin_lock_irq(&qp->rhp->lock); + spin_lock(&qp->lock); ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], qp->wq.sq.qid, t4_sq_host_wq_pidx(&qp->wq), t4_sq_wq_size(&qp->wq)); if (ret) { - printk(KERN_ERR MOD "%s: Fatal error - " + pr_err(KERN_ERR MOD "%s: Fatal error - " "DB overflow recovery failed - " "error syncing SQ qid %u\n", pci_name(ctx->lldi.pdev), qp->wq.sq.qid); + spin_unlock(&qp->lock); + spin_unlock_irq(&qp->rhp->lock); return; } + qp->wq.sq.wq_pidx_inc = 0; ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], qp->wq.rq.qid, @@ -1062,12 +1126,17 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) t4_rq_wq_size(&qp->wq)); if (ret) { - printk(KERN_ERR MOD "%s: Fatal error - " + pr_err(KERN_ERR MOD "%s: Fatal error - " "DB overflow recovery failed - " "error syncing RQ qid %u\n", pci_name(ctx->lldi.pdev), qp->wq.rq.qid); + spin_unlock(&qp->lock); + spin_unlock_irq(&qp->rhp->lock); return; } + qp->wq.rq.wq_pidx_inc = 0; + spin_unlock(&qp->lock); + spin_unlock_irq(&qp->rhp->lock); /* Wait for the dbfifo to drain */ while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) { @@ -1083,36 +1152,22 @@ static void recover_queues(struct uld_ctx *ctx) struct qp_list qp_list; int ret; - /* lock out kernel db ringers */ - mutex_lock(&ctx->dev->db_mutex); - - /* put all queues in to recovery mode */ - spin_lock_irq(&ctx->dev->lock); - ctx->dev->db_state = RECOVERY; - ctx->dev->rdev.stats.db_state_transitions++; - idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); - spin_unlock_irq(&ctx->dev->lock); - /* slow everybody down */ set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(usecs_to_jiffies(1000)); - /* Wait for the dbfifo to completely drain. */ - while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(usecs_to_jiffies(10)); - } - /* flush the SGE contexts */ ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]); if (ret) { printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", pci_name(ctx->lldi.pdev)); - goto out; + return; } /* Count active queues so we can build a list of queues to recover */ spin_lock_irq(&ctx->dev->lock); + WARN_ON(ctx->dev->db_state != STOPPED); + ctx->dev->db_state = RECOVERY; idr_for_each(&ctx->dev->qpidr, count_qps, &count); qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC); @@ -1120,7 +1175,7 @@ static void recover_queues(struct uld_ctx *ctx) printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", pci_name(ctx->lldi.pdev)); spin_unlock_irq(&ctx->dev->lock); - goto out; + return; } qp_list.idx = 0; @@ -1133,29 +1188,13 @@ static void recover_queues(struct uld_ctx *ctx) recover_lost_dbs(ctx, &qp_list); /* we're almost done! deref the qps and clean up */ - deref_qps(qp_list); + deref_qps(&qp_list); kfree(qp_list.qps); - /* Wait for the dbfifo to completely drain again */ - while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(usecs_to_jiffies(10)); - } - - /* resume the queues */ spin_lock_irq(&ctx->dev->lock); - if (ctx->dev->qpcnt > db_fc_threshold) - ctx->dev->db_state = FLOW_CONTROL; - else { - ctx->dev->db_state = NORMAL; - idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL); - } - ctx->dev->rdev.stats.db_state_transitions++; + WARN_ON(ctx->dev->db_state != RECOVERY); + ctx->dev->db_state = STOPPED; spin_unlock_irq(&ctx->dev->lock); - -out: - /* start up kernel db ringers again */ - mutex_unlock(&ctx->dev->db_mutex); } static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) @@ -1165,9 +1204,7 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) switch (control) { case CXGB4_CONTROL_DB_FULL: stop_queues(ctx); - mutex_lock(&ctx->dev->rdev.stats.lock); ctx->dev->rdev.stats.db_full++; - mutex_unlock(&ctx->dev->rdev.stats.lock); break; case CXGB4_CONTROL_DB_EMPTY: resume_queues(ctx); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 23eaeabab93b..eb18f9be35e4 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -109,6 +109,7 @@ struct c4iw_dev_ucontext { enum c4iw_rdev_flags { T4_FATAL_ERROR = (1<<0), + T4_STATUS_PAGE_DISABLED = (1<<1), }; struct c4iw_stat { @@ -130,6 +131,7 @@ struct c4iw_stats { u64 db_empty; u64 db_drop; u64 db_state_transitions; + u64 db_fc_interruptions; u64 tcam_full; u64 act_ofld_conn_fails; u64 pas_ofld_conn_fails; @@ -150,6 +152,7 @@ struct c4iw_rdev { unsigned long oc_mw_pa; void __iomem *oc_mw_kva; struct c4iw_stats stats; + struct t4_dev_status_page *status_page; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -211,7 +214,8 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, enum db_state { NORMAL = 0, FLOW_CONTROL = 1, - RECOVERY = 2 + RECOVERY = 2, + STOPPED = 3 }; struct c4iw_dev { @@ -225,10 +229,10 @@ struct c4iw_dev { struct mutex db_mutex; struct dentry *debugfs_root; enum db_state db_state; - int qpcnt; struct idr hwtid_idr; struct idr atid_idr; struct idr stid_idr; + struct list_head db_fc_list; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@ -432,6 +436,7 @@ struct c4iw_qp_attributes { struct c4iw_qp { struct ib_qp ibqp; + struct list_head db_fc_entry; struct c4iw_dev *rhp; struct c4iw_ep *ep; struct c4iw_qp_attributes attr; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 7e94c9a656a1..e36d2a27c431 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -106,15 +106,54 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, { struct c4iw_ucontext *context; struct c4iw_dev *rhp = to_c4iw_dev(ibdev); + static int warned; + struct c4iw_alloc_ucontext_resp uresp; + int ret = 0; + struct c4iw_mm_entry *mm = NULL; PDBG("%s ibdev %p\n", __func__, ibdev); context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); + if (!context) { + ret = -ENOMEM; + goto err; + } + c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx); INIT_LIST_HEAD(&context->mmaps); spin_lock_init(&context->mmap_lock); + + if (udata->outlen < sizeof(uresp)) { + if (!warned++) + pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled."); + rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED; + } else { + mm = kmalloc(sizeof(*mm), GFP_KERNEL); + if (!mm) + goto err_free; + + uresp.status_page_size = PAGE_SIZE; + + spin_lock(&context->mmap_lock); + uresp.status_page_key = context->key; + context->key += PAGE_SIZE; + spin_unlock(&context->mmap_lock); + + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (ret) + goto err_mm; + + mm->key = uresp.status_page_key; + mm->addr = virt_to_phys(rhp->rdev.status_page); + mm->len = PAGE_SIZE; + insert_mmap(context, mm); + } return &context->ibucontext; +err_mm: + kfree(mm); +err_free: + kfree(context); +err: + return ERR_PTR(ret); } static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 582936708e6e..3b62eb556a47 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -638,6 +638,46 @@ void c4iw_qp_rem_ref(struct ib_qp *qp) wake_up(&(to_c4iw_qp(qp)->wait)); } +static void add_to_fc_list(struct list_head *head, struct list_head *entry) +{ + if (list_empty(entry)) + list_add_tail(entry, head); +} + +static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) +{ + unsigned long flags; + + spin_lock_irqsave(&qhp->rhp->lock, flags); + spin_lock(&qhp->lock); + if (qhp->rhp->db_state == NORMAL) { + t4_ring_sq_db(&qhp->wq, inc); + } else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.sq.wq_pidx_inc += inc; + } + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&qhp->rhp->lock, flags); + return 0; +} + +static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) +{ + unsigned long flags; + + spin_lock_irqsave(&qhp->rhp->lock, flags); + spin_lock(&qhp->lock); + if (qhp->rhp->db_state == NORMAL) { + t4_ring_rq_db(&qhp->wq, inc); + } else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.rq.wq_pidx_inc += inc; + } + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&qhp->rhp->lock, flags); + return 0; +} + int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -750,9 +790,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, t4_sq_produce(&qhp->wq, len16); idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } - if (t4_wq_db_enabled(&qhp->wq)) + if (!qhp->rhp->rdev.status_page->db_off) { t4_ring_sq_db(&qhp->wq, idx); - spin_unlock_irqrestore(&qhp->lock, flag); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); + ring_kernel_sq_db(qhp, idx); + } return err; } @@ -812,9 +856,13 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, wr = wr->next; num_wrs--; } - if (t4_wq_db_enabled(&qhp->wq)) + if (!qhp->rhp->rdev.status_page->db_off) { t4_ring_rq_db(&qhp->wq, idx); - spin_unlock_irqrestore(&qhp->lock, flag); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); + ring_kernel_rq_db(qhp, idx); + } return err; } @@ -1200,35 +1248,6 @@ out: return ret; } -/* - * Called by the library when the qp has user dbs disabled due to - * a DB_FULL condition. This function will single-thread all user - * DB rings to avoid overflowing the hw db-fifo. - */ -static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc) -{ - int delay = db_delay_usecs; - - mutex_lock(&qhp->rhp->db_mutex); - do { - - /* - * The interrupt threshold is dbfifo_int_thresh << 6. So - * make sure we don't cross that and generate an interrupt. - */ - if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) < - (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) { - writel(QID(qid) | PIDX(inc), qhp->wq.db); - break; - } - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(usecs_to_jiffies(delay)); - delay = min(delay << 1, 2000); - } while (1); - mutex_unlock(&qhp->rhp->db_mutex); - return 0; -} - int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, enum c4iw_qp_attr_mask mask, struct c4iw_qp_attributes *attrs, @@ -1278,11 +1297,11 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, } if (mask & C4IW_QP_ATTR_SQ_DB) { - ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc); + ret = ring_kernel_sq_db(qhp, attrs->sq_db_inc); goto out; } if (mask & C4IW_QP_ATTR_RQ_DB) { - ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc); + ret = ring_kernel_rq_db(qhp, attrs->rq_db_inc); goto out; } @@ -1465,14 +1484,6 @@ out: return ret; } -static int enable_qp_db(int id, void *p, void *data) -{ - struct c4iw_qp *qp = p; - - t4_enable_wq_db(&qp->wq); - return 0; -} - int c4iw_destroy_qp(struct ib_qp *ib_qp) { struct c4iw_dev *rhp; @@ -1490,22 +1501,15 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); - spin_lock_irq(&rhp->lock); - remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid); - rhp->qpcnt--; - BUG_ON(rhp->qpcnt < 0); - if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) { - rhp->rdev.stats.db_state_transitions++; - rhp->db_state = NORMAL; - idr_for_each(&rhp->qpidr, enable_qp_db, NULL); - } - if (db_coalescing_threshold >= 0) - if (rhp->qpcnt <= db_coalescing_threshold) - cxgb4_enable_db_coalescing(rhp->rdev.lldi.ports[0]); - spin_unlock_irq(&rhp->lock); + remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); atomic_dec(&qhp->refcnt); wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); + spin_lock_irq(&rhp->lock); + if (!list_empty(&qhp->db_fc_entry)) + list_del_init(&qhp->db_fc_entry); + spin_unlock_irq(&rhp->lock); + ucontext = ib_qp->uobject ? to_c4iw_ucontext(ib_qp->uobject->context) : NULL; destroy_qp(&rhp->rdev, &qhp->wq, @@ -1516,14 +1520,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) return 0; } -static int disable_qp_db(int id, void *p, void *data) -{ - struct c4iw_qp *qp = p; - - t4_disable_wq_db(&qp->wq); - return 0; -} - struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { @@ -1610,20 +1606,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); - spin_lock_irq(&rhp->lock); - if (rhp->db_state != NORMAL) - t4_disable_wq_db(&qhp->wq); - rhp->qpcnt++; - if (rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) { - rhp->rdev.stats.db_state_transitions++; - rhp->db_state = FLOW_CONTROL; - idr_for_each(&rhp->qpidr, disable_qp_db, NULL); - } - if (db_coalescing_threshold >= 0) - if (rhp->qpcnt > db_coalescing_threshold) - cxgb4_disable_db_coalescing(rhp->rdev.lldi.ports[0]); - ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); - spin_unlock_irq(&rhp->lock); + ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); if (ret) goto err2; @@ -1709,6 +1692,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, } qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); + INIT_LIST_HEAD(&qhp->db_fc_entry); PDBG("%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x\n", __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, qhp->wq.sq.qid); diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index e73ace739183..eeca8b1e6376 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -300,6 +300,7 @@ struct t4_sq { u16 cidx; u16 pidx; u16 wq_pidx; + u16 wq_pidx_inc; u16 flags; short flush_cidx; }; @@ -324,6 +325,7 @@ struct t4_rq { u16 cidx; u16 pidx; u16 wq_pidx; + u16 wq_pidx_inc; }; struct t4_wq { @@ -609,3 +611,7 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq) ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1; } #endif + +struct t4_dev_status_page { + u8 db_off; +}; diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index 32b754c35ab7..11ccd276e5d9 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -70,4 +70,9 @@ struct c4iw_create_qp_resp { __u32 qid_mask; __u32 flags; }; + +struct c4iw_alloc_ucontext_resp { + __u64 status_page_key; + __u32 status_page_size; +}; #endif diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 50abe1d61287..32db37709263 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -500,6 +500,7 @@ struct sge_txq { spinlock_t db_lock; int db_disabled; unsigned short db_pidx; + unsigned short db_pidx_inc; u64 udb; }; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 0ac53dd84c61..cc04d090354c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3578,14 +3578,25 @@ static void drain_db_fifo(struct adapter *adap, int usecs) static void disable_txq_db(struct sge_txq *q) { - spin_lock_irq(&q->db_lock); + unsigned long flags; + + spin_lock_irqsave(&q->db_lock, flags); q->db_disabled = 1; - spin_unlock_irq(&q->db_lock); + spin_unlock_irqrestore(&q->db_lock, flags); } -static void enable_txq_db(struct sge_txq *q) +static void enable_txq_db(struct adapter *adap, struct sge_txq *q) { spin_lock_irq(&q->db_lock); + if (q->db_pidx_inc) { + /* Make sure that all writes to the TX descriptors + * are committed before we tell HW about them. + */ + wmb(); + t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), + QID(q->cntxt_id) | PIDX(q->db_pidx_inc)); + q->db_pidx_inc = 0; + } q->db_disabled = 0; spin_unlock_irq(&q->db_lock); } @@ -3607,11 +3618,32 @@ static void enable_dbs(struct adapter *adap) int i; for_each_ethrxq(&adap->sge, i) - enable_txq_db(&adap->sge.ethtxq[i].q); + enable_txq_db(adap, &adap->sge.ethtxq[i].q); for_each_ofldrxq(&adap->sge, i) - enable_txq_db(&adap->sge.ofldtxq[i].q); + enable_txq_db(adap, &adap->sge.ofldtxq[i].q); for_each_port(adap, i) - enable_txq_db(&adap->sge.ctrlq[i].q); + enable_txq_db(adap, &adap->sge.ctrlq[i].q); +} + +static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd) +{ + if (adap->uld_handle[CXGB4_ULD_RDMA]) + ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA], + cmd); +} + +static void process_db_full(struct work_struct *work) +{ + struct adapter *adap; + + adap = container_of(work, struct adapter, db_full_task); + + drain_db_fifo(adap, dbfifo_drain_delay); + enable_dbs(adap); + notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY); + t4_set_reg_field(adap, SGE_INT_ENABLE3, + DBFIFO_HP_INT | DBFIFO_LP_INT, + DBFIFO_HP_INT | DBFIFO_LP_INT); } static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q) @@ -3619,7 +3651,7 @@ static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q) u16 hw_pidx, hw_cidx; int ret; - spin_lock_bh(&q->db_lock); + spin_lock_irq(&q->db_lock); ret = read_eq_indices(adap, (u16)q->cntxt_id, &hw_pidx, &hw_cidx); if (ret) goto out; @@ -3636,7 +3668,8 @@ static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q) } out: q->db_disabled = 0; - spin_unlock_bh(&q->db_lock); + q->db_pidx_inc = 0; + spin_unlock_irq(&q->db_lock); if (ret) CH_WARN(adap, "DB drop recovery failed.\n"); } @@ -3652,29 +3685,6 @@ static void recover_all_queues(struct adapter *adap) sync_txq_pidx(adap, &adap->sge.ctrlq[i].q); } -static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd) -{ - mutex_lock(&uld_mutex); - if (adap->uld_handle[CXGB4_ULD_RDMA]) - ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA], - cmd); - mutex_unlock(&uld_mutex); -} - -static void process_db_full(struct work_struct *work) -{ - struct adapter *adap; - - adap = container_of(work, struct adapter, db_full_task); - - notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL); - drain_db_fifo(adap, dbfifo_drain_delay); - t4_set_reg_field(adap, SGE_INT_ENABLE3, - DBFIFO_HP_INT | DBFIFO_LP_INT, - DBFIFO_HP_INT | DBFIFO_LP_INT); - notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY); -} - static void process_db_drop(struct work_struct *work) { struct adapter *adap; @@ -3682,11 +3692,13 @@ static void process_db_drop(struct work_struct *work) adap = container_of(work, struct adapter, db_drop_task); if (is_t4(adap->params.chip)) { - disable_dbs(adap); + drain_db_fifo(adap, dbfifo_drain_delay); notify_rdma_uld(adap, CXGB4_CONTROL_DB_DROP); - drain_db_fifo(adap, 1); + drain_db_fifo(adap, dbfifo_drain_delay); recover_all_queues(adap); + drain_db_fifo(adap, dbfifo_drain_delay); enable_dbs(adap); + notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY); } else { u32 dropped_db = t4_read_reg(adap, 0x010ac); u16 qid = (dropped_db >> 15) & 0x1ffff; @@ -3727,6 +3739,8 @@ static void process_db_drop(struct work_struct *work) void t4_db_full(struct adapter *adap) { if (is_t4(adap->params.chip)) { + disable_dbs(adap); + notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL); t4_set_reg_field(adap, SGE_INT_ENABLE3, DBFIFO_HP_INT | DBFIFO_LP_INT, 0); queue_work(workq, &adap->db_full_task); @@ -3735,8 +3749,11 @@ void t4_db_full(struct adapter *adap) void t4_db_dropped(struct adapter *adap) { - if (is_t4(adap->params.chip)) - queue_work(workq, &adap->db_drop_task); + if (is_t4(adap->params.chip)) { + disable_dbs(adap); + notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL); + } + queue_work(workq, &adap->db_drop_task); } static void uld_attach(struct adapter *adap, unsigned int uld) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 46429f9d0592..d4db382ff8c7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -860,9 +860,10 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) { unsigned int *wr, index; + unsigned long flags; wmb(); /* write descriptors before telling HW */ - spin_lock(&q->db_lock); + spin_lock_irqsave(&q->db_lock, flags); if (!q->db_disabled) { if (is_t4(adap->params.chip)) { t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), @@ -878,9 +879,10 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) writel(n, adap->bar2 + q->udb + 8); wmb(); } - } + } else + q->db_pidx_inc += n; q->db_pidx = q->pidx; - spin_unlock(&q->db_lock); + spin_unlock_irqrestore(&q->db_lock, flags); } /** -- cgit v1.2.3 From 82373701be26b893eaf7372db0af84235a51998a Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 19 Mar 2014 18:11:49 +0200 Subject: IB/mlx4_ib: Adapt code to use caps.num_ports instead of a constant Some code in the mlx4 IB driver stack assumed MLX4_MAX_PORTS ports. Instead, we should only loop until the number of actual ports in i the device, which is stored in dev->caps.num_ports. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1d1750ef000a..2ff428b17088 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1546,7 +1546,7 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev, iboe = &ibdev->iboe; spin_lock(&iboe->lock); - for (port = 1; port <= MLX4_MAX_PORTS; ++port) + for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) if ((netif_is_bond_master(real_dev) && (real_dev == iboe->masters[port - 1])) || (!netif_is_bond_master(real_dev) && @@ -1569,14 +1569,14 @@ static u8 mlx4_ib_get_dev_port(struct net_device *dev, iboe = &ibdev->iboe; - for (port = 1; port <= MLX4_MAX_PORTS; ++port) + for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) if ((netif_is_bond_master(real_dev) && (real_dev == iboe->masters[port - 1])) || (!netif_is_bond_master(real_dev) && (real_dev == iboe->netdevs[port - 1]))) break; - if ((port == 0) || (port > MLX4_MAX_PORTS)) + if ((port == 0) || (port > ibdev->dev->caps.num_ports)) return 0; else return port; @@ -1626,7 +1626,7 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev, union ib_gid gid; - if ((port == 0) || (port > MLX4_MAX_PORTS)) + if ((port == 0) || (port > ibdev->dev->caps.num_ports)) return; /* IPv4 gids */ -- cgit v1.2.3 From 449fc48866f7d84b0d9a19201de18a4dd4d3488c Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 19 Mar 2014 18:11:52 +0200 Subject: net/mlx4: Adapt code for N-Port VF Adds support for N-Port VFs, this includes: 1. Adding support in the wrapped FW command In wrapped commands, we need to verify and convert the slave's port into the real physical port. Furthermore, when sending the response back to the slave, a reverse conversion should be made. 2. Adjusting sqpn for QP1 para-virtualization The slave assumes that sqpn is used for QP1 communication. If the slave is assigned to a port != (first port), we need to adjust the sqpn that will direct its QP1 packets into the correct endpoint. 3. Adjusting gid[5] to modify the port for raw ethernet In B0 steering, gid[5] contains the port. It needs to be adjusted into the physical port. 4. Adjusting number of ports in the query / ports caps in the FW commands When a slave queries the hardware, it needs to view only the physical ports it's assigned to. 5. Adjusting the sched_qp according to the port number The QP port is encoded in the sched_qp, thus in modify_qp we need to encode the correct port in sched_qp. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/mad.c | 29 ++-- drivers/infiniband/hw/mlx4/main.c | 15 +- drivers/infiniband/hw/mlx4/sysfs.c | 5 + drivers/net/ethernet/mellanox/mlx4/cmd.c | 23 ++- drivers/net/ethernet/mellanox/mlx4/eq.c | 48 +++++-- drivers/net/ethernet/mellanox/mlx4/fw.c | 62 +++++++- drivers/net/ethernet/mellanox/mlx4/main.c | 4 +- drivers/net/ethernet/mellanox/mlx4/mcg.c | 5 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 +- drivers/net/ethernet/mellanox/mlx4/port.c | 158 ++++++++++++++++++--- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 143 ++++++++++++++++--- include/linux/mlx4/device.h | 1 + 12 files changed, 417 insertions(+), 79 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 2c572aed3f6f..fd36ec672632 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1245,21 +1245,9 @@ out: static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port) { - int gids; - int vfs; - if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) return slave; - - gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; - vfs = dev->dev->num_vfs; - - if (slave == 0) - return 0; - if (slave <= gids % vfs) - return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1); - - return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1)); + return mlx4_get_base_gid_ix(dev->dev, slave, port); } static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port, @@ -1281,6 +1269,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc struct ib_ah_attr ah_attr; u8 *slave_id; int slave; + int port; /* Get slave that sent this packet */ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || @@ -1360,6 +1349,10 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc if (ah_attr.ah_flags & IB_AH_GRH) fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); + port = mlx4_slave_convert_port(dev->dev, slave, ah_attr.port_num); + if (port < 0) + return; + ah_attr.port_num = port; memcpy(ah_attr.dmac, tunnel->hdr.mac, 6); ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan); /* if slave have default vlan use it */ @@ -1949,7 +1942,15 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, ctx->port = port; ctx->ib_dev = &dev->ib_dev; - for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + for (i = 0; + i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1)); + i++) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev->dev, i); + + if (!test_bit(port - 1, actv_ports.ports)) + continue; + ret = alloc_pv_object(dev, i, port, &ctx->tun[i]); if (ret) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2ff428b17088..6cb85467dde7 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2323,17 +2323,24 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) struct mlx4_dev *dev = ibdev->dev; int i; unsigned long flags; + struct mlx4_active_ports actv_ports; + unsigned int ports; + unsigned int first_port; if (!mlx4_is_master(dev)) return; - dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC); + actv_ports = mlx4_get_active_ports(dev, slave); + ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports); + first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports); + + dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC); if (!dm) { pr_err("failed to allocate memory for tunneling qp update\n"); goto out; } - for (i = 0; i < dev->caps.num_ports; i++) { + for (i = 0; i < ports; i++) { dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); if (!dm[i]) { pr_err("failed to allocate memory for tunneling qp update work struct\n"); @@ -2345,9 +2352,9 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) } } /* initialize or tear down tunnel QPs for the slave */ - for (i = 0; i < dev->caps.num_ports; i++) { + for (i = 0; i < ports; i++) { INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); - dm[i]->port = i + 1; + dm[i]->port = first_port + i + 1; dm[i]->slave = slave; dm[i]->do_init = do_init; dm[i]->dev = ibdev; diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index db2ea31df832..5a38e43eca65 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -627,6 +627,7 @@ static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave) int port; struct kobject *p, *t; struct mlx4_port *mport; + struct mlx4_active_ports actv_ports; get_name(dev, name, slave, sizeof name); @@ -649,7 +650,11 @@ static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave) goto err_ports; } + actv_ports = mlx4_get_active_ports(dev->dev, slave); + for (port = 1; port <= dev->dev->caps.num_ports; ++port) { + if (!test_bit(port - 1, actv_ports.ports)) + continue; err = add_port(dev, port, slave); if (err) goto err_add; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 59a1b2703281..516c1dd4963b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1643,8 +1643,16 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) int port, err; struct mlx4_vport_state *vp_admin; struct mlx4_vport_oper_state *vp_oper; - - for (port = 1; port <= MLX4_MAX_PORTS; port++) { + struct mlx4_active_ports actv_ports = mlx4_get_active_ports( + &priv->dev, slave); + int min_port = find_first_bit(actv_ports.ports, + priv->dev.caps.num_ports) + 1; + int max_port = min_port - 1 + + bitmap_weight(actv_ports.ports, priv->dev.caps.num_ports); + + for (port = min_port; port <= max_port; port++) { + if (!test_bit(port - 1, actv_ports.ports)) + continue; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; vp_oper->state = *vp_admin; @@ -1685,8 +1693,17 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave { int port; struct mlx4_vport_oper_state *vp_oper; + struct mlx4_active_ports actv_ports = mlx4_get_active_ports( + &priv->dev, slave); + int min_port = find_first_bit(actv_ports.ports, + priv->dev.caps.num_ports) + 1; + int max_port = min_port - 1 + + bitmap_weight(actv_ports.ports, priv->dev.caps.num_ports); + - for (port = 1; port <= MLX4_MAX_PORTS; port++) { + for (port = min_port; port <= max_port; port++) { + if (!test_bit(port - 1, actv_ports.ports)) + continue; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (NO_INDX != vp_oper->vlan_idx) { __mlx4_unregister_vlan(&priv->dev, diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 8992b38578d5..d501a2b0fb79 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -271,7 +271,10 @@ enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; - if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS) { + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + + if (slave >= dev->num_slaves || port > dev->caps.num_ports || + port <= 0 || !test_bit(port - 1, actv_ports.ports)) { pr_err("%s: Error: asking for slave:%d, port:%d\n", __func__, slave, port); return SLAVE_PORT_DOWN; @@ -285,8 +288,10 @@ static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); - if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { + if (slave >= dev->num_slaves || port > dev->caps.num_ports || + port <= 0 || !test_bit(port - 1, actv_ports.ports)) { pr_err("%s: Error: asking for slave:%d, port:%d\n", __func__, slave, port); return -1; @@ -300,9 +305,13 @@ static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event) { int i; enum slave_port_gen_event gen_event; + struct mlx4_slaves_pport slaves_pport = mlx4_phys_to_slaves_pport(dev, + port); - for (i = 0; i < dev->num_slaves; i++) - set_and_calc_slave_port_state(dev, i, port, event, &gen_event); + for (i = 0; i < dev->num_vfs + 1; i++) + if (test_bit(i, slaves_pport.slaves)) + set_and_calc_slave_port_state(dev, i, port, + event, &gen_event); } /************************************************************************** The function get as input the new event to that port, @@ -321,12 +330,14 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, struct mlx4_slave_state *ctx = NULL; unsigned long flags; int ret = -1; + struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); enum slave_port_state cur_state = mlx4_get_slave_port_state(dev, slave, port); *gen_event = SLAVE_PORT_GEN_EVENT_NONE; - if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { + if (slave >= dev->num_slaves || port > dev->caps.num_ports || + port <= 0 || !test_bit(port - 1, actv_ports.ports)) { pr_err("%s: Error: asking for slave:%d, port:%d\n", __func__, slave, port); return ret; @@ -542,15 +553,19 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) be64_to_cpu(eqe->event.cmd.out_param)); break; - case MLX4_EVENT_TYPE_PORT_CHANGE: + case MLX4_EVENT_TYPE_PORT_CHANGE: { + struct mlx4_slaves_pport slaves_port; port = be32_to_cpu(eqe->event.port_change.port) >> 28; + slaves_port = mlx4_phys_to_slaves_pport(dev, port); if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) { mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN, port); mlx4_priv(dev)->sense.do_sense_port[port] = 1; if (!mlx4_is_master(dev)) break; - for (i = 0; i < dev->num_slaves; i++) { + for (i = 0; i < dev->num_vfs + 1; i++) { + if (!test_bit(i, slaves_port.slaves)) + continue; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { if (i == mlx4_master_func_num(dev)) continue; @@ -558,8 +573,13 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) " to slave: %d, port:%d\n", __func__, i, port); s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; - if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) + if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { + eqe->event.port_change.port = + cpu_to_be32( + (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) + | (mlx4_phys_to_slave_port(dev, i, port) << 28)); mlx4_slave_event(dev, i, eqe); + } } else { /* IB port */ set_and_calc_slave_port_state(dev, i, port, MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, @@ -580,12 +600,19 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!mlx4_is_master(dev)) break; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) - for (i = 0; i < dev->num_slaves; i++) { + for (i = 0; i < dev->num_vfs + 1; i++) { + if (!test_bit(i, slaves_port.slaves)) + continue; if (i == mlx4_master_func_num(dev)) continue; s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; - if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) + if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) { + eqe->event.port_change.port = + cpu_to_be32( + (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF) + | (mlx4_phys_to_slave_port(dev, i, port) << 28)); mlx4_slave_event(dev, i, eqe); + } } else /* IB port */ /* port-up event will be sent to a slave when the @@ -594,6 +621,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP); } break; + } case MLX4_EVENT_TYPE_CQ_ERROR: mlx4_warn(dev, "CQ %s on CQN %06x\n", diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d0d8dd832557..6bd33e2fc17c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -225,13 +225,25 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 if (vhcr->op_modifier == 1) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev, slave); + int converted_port = mlx4_slave_convert_port( + dev, slave, vhcr->in_modifier); + + if (converted_port < 0) + return -EINVAL; + + vhcr->in_modifier = converted_port; /* Set nic_info bit to mark new fields support */ field = QUERY_FUNC_CAP_FLAGS1_NIC_INFO; MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET); - field = vhcr->in_modifier; /* phys-port = logical-port */ + /* phys-port = logical-port */ + field = vhcr->in_modifier - + find_first_bit(actv_ports.ports, dev->caps.num_ports); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + field = vhcr->in_modifier; /* size is now the QP number */ size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); @@ -249,12 +261,16 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, QUERY_FUNC_CAP_PHYS_PORT_ID); } else if (vhcr->op_modifier == 0) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev, slave); /* enable rdma and ethernet interfaces, and new quota locations */ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA | QUERY_FUNC_CAP_FLAG_QUOTAS); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET); - field = dev->caps.num_ports; + field = min( + bitmap_weight(actv_ports.ports, dev->caps.num_ports), + dev->caps.num_ports); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); size = dev->caps.function_caps; /* set PF behaviours */ @@ -840,6 +856,10 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, int err = 0; u8 field; u32 bmme_flags; + int real_port; + int slave_port; + int first_port; + struct mlx4_active_ports actv_ports; err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); @@ -852,8 +872,26 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV; flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; + actv_ports = mlx4_get_active_ports(dev, slave); + first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports); + for (slave_port = 0, real_port = first_port; + real_port < first_port + + bitmap_weight(actv_ports.ports, dev->caps.num_ports); + ++real_port, ++slave_port) { + if (flags & (MLX4_DEV_CAP_FLAG_WOL_PORT1 << real_port)) + flags |= MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port; + else + flags &= ~(MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port); + } + for (; slave_port < dev->caps.num_ports; ++slave_port) + flags &= ~(MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port); MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VL_PORT_OFFSET); + field &= ~0x0F; + field |= bitmap_weight(actv_ports.ports, dev->caps.num_ports) & 0x0F; + MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VL_PORT_OFFSET); + /* For guests, disable timestamp */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); field &= 0x7f; @@ -903,12 +941,20 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, u16 short_field; int err; int admin_link_state; + int port = mlx4_slave_convert_port(dev, slave, + vhcr->in_modifier & 0xFF); #define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0 #define MLX4_PORT_LINK_UP_MASK 0x80 #define QUERY_PORT_CUR_MAX_PKEY_OFFSET 0x0c #define QUERY_PORT_CUR_MAX_GID_OFFSET 0x0e + if (port < 0) + return -EINVAL; + + vhcr->in_modifier = (vhcr->in_modifier & ~0xFF) | + (port & 0xFF); + err = mlx4_cmd_box(dev, 0, outbox->dma, vhcr->in_modifier, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); @@ -936,7 +982,7 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, QUERY_PORT_SUPPORTED_TYPE_OFFSET); if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH) - short_field = mlx4_get_slave_num_gids(dev, slave); + short_field = mlx4_get_slave_num_gids(dev, slave, port); else short_field = 1; /* slave max gids */ MLX4_PUT(outbox->buf, short_field, @@ -1588,9 +1634,12 @@ int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); - int port = vhcr->in_modifier; + int port = mlx4_slave_convert_port(dev, slave, vhcr->in_modifier); int err; + if (port < 0) + return -EINVAL; + if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port)) return 0; @@ -1680,9 +1729,12 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); - int port = vhcr->in_modifier; + int port = mlx4_slave_convert_port(dev, slave, vhcr->in_modifier); int err; + if (port < 0) + return -EINVAL; + if (!(priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port))) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index e1a55857af71..472925428de7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1471,7 +1471,7 @@ static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) for (i = 1; i <= dev->caps.num_ports; i++) { if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) dev->caps.gid_table_len[i] = - mlx4_get_slave_num_gids(dev, 0); + mlx4_get_slave_num_gids(dev, 0, i); else dev->caps.gid_table_len[i] = 1; dev->caps.pkey_table_len[i] = @@ -1498,7 +1498,7 @@ static void choose_steering_mode(struct mlx4_dev *dev, if (mlx4_log_num_mgm_entry_size == -1 && dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && (!mlx4_is_mfunc(dev) || - (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) && + (dev_cap->fs_max_num_qp_per_entry >= (dev->num_vfs + 1))) && choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= MLX4_MIN_MGM_LOG_ENTRY_SIZE) { dev->oper_log_mgm_entry_size = diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index db7dc0b6667d..80ccb4edf825 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1387,9 +1387,12 @@ int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { u32 qpn = (u32) vhcr->in_param & 0xffffffff; - u8 port = vhcr->in_param >> 62; + int port = mlx4_slave_convert_port(dev, slave, vhcr->in_param >> 62); enum mlx4_steer_type steer = vhcr->in_modifier; + if (port < 0) + return -EINVAL; + /* Promiscuous unicast is not allowed in mfunc */ if (mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index fe8715e35afa..9fca6c150de3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1287,8 +1287,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); void mlx4_init_quotas(struct mlx4_dev *dev); -int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave); -int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave); +int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); /* Returns the VF index of slave */ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index ece328166e94..2705b9ab9463 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -507,30 +507,82 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps) } static struct mlx4_roce_gid_entry zgid_entry; -int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave) +int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port) { + int vfs; + int slave_gid = slave; + unsigned i; + struct mlx4_slaves_pport slaves_pport; + struct mlx4_active_ports actv_ports; + unsigned max_port_p_one; + if (slave == 0) return MLX4_ROCE_PF_GIDS; - if (slave <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % dev->num_vfs)) - return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs) + 1; - return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs; + + /* Slave is a VF */ + slaves_pport = mlx4_phys_to_slaves_pport(dev, port); + actv_ports = mlx4_get_active_ports(dev, slave); + max_port_p_one = find_first_bit(actv_ports.ports, dev->caps.num_ports) + + bitmap_weight(actv_ports.ports, dev->caps.num_ports) + 1; + + for (i = 1; i < max_port_p_one; i++) { + struct mlx4_active_ports exclusive_ports; + struct mlx4_slaves_pport slaves_pport_actv; + bitmap_zero(exclusive_ports.ports, dev->caps.num_ports); + set_bit(i - 1, exclusive_ports.ports); + if (i == port) + continue; + slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + slave_gid -= bitmap_weight(slaves_pport_actv.slaves, + dev->num_vfs + 1); + } + vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + if (slave_gid <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % vfs)) + return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs) + 1; + return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs; } -int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave) +int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port) { int gids; + unsigned i; + int slave_gid = slave; int vfs; - gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; - vfs = dev->num_vfs; + struct mlx4_slaves_pport slaves_pport; + struct mlx4_active_ports actv_ports; + unsigned max_port_p_one; if (slave == 0) return 0; - if (slave <= gids % vfs) - return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1); - return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1)); + slaves_pport = mlx4_phys_to_slaves_pport(dev, port); + actv_ports = mlx4_get_active_ports(dev, slave); + max_port_p_one = find_first_bit(actv_ports.ports, dev->caps.num_ports) + + bitmap_weight(actv_ports.ports, dev->caps.num_ports) + 1; + + for (i = 1; i < max_port_p_one; i++) { + struct mlx4_active_ports exclusive_ports; + struct mlx4_slaves_pport slaves_pport_actv; + bitmap_zero(exclusive_ports.ports, dev->caps.num_ports); + set_bit(i - 1, exclusive_ports.ports); + if (i == port) + continue; + slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + slave_gid -= bitmap_weight(slaves_pport_actv.slaves, + dev->num_vfs + 1); + } + gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; + vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + if (slave_gid <= gids % vfs) + return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave_gid - 1); + + return MLX4_ROCE_PF_GIDS + (gids % vfs) + + ((gids / vfs) * (slave_gid - 1)); } +EXPORT_SYMBOL_GPL(mlx4_get_base_gid_ix); static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, u8 op_mod, struct mlx4_cmd_mailbox *inbox) @@ -617,8 +669,8 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, * need a FOR-loop here over number of gids the guest has. * 1. Check no duplicates in gids passed by slave */ - num_gids = mlx4_get_slave_num_gids(dev, slave); - base = mlx4_get_base_gid_ix(dev, slave); + num_gids = mlx4_get_slave_num_gids(dev, slave, port); + base = mlx4_get_base_gid_ix(dev, slave, port); gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); for (i = 0; i < num_gids; gid_entry_mbox++, i++) { if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw, @@ -738,6 +790,15 @@ int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + int port = mlx4_slave_convert_port( + dev, slave, vhcr->in_modifier & 0xFF); + + if (port < 0) + return -EINVAL; + + vhcr->in_modifier = (vhcr->in_modifier & ~0xFF) | + (port & 0xFF); + return mlx4_common_set_port(dev, slave, vhcr->in_modifier, vhcr->op_modifier, inbox); } @@ -1026,10 +1087,16 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, struct mlx4_priv *priv = mlx4_priv(dev); int i, found_ix = -1; int vf_gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; + struct mlx4_slaves_pport slaves_pport; + unsigned num_vfs; + int slave_gid; if (!mlx4_is_mfunc(dev)) return -EINVAL; + slaves_pport = mlx4_phys_to_slaves_pport(dev, port); + num_vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { if (!memcmp(priv->roce_gids[port - 1][i].raw, gid, 16)) { found_ix = i; @@ -1039,16 +1106,67 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, if (found_ix >= 0) { if (found_ix < MLX4_ROCE_PF_GIDS) - *slave_id = 0; - else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % dev->num_vfs) * - (vf_gids / dev->num_vfs + 1)) - *slave_id = ((found_ix - MLX4_ROCE_PF_GIDS) / - (vf_gids / dev->num_vfs + 1)) + 1; + slave_gid = 0; + else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % num_vfs) * + (vf_gids / num_vfs + 1)) + slave_gid = ((found_ix - MLX4_ROCE_PF_GIDS) / + (vf_gids / num_vfs + 1)) + 1; else - *slave_id = + slave_gid = ((found_ix - MLX4_ROCE_PF_GIDS - - ((vf_gids % dev->num_vfs) * ((vf_gids / dev->num_vfs + 1)))) / - (vf_gids / dev->num_vfs)) + vf_gids % dev->num_vfs + 1; + ((vf_gids % num_vfs) * ((vf_gids / num_vfs + 1)))) / + (vf_gids / num_vfs)) + vf_gids % num_vfs + 1; + + if (slave_gid) { + struct mlx4_active_ports exclusive_ports; + struct mlx4_active_ports actv_ports; + struct mlx4_slaves_pport slaves_pport_actv; + unsigned max_port_p_one; + int num_slaves_before = 1; + + for (i = 1; i < port; i++) { + bitmap_zero(exclusive_ports.ports, dev->caps.num_ports); + set_bit(i, exclusive_ports.ports); + slaves_pport_actv = + mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + num_slaves_before += bitmap_weight( + slaves_pport_actv.slaves, + dev->num_vfs + 1); + } + + if (slave_gid < num_slaves_before) { + bitmap_zero(exclusive_ports.ports, dev->caps.num_ports); + set_bit(port - 1, exclusive_ports.ports); + slaves_pport_actv = + mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + slave_gid += bitmap_weight( + slaves_pport_actv.slaves, + dev->num_vfs + 1) - + num_slaves_before; + } + actv_ports = mlx4_get_active_ports(dev, slave_gid); + max_port_p_one = find_first_bit( + actv_ports.ports, dev->caps.num_ports) + + bitmap_weight(actv_ports.ports, + dev->caps.num_ports) + 1; + + for (i = 1; i < max_port_p_one; i++) { + if (i == port) + continue; + bitmap_zero(exclusive_ports.ports, + dev->caps.num_ports); + set_bit(i - 1, exclusive_ports.ports); + slaves_pport_actv = + mlx4_phys_to_slaves_pport_actv( + dev, &exclusive_ports); + slave_gid += bitmap_weight( + slaves_pport_actv.slaves, + dev->num_vfs + 1); + } + } + *slave_id = slave_gid; } return (found_ix >= 0) ? 0 : -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 74e490d70184..2a33513a0e31 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -468,6 +468,8 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) spin_lock_init(&res_alloc->alloc_lock); for (t = 0; t < dev->num_vfs + 1; t++) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev, t); switch (i) { case RES_QP: initialize_res_quotas(dev, res_alloc, RES_QP, @@ -497,10 +499,27 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) break; case RES_MAC: if (t == mlx4_master_func_num(dev)) { - res_alloc->quota[t] = MLX4_MAX_MAC_NUM; + int max_vfs_pport = 0; + /* Calculate the max vfs per port for */ + /* both ports. */ + for (j = 0; j < dev->caps.num_ports; + j++) { + struct mlx4_slaves_pport slaves_pport = + mlx4_phys_to_slaves_pport(dev, j + 1); + unsigned current_slaves = + bitmap_weight(slaves_pport.slaves, + dev->caps.num_ports) - 1; + if (max_vfs_pport < current_slaves) + max_vfs_pport = + current_slaves; + } + res_alloc->quota[t] = + MLX4_MAX_MAC_NUM - + 2 * max_vfs_pport; res_alloc->guaranteed[t] = 2; for (j = 0; j < MLX4_MAX_PORTS; j++) - res_alloc->res_port_free[j] = MLX4_MAX_MAC_NUM; + res_alloc->res_port_free[j] = + MLX4_MAX_MAC_NUM; } else { res_alloc->quota[t] = MLX4_MAX_MAC_NUM; res_alloc->guaranteed[t] = 2; @@ -528,9 +547,10 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) break; } if (i == RES_MAC || i == RES_VLAN) { - for (j = 0; j < MLX4_MAX_PORTS; j++) - res_alloc->res_port_rsvd[j] += - res_alloc->guaranteed[t]; + for (j = 0; j < dev->caps.num_ports; j++) + if (test_bit(j, actv_ports.ports)) + res_alloc->res_port_rsvd[j] += + res_alloc->guaranteed[t]; } else { res_alloc->res_reserved += res_alloc->guaranteed[t]; } @@ -612,7 +632,8 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, if (MLX4_QP_ST_UD == ts) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (mlx4_is_eth(dev, port)) - qp_ctx->pri_path.mgid_index = mlx4_get_base_gid_ix(dev, slave) | 0x80; + qp_ctx->pri_path.mgid_index = + mlx4_get_base_gid_ix(dev, slave, port) | 0x80; else qp_ctx->pri_path.mgid_index = slave | 0x80; @@ -620,7 +641,8 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (mlx4_is_eth(dev, port)) { - qp_ctx->pri_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); + qp_ctx->pri_path.mgid_index += + mlx4_get_base_gid_ix(dev, slave, port); qp_ctx->pri_path.mgid_index &= 0x7f; } else { qp_ctx->pri_path.mgid_index = slave & 0x7F; @@ -629,7 +651,8 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; if (mlx4_is_eth(dev, port)) { - qp_ctx->alt_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); + qp_ctx->alt_path.mgid_index += + mlx4_get_base_gid_ix(dev, slave, port); qp_ctx->alt_path.mgid_index &= 0x7f; } else { qp_ctx->alt_path.mgid_index = slave & 0x7F; @@ -1780,6 +1803,11 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, return err; port = !in_port ? get_param_l(out_param) : in_port; + port = mlx4_slave_convert_port( + dev, slave, port); + + if (port < 0) + return -EINVAL; mac = in_param; err = __mlx4_register_mac(dev, port, mac); @@ -1887,6 +1915,11 @@ static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (!port || op != RES_OP_RESERVE_AND_MAP) return -EINVAL; + port = mlx4_slave_convert_port( + dev, slave, port); + + if (port < 0) + return -EINVAL; /* upstream kernels had NOP for reg/unreg vlan. Continue this. */ if (!in_port && port > 0 && port <= dev->caps.num_ports) { slave_state[slave].old_vlan_api = true; @@ -2184,6 +2217,11 @@ static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, switch (op) { case RES_OP_RESERVE_AND_MAP: port = !in_port ? get_param_l(out_param) : in_port; + port = mlx4_slave_convert_port( + dev, slave, port); + + if (port < 0) + return -EINVAL; mac_del_from_slave(dev, slave, in_param, port); __mlx4_unregister_mac(dev, port, in_param); break; @@ -2203,6 +2241,11 @@ static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; int err = 0; + port = mlx4_slave_convert_port( + dev, slave, port); + + if (port < 0) + return -EINVAL; switch (op) { case RES_OP_RESERVE_AND_MAP: if (slave_state[slave].old_vlan_api) @@ -2811,7 +2854,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) - num_gids = mlx4_get_slave_num_gids(dev, slave); + num_gids = mlx4_get_slave_num_gids(dev, slave, port); else num_gids = 1; if (qp_ctx->pri_path.mgid_index >= num_gids) @@ -2820,7 +2863,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) - num_gids = mlx4_get_slave_num_gids(dev, slave); + num_gids = mlx4_get_slave_num_gids(dev, slave, port); else num_gids = 1; if (qp_ctx->alt_path.mgid_index >= num_gids) @@ -3338,6 +3381,39 @@ int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } +static int adjust_qp_sched_queue(struct mlx4_dev *dev, int slave, + struct mlx4_qp_context *qpc, + struct mlx4_cmd_mailbox *inbox) +{ + enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *)inbox->buf); + u8 pri_sched_queue; + int port = mlx4_slave_convert_port( + dev, slave, (qpc->pri_path.sched_queue >> 6 & 1) + 1) - 1; + + if (port < 0) + return -EINVAL; + + pri_sched_queue = (qpc->pri_path.sched_queue & ~(1 << 6)) | + ((port & 1) << 6); + + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH || + mlx4_is_eth(dev, port + 1)) { + qpc->pri_path.sched_queue = pri_sched_queue; + } + + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { + port = mlx4_slave_convert_port( + dev, slave, (qpc->alt_path.sched_queue >> 6 & 1) + + 1) - 1; + if (port < 0) + return -EINVAL; + qpc->alt_path.sched_queue = + (qpc->alt_path.sched_queue & ~(1 << 6)) | + (port & 1) << 6; + } + return 0; +} + static int roce_verify_mac(struct mlx4_dev *dev, int slave, struct mlx4_qp_context *qpc, struct mlx4_cmd_mailbox *inbox) @@ -3375,6 +3451,9 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, u8 orig_vlan_index = qpc->pri_path.vlan_index; u8 orig_feup = qpc->pri_path.feup; + err = adjust_qp_sched_queue(dev, slave, qpc, inbox); + if (err) + return err; err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave); if (err) return err; @@ -3426,6 +3505,9 @@ int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, int err; struct mlx4_qp_context *context = inbox->buf + 8; + err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave); if (err) return err; @@ -3445,6 +3527,9 @@ int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, int err; struct mlx4_qp_context *context = inbox->buf + 8; + err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave); if (err) return err; @@ -3463,6 +3548,9 @@ int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { struct mlx4_qp_context *context = inbox->buf + 8; + int err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; adjust_proxy_tun_qkey(dev, vhcr, context); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } @@ -3476,6 +3564,9 @@ int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, int err; struct mlx4_qp_context *context = inbox->buf + 8; + err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave); if (err) return err; @@ -3495,6 +3586,9 @@ int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, int err; struct mlx4_qp_context *context = inbox->buf + 8; + err = adjust_qp_sched_queue(dev, slave, context, inbox); + if (err) + return err; err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave); if (err) return err; @@ -3598,16 +3692,26 @@ static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, return err; } -static int qp_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - int block_loopback, enum mlx4_protocol prot, +static int qp_attach(struct mlx4_dev *dev, int slave, struct mlx4_qp *qp, + u8 gid[16], int block_loopback, enum mlx4_protocol prot, enum mlx4_steer_type type, u64 *reg_id) { switch (dev->caps.steering_mode) { - case MLX4_STEERING_MODE_DEVICE_MANAGED: - return mlx4_trans_to_dmfs_attach(dev, qp, gid, gid[5], + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + int port = mlx4_slave_convert_port(dev, slave, gid[5]); + if (port < 0) + return port; + return mlx4_trans_to_dmfs_attach(dev, qp, gid, port, block_loopback, prot, reg_id); + } case MLX4_STEERING_MODE_B0: + if (prot == MLX4_PROT_ETH) { + int port = mlx4_slave_convert_port(dev, slave, gid[5]); + if (port < 0) + return port; + gid[5] = port; + } return mlx4_qp_attach_common(dev, qp, gid, block_loopback, prot, type); default: @@ -3615,9 +3719,9 @@ static int qp_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], } } -static int qp_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - enum mlx4_protocol prot, enum mlx4_steer_type type, - u64 reg_id) +static int qp_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, + u8 gid[16], enum mlx4_protocol prot, + enum mlx4_steer_type type, u64 reg_id) { switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: @@ -3654,7 +3758,7 @@ int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, qp.qpn = qpn; if (attach) { - err = qp_attach(dev, &qp, gid, block_loopback, prot, + err = qp_attach(dev, slave, &qp, gid, block_loopback, prot, type, ®_id); if (err) { pr_err("Fail to attach rule to qp 0x%x\n", qpn); @@ -3790,6 +3894,9 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, return -EOPNOTSUPP; ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; + ctrl->port = mlx4_slave_convert_port(dev, slave, ctrl->port); + if (ctrl->port <= 0) + return -EINVAL; qpn = be32_to_cpu(ctrl->qpn) & 0xffffff; err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 122c7cabaee7..6b3998396b99 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1231,4 +1231,5 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv( /* Returns the slave's virtual port that represents the physical port. */ int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port); +int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From e471b40321a94f07d13b8a9e4b064885cf08835d Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 23 Mar 2014 09:50:43 +0200 Subject: mlx4: Use actual number of PCI functions (PF + VFs) for alias GUID logic The code which is dealing with SRIOV alias GUIDs in the mlx4 IB driver has some logic which operated according to the maximal possible active functions (PF + VFs). After the single port VFs code integration this resulted in a flow of false-positive warnings going to the kernel log after the PF driver started the alias GUID work. Fix it by referring to the actual number of functions. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 2f215b93db6b..0eb141c41416 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, continue; slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; - if (slave_id >= dev->dev->num_slaves) + if (slave_id >= dev->dev->num_vfs + 1) return; tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE]; form_cache_ag = get_cached_alias_guid(dev, port_num, -- cgit v1.2.3 From bfd2793c9559ae73ae021797f1d4b097c27f24be Mon Sep 17 00:00:00 2001 From: Yann Droneaud <[mailto:ydroneaud@opteya.com]> Date: Fri, 28 Mar 2014 14:55:21 -0400 Subject: RDMA/cxgb4: set error code on kmalloc() failure If kmalloc() fails in c4iw_alloc_ucontext(), the function leaves but does not set an error code in ret variable: it will return 0 to the caller. This patch set ret to -ENOMEM in such case. Cc: Steve Wise Cc: Steve Wise Signed-off-by: Yann Droneaud Acked-by: Steve Wise Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/provider.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index e36d2a27c431..79429256023a 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -128,8 +128,10 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED; } else { mm = kmalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) + if (!mm) { + ret = -ENOMEM; goto err_free; + } uresp.status_page_size = PAGE_SIZE; -- cgit v1.2.3