diff options
author | Jakub Kicinski <kuba@kernel.org> | 2021-08-10 22:19:16 +0200 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2021-08-10 22:19:17 +0200 |
commit | ebd0d30cc5e44ed3a6db7683bd357b3eea636e74 (patch) | |
tree | 599bfe946190cbbc57fcdf95b479dc90893b5ef3 /drivers/infiniband | |
parent | Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next (diff) | |
parent | net/mlx5: Lag, Create shared FDB when in switchdev mode (diff) | |
download | linux-ebd0d30cc5e44ed3a6db7683bd357b3eea636e74.tar.xz linux-ebd0d30cc5e44ed3a6db7683bd357b3eea636e74.zip |
Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux
Saeed Mahameed says:
====================
pull-request: mlx5-next 2021-08-09
This pulls the mlx5-next branch, which includes patches already reviewed on
the net-next and rdma mailing lists.
1) mlx5 single E-Switch FDB for lag [1]
2) IB/mlx5: Rename is_apu_thread_cq function to is_apu_cq [2]
3) Add DCS caps & fields support [3]
[1] https://patchwork.kernel.org/project/netdevbpf/cover/20210803231959.26513-1-saeed@kernel.org/
[2] https://patchwork.kernel.org/project/netdevbpf/patch/0e3364dab7e0e4eea5423878b01aa42470be8d36.1626609184.git.leonro@nvidia.com/
[3] https://patchwork.kernel.org/project/netdevbpf/patch/55e1d69bef1fbfa5cf195c0bfcbe35c8019de35e.1624258894.git.leonro@nvidia.com/
* 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux:
net/mlx5: Lag, Create shared FDB when in switchdev mode
net/mlx5: E-Switch, add logic to enable shared FDB
net/mlx5: Lag, move lag destruction to a workqueue
net/mlx5: Lag, properly lock eswitch if needed
net/mlx5: Add send to vport rules on paired device
net/mlx5: E-Switch, Add event callback for representors
net/mlx5e: Use shared mappings for restoring from metadata
net/mlx5e: Add an option to create a shared mapping
net/mlx5: E-Switch, set flow source for send to uplink rule
RDMA/mlx5: Add shared FDB support
{net, RDMA}/mlx5: Extend send to vport rules
RDMA/mlx5: Fill port info based on the relevant eswitch
net/mlx5: Lag, add initial logic for shared FDB
net/mlx5: Return mdev from eswitch
IB/mlx5: Rename is_apu_thread_cq function to is_apu_cq
net/mlx5: Add DCS caps & fields support
====================
Link: https://lore.kernel.org/r/20210809202522.316930-1-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/hw/mlx5/cq.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/devx.c | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/ib_rep.c | 77 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 44 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/std_types.c | 10 |
5 files changed, 107 insertions, 33 deletions
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 7abeb576b3c5..18b55d2eba40 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -997,7 +997,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); MLX5_SET(cqc, cqc, uar_page, index); - MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma); if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN) MLX5_SET(cqc, cqc, oi, 1); diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index eb9b0a2707f8..e994aefcc40f 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1437,11 +1437,10 @@ out: rcu_read_unlock(); } -static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in) +static bool is_apu_cq(struct mlx5_ib_dev *dev, const void *in) { if (!MLX5_CAP_GEN(dev->mdev, apu) || - !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), - apu_thread_cq)) + !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), apu_cq)) return false; return true; @@ -1501,7 +1500,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in, cmd_in_len, cmd_out, cmd_out_len); } else if (opcode == MLX5_CMD_OP_CREATE_CQ && - !is_apu_thread_cq(dev, cmd_in)) { + !is_apu_cq(dev, cmd_in)) { obj->flags |= DEVX_OBJ_FLAGS_CQ; obj->core_cq.comp = devx_cq_comp; err = mlx5_core_create_cq(dev->mdev, &obj->core_cq, diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index b25e0b33a11a..52821485371a 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -8,13 +8,15 @@ #include "srq.h" static int -mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +mlx5_ib_set_vport_rep(struct 
mlx5_core_dev *dev, + struct mlx5_eswitch_rep *rep, + int vport_index) { struct mlx5_ib_dev *ibdev; - int vport_index; ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB); - vport_index = rep->vport_index; + if (!ibdev) + return -EINVAL; ibdev->port[vport_index].rep = rep; rep->rep_data[REP_IB].priv = ibdev; @@ -26,19 +28,39 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) return 0; } +static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev); + static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { u32 num_ports = mlx5_eswitch_get_total_vports(dev); const struct mlx5_ib_profile *profile; + struct mlx5_core_dev *peer_dev; struct mlx5_ib_dev *ibdev; + u32 peer_num_ports; int vport_index; int ret; + vport_index = rep->vport_index; + + if (mlx5_lag_is_shared_fdb(dev)) { + peer_dev = mlx5_lag_get_peer_mdev(dev); + peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev); + if (mlx5_lag_is_master(dev)) { + /* Only 1 ib port is the representor for both uplinks */ + num_ports += peer_num_ports - 1; + } else { + if (rep->vport == MLX5_VPORT_UPLINK) + return 0; + vport_index += peer_num_ports; + dev = peer_dev; + } + } + if (rep->vport == MLX5_VPORT_UPLINK) profile = &raw_eth_profile; else - return mlx5_ib_set_vport_rep(dev, rep); + return mlx5_ib_set_vport_rep(dev, rep, vport_index); ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!ibdev) @@ -64,6 +86,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto fail_add; rep->rep_data[REP_IB].priv = ibdev; + if (mlx5_lag_is_shared_fdb(dev)) + mlx5_ib_register_peer_vport_reps(dev); return 0; @@ -82,18 +106,45 @@ static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) static void mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) { + struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw); struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep); + int vport_index = 
rep->vport_index; struct mlx5_ib_port *port; - port = &dev->port[rep->vport_index]; + if (WARN_ON(!mdev)) + return; + + if (mlx5_lag_is_shared_fdb(mdev) && + !mlx5_lag_is_master(mdev)) { + struct mlx5_core_dev *peer_mdev; + + if (rep->vport == MLX5_VPORT_UPLINK) + return; + peer_mdev = mlx5_lag_get_peer_mdev(mdev); + vport_index += mlx5_eswitch_get_total_vports(peer_mdev); + } + + if (!dev) + return; + + port = &dev->port[vport_index]; write_lock(&port->roce.netdev_lock); port->roce.netdev = NULL; write_unlock(&port->roce.netdev_lock); rep->rep_data[REP_IB].priv = NULL; port->rep = NULL; - if (rep->vport == MLX5_VPORT_UPLINK) + if (rep->vport == MLX5_VPORT_UPLINK) { + struct mlx5_core_dev *peer_mdev; + struct mlx5_eswitch *esw; + + if (mlx5_lag_is_shared_fdb(mdev)) { + peer_mdev = mlx5_lag_get_peer_mdev(mdev); + esw = peer_mdev->priv.eswitch; + mlx5_eswitch_unregister_vport_reps(esw, REP_IB); + } __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); + } } static const struct mlx5_eswitch_rep_ops rep_ops = { @@ -102,6 +153,18 @@ static const struct mlx5_eswitch_rep_ops rep_ops = { .get_proto_dev = mlx5_ib_rep_to_dev, }; +static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev); + struct mlx5_eswitch *esw; + + if (!peer_mdev) + return; + + esw = peer_mdev->priv.eswitch; + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB); +} + struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, u16 vport_num) { @@ -123,7 +186,7 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, rep = dev->port[port - 1].rep; - return mlx5_eswitch_add_send_to_vport_rule(esw, rep, sq->base.mqp.qpn); + return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn); } static int mlx5r_rep_probe(struct auxiliary_device *adev, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 094c976b1eed..ae05e143401c 100644 --- 
a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -126,6 +126,7 @@ static int get_port_state(struct ib_device *ibdev, static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, struct net_device *ndev, + struct net_device *upper, u32 *port_num) { struct net_device *rep_ndev; @@ -137,6 +138,14 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, if (!port->rep) continue; + if (upper == ndev && port->rep->vport == MLX5_VPORT_UPLINK) { + *port_num = i + 1; + return &port->roce; + } + + if (upper && port->rep->vport == MLX5_VPORT_UPLINK) + continue; + read_lock(&port->roce.netdev_lock); rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw, port->rep->vport); @@ -196,11 +205,12 @@ static int mlx5_netdev_event(struct notifier_block *this, } if (ibdev->is_rep) - roce = mlx5_get_rep_roce(ibdev, ndev, &port_num); + roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num); if (!roce) return NOTIFY_DONE; - if ((upper == ndev || (!upper && ndev == roce->netdev)) - && ibdev->ib_active) { + if ((upper == ndev || + ((!upper || ibdev->is_rep) && ndev == roce->netdev)) && + ibdev->ib_active) { struct ib_event ibev = { }; enum ib_port_state port_state; @@ -3012,7 +3022,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) struct mlx5_flow_table *ft; int err; - if (!ns || !mlx5_lag_is_roce(mdev)) + if (!ns || !mlx5_lag_is_active(mdev)) return 0; err = mlx5_cmd_create_vport_lag(mdev); @@ -3074,9 +3084,11 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) { int err; - err = mlx5_nic_vport_enable_roce(dev->mdev); - if (err) - return err; + if (!dev->is_rep && dev->profile != &raw_eth_profile) { + err = mlx5_nic_vport_enable_roce(dev->mdev); + if (err) + return err; + } err = mlx5_eth_lag_init(dev); if (err) @@ -3085,7 +3097,8 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) return 0; err_disable_roce: - mlx5_nic_vport_disable_roce(dev->mdev); + if (!dev->is_rep && dev->profile != &raw_eth_profile) + 
mlx5_nic_vport_disable_roce(dev->mdev); return err; } @@ -3093,7 +3106,8 @@ err_disable_roce: static void mlx5_disable_eth(struct mlx5_ib_dev *dev) { mlx5_eth_lag_cleanup(dev); - mlx5_nic_vport_disable_roce(dev->mdev); + if (!dev->is_rep && dev->profile != &raw_eth_profile) + mlx5_nic_vport_disable_roce(dev->mdev); } static int mlx5_ib_rn_get_params(struct ib_device *device, u32 port_num, @@ -3950,12 +3964,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) /* Register only for native ports */ err = mlx5_add_netdev_notifier(dev, port_num); - if (err || dev->is_rep || !mlx5_is_roce_init_enabled(mdev)) - /* - * We don't enable ETH interface for - * 1. IB representors - * 2. User disabled ROCE through devlink interface - */ + if (err) return err; err = mlx5_enable_eth(dev); @@ -3980,8 +3989,7 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev) ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - if (!dev->is_rep) - mlx5_disable_eth(dev); + mlx5_disable_eth(dev); port_num = mlx5_core_native_port_num(dev->mdev) - 1; mlx5_remove_netdev_notifier(dev, port_num); @@ -4037,7 +4045,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { const char *name; - if (!mlx5_lag_is_roce(dev->mdev)) + if (!mlx5_lag_is_active(dev->mdev)) name = "mlx5_%d"; else name = "mlx5_bond_%d"; diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c index c0ddf7b3c6e2..bbfcce3bdc84 100644 --- a/drivers/infiniband/hw/mlx5/std_types.c +++ b/drivers/infiniband/hw/mlx5/std_types.c @@ -114,14 +114,18 @@ out: static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num, struct mlx5_ib_uapi_query_port *info) { - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_eswitch_rep *rep; + struct mlx5_core_dev *mdev; int err; rep = dev->port[port_num - 1].rep; if (!rep) return -EOPNOTSUPP; + mdev = mlx5_eswitch_get_core_dev(rep->esw); + if (!mdev) + return -EINVAL; + info->vport = rep->vport; info->flags 
|= MLX5_IB_UAPI_QUERY_PORT_VPORT; @@ -138,9 +142,9 @@ static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num, if (err) return err; - if (mlx5_eswitch_vport_match_metadata_enabled(mdev->priv.eswitch)) { + if (mlx5_eswitch_vport_match_metadata_enabled(rep->esw)) { info->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match( - mdev->priv.eswitch, rep->vport); + rep->esw, rep->vport); info->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask(); info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_REG_C0; } |