author     Linus Torvalds <torvalds@linux-foundation.org>   2024-09-24 20:48:00 +0200
committer  Linus Torvalds <torvalds@linux-foundation.org>   2024-09-24 20:48:00 +0200
commit     54d7e8190ecfe72ff0dab96545e782f7298cb69a (patch)
tree       1ffd6c6083413871ebc06f30e649e1670726dbe8 /drivers/net/ethernet
parent     Merge tag 'sched_ext-for-6.12-rc1-fixes' of git://git.kernel.org/pub/scm/linu... (diff)
parent     RDMA/bnxt_re: Remove the unused variable en_dev (diff)
download   linux-54d7e8190ecfe72ff0dab96545e782f7298cb69a.tar.xz  linux-54d7e8190ecfe72ff0dab96545e782f7298cb69a.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"Usual collection of small improvements and fixes, nothing especially
stands out to me here.
The new multipath PCI feature is a sign of things to come, I think we
will see more of this in the next 10 years. Broadcom and HNS continue
to update their drivers for their new HW generations.
Summary:
- Bug fixes and minor improvements in cxgb4, siw, mlx5, rxe, efa, rtrs,
hfi, erdma, hns, irdma
- Code cleanups/typos/etc. Tidy alloc_ordered_workqueue() calls
- Multipath PCI for mlx5
- Variable size work queue, SRQ changes, and relaxed ordering for new
bnxt HW
- New ODP fault resolution FW protocol in mlx5
- New 'rdma monitor' netlink mechanism"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (99 commits)
RDMA/bnxt_re: Remove the unused variable en_dev
RDMA/nldev: Add missing break in rdma_nl_notify_err_msg()
RDMA/irdma: fix error message in irdma_modify_qp_roce()
RDMA/cxgb4: Added NULL check for lookup_atid
RDMA/hns: Fix ah error counter in sw stat not increasing
RDMA/bnxt_re: Recover the device when FW error is detected
RDMA/bnxt_re: Group all operations under add_device and remove_device
RDMA/bnxt_re: Use the aux device for L2 ULP callbacks
RDMA/bnxt_re: Change aux driver data to en_info to hold more information
RDMA/nldev: Expose whether RDMA monitoring is supported
RDMA/nldev: Add support for RDMA monitoring
RDMA/mlx5: Use IB set_netdev and get_netdev functions
RDMA/device: Remove optimization in ib_device_get_netdev()
RDMA/mlx5: Initialize phys_port_cnt earlier in RDMA device creation
RDMA/mlx5: Obtain upper net device only when needed
RDMA/mlx5: Check RoCE LAG status before getting netdev
RDMA/mlx5: Consider the query_vuid cap for data_direct
net/mlx5: Handle memory scheme ODP capabilities
RDMA/mlx5: Add implicit MR handling to ODP memory scheme
RDMA/mlx5: Add handling for memory scheme page fault events
...
Diffstat (limited to 'drivers/net/ethernet')
-rw-r--r--   drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c    76
-rw-r--r--   drivers/net/ethernet/mellanox/mlx5/core/main.c       54
2 files changed, 75 insertions, 55 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index cf8045b92689..8577db3308cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -445,6 +445,34 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
 	return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
 }
 
+static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
+{
+	struct net_device *ndev = NULL;
+	struct mlx5_lag *ldev;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&lag_lock, flags);
+	ldev = mlx5_lag_dev(dev);
+
+	if (!ldev)
+		goto unlock;
+
+	for (i = 0; i < ldev->ports; i++)
+		if (ldev->tracker.netdev_state[i].tx_enabled)
+			ndev = ldev->pf[i].netdev;
+	if (!ndev)
+		ndev = ldev->pf[ldev->ports - 1].netdev;
+
+	if (ndev)
+		dev_hold(ndev);
+
+unlock:
+	spin_unlock_irqrestore(&lag_lock, flags);
+
+	return ndev;
+}
+
 void mlx5_modify_lag(struct mlx5_lag *ldev,
 		     struct lag_tracker *tracker)
 {
@@ -477,9 +505,18 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
 		}
 	}
 
-	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
-	    !(ldev->mode == MLX5_LAG_MODE_ROCE))
-		mlx5_lag_drop_rule_setup(ldev, tracker);
+	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+		struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
+
+		if(!(ldev->mode == MLX5_LAG_MODE_ROCE))
+			mlx5_lag_drop_rule_setup(ldev, tracker);
+		/** Only sriov and roce lag should have tracker->tx_type set so
+		 * no need to check the mode
+		 */
+		blocking_notifier_call_chain(&dev0->priv.lag_nh,
+					     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
+					     ndev);
+	}
 }
 
 static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
@@ -613,6 +650,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
 		mlx5_core_err(dev0,
 			      "Failed to deactivate RoCE LAG; driver restart required\n");
 	}
+	BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
 
 	return err;
 }
@@ -1492,38 +1530,6 @@ void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
 	mlx5_queue_bond_work(ldev, 0);
 }
 
-struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
-{
-	struct net_device *ndev = NULL;
-	struct mlx5_lag *ldev;
-	unsigned long flags;
-	int i;
-
-	spin_lock_irqsave(&lag_lock, flags);
-	ldev = mlx5_lag_dev(dev);
-
-	if (!(ldev && __mlx5_lag_is_roce(ldev)))
-		goto unlock;
-
-	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
-		for (i = 0; i < ldev->ports; i++)
-			if (ldev->tracker.netdev_state[i].tx_enabled)
-				ndev = ldev->pf[i].netdev;
-		if (!ndev)
-			ndev = ldev->pf[ldev->ports - 1].netdev;
-	} else {
-		ndev = ldev->pf[MLX5_LAG_P1].netdev;
-	}
-	if (ndev)
-		dev_hold(ndev);
-
-unlock:
-	spin_unlock_irqrestore(&lag_lock, flags);
-
-	return ndev;
-}
-EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
-
 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
 			   struct net_device *slave)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 8b0abd61eca6..220a9ac75c8b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -454,8 +454,8 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx)
 
 static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
 {
+	bool do_set = false, mem_page_fault = false;
 	void *set_hca_cap;
-	bool do_set = false;
 	int err;
 
 	if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) ||
@@ -470,6 +470,17 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
 	memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur,
 	       MLX5_ST_SZ_BYTES(odp_cap));
 
+	/* For best performance, enable memory scheme ODP only when
+	 * it has page prefetch enabled.
+	 */
+	if (MLX5_CAP_ODP_MAX(dev, mem_page_fault) &&
+	    MLX5_CAP_ODP_MAX(dev, memory_page_fault_scheme_cap.page_prefetch)) {
+		mem_page_fault = true;
+		do_set = true;
+		MLX5_SET(odp_cap, set_hca_cap, mem_page_fault, mem_page_fault);
+		goto set;
+	}
+
 #define ODP_CAP_SET_MAX(dev, field)                                            \
 	do {                                                                   \
 		u32 _res = MLX5_CAP_ODP_MAX(dev, field);                       \
@@ -479,25 +490,28 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
 		}                                                              \
 	} while (0)
 
-	ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive);
-	ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive);
-	ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive);
-	ODP_CAP_SET_MAX(dev, xrc_odp_caps.send);
-	ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive);
-	ODP_CAP_SET_MAX(dev, xrc_odp_caps.write);
-	ODP_CAP_SET_MAX(dev, xrc_odp_caps.read);
-	ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic);
-	ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive);
-	ODP_CAP_SET_MAX(dev, dc_odp_caps.send);
-	ODP_CAP_SET_MAX(dev, dc_odp_caps.receive);
-	ODP_CAP_SET_MAX(dev, dc_odp_caps.write);
-	ODP_CAP_SET_MAX(dev, dc_odp_caps.read);
-	ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);
-
-	if (!do_set)
-		return 0;
-
-	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.ud_odp_caps.srq_receive);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.rc_odp_caps.srq_receive);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.srq_receive);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.send);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.receive);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.write);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.read);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.atomic);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.srq_receive);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.send);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.receive);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.write);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.read);
+	ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.atomic);
+
+set:
+	if (do_set)
+		err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
+
+	mlx5_core_dbg(dev, "Using ODP %s scheme\n",
+		      mem_page_fault ? "memory" : "transport");
+	return err;
 }
 
 static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
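
The lag.c hunks above replace the exported mlx5_lag_get_roce_netdev() helper with a
per-device blocking notifier chain: mlx5_create_lag() initializes dev->priv.lag_nh,
and mlx5_modify_lag() publishes MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE
together with the currently active slave netdev. The consumer side is not part of
this diffstat (it lives under drivers/infiniband), so what follows is only a minimal
sketch of how a listener could hook into that chain; every example_* identifier is
hypothetical and not the actual mlx5_ib implementation.

/*
 * Hypothetical consumer of the lag_nh chain added above. Only the
 * notifier_block API, the event name and the priv.lag_nh field come
 * from the diff; the example_* names are placeholders.
 */
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>

static int example_lag_lowerstate_event(struct notifier_block *nb,
					unsigned long event, void *data)
{
	struct net_device *ndev = data;	/* netdev passed by mlx5_modify_lag() */

	if (event != MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE)
		return NOTIFY_DONE;

	/* React to the newly active port here, e.g. repoint the RDMA
	 * device's lower netdev. The publisher takes dev_hold() on ndev
	 * before the call; the matching dev_put() is outside this
	 * diffstat, so reference handling is left out of the sketch.
	 */
	pr_debug("active-backup lower state moved to %s\n",
		 ndev ? netdev_name(ndev) : "none");

	return NOTIFY_OK;
}

static struct notifier_block example_lag_nb = {
	.notifier_call = example_lag_lowerstate_event,
};

/* Register against the chain initialized in mlx5_create_lag() above. */
static int example_register_lag_listener(struct mlx5_core_dev *mdev)
{
	return blocking_notifier_chain_register(&mdev->priv.lag_nh,
						&example_lag_nb);
}

A real listener would pair this with blocking_notifier_chain_unregister() on
teardown; both helpers are the standard kernel notifier API from <linux/notifier.h>.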