Diffstat (limited to 'drivers/net/ethernet/mellanox')
113 files changed, 17480 insertions, 1605 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
index 87e90b5d4d7d..5b11557f1ae4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -210,7 +210,7 @@ static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist)
     mutex_lock(&persist->interface_state_mutex);
     if (persist->interface_state & MLX4_INTERFACE_STATE_UP &&
         !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) {
-        err = mlx4_restart_one(persist->pdev, false, NULL);
+        err = mlx4_restart_one(persist->pdev);
         mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n",
               err);
     }
diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c
index 88316c743820..eaf08f7ad128 100644
--- a/drivers/net/ethernet/mellanox/mlx4/crdump.c
+++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c
@@ -99,8 +99,7 @@ static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev,
             readl(cr_space + offset);

         err = devlink_region_snapshot_create(crdump->region_crspace,
-                                             cr_res_size, crspace_data,
-                                             id, &kvfree);
+                                             crspace_data, id, &kvfree);
         if (err) {
             kvfree(crspace_data);
             mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
@@ -139,9 +138,7 @@ static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev,
             readl(health_buf_start + offset);

         err = devlink_region_snapshot_create(crdump->region_fw_health,
-                                             HEALTH_BUFFER_SIZE,
-                                             health_data,
-                                             id, &kvfree);
+                                             health_data, id, &kvfree);
         if (err) {
             kvfree(health_data);
             mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 94c59939a8cf..d8313e2ee600 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -639,7 +639,7 @@ static unsigned long *ptys2ethtool_link_mode(struct ptys2ethtool_config *cfg,
 #define MLX4_BUILD_PTYS2ETHTOOL_CONFIG(reg_, speed_, ...)        \
     ({                                                           \
         struct ptys2ethtool_config *cfg;                         \
-        const unsigned int modes[] = { __VA_ARGS__ };            \
+        static const unsigned int modes[] = { __VA_ARGS__ };     \
         unsigned int i;                                          \
         cfg = &ptys2ethtool_map[reg_];                           \
         cfg->speed = speed_;                                     \
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index c1438ae52a11..40ec5acf79c0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2645,14 +2645,6 @@ out:
         en_err(priv, "failed setting L2 tunnel configuration ret %d\n", ret);
         return;
     }
-
-    /* set offloads */
-    priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-                      NETIF_F_RXCSUM |
-                      NETIF_F_TSO | NETIF_F_TSO6 |
-                      NETIF_F_GSO_UDP_TUNNEL |
-                      NETIF_F_GSO_UDP_TUNNEL_CSUM |
-                      NETIF_F_GSO_PARTIAL;
 }

 static void mlx4_en_del_vxlan_offloads(struct work_struct *work)
@@ -2660,14 +2652,6 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work)
     int ret;
     struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
                          vxlan_del_task);
-    /* unset offloads */
-    priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-                      NETIF_F_RXCSUM |
-                      NETIF_F_TSO | NETIF_F_TSO6 |
-                      NETIF_F_GSO_UDP_TUNNEL |
-                      NETIF_F_GSO_UDP_TUNNEL_CSUM |
-                      NETIF_F_GSO_PARTIAL);
-
     ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port,
                   VXLAN_STEER_BY_OUTER_MAC, 0);
     if (ret)
@@ -3415,6 +3399,23 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
     if (mdev->LSO_support)
         dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;

+    if (mdev->dev->caps.tunnel_offload_mode ==
+        MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+        dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
+                    NETIF_F_GSO_UDP_TUNNEL_CSUM |
+                    NETIF_F_GSO_PARTIAL;
+        dev->features |= NETIF_F_GSO_UDP_TUNNEL |
+                 NETIF_F_GSO_UDP_TUNNEL_CSUM |
+                 NETIF_F_GSO_PARTIAL;
+        dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
+        dev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+                       NETIF_F_RXCSUM |
+                       NETIF_F_TSO | NETIF_F_TSO6 |
+                       NETIF_F_GSO_UDP_TUNNEL |
+                       NETIF_F_GSO_UDP_TUNNEL_CSUM |
+                       NETIF_F_GSO_PARTIAL;
+    }
+
     dev->vlan_features = dev->hw_features;

     dev->hw_features |= NETIF_F_RXCSUM | NETIF_F_RXHASH;
@@ -3483,16 +3484,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
         priv->rss_hash_fn = ETH_RSS_HASH_TOP;
     }

-    if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
-        dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
-                    NETIF_F_GSO_UDP_TUNNEL_CSUM |
-                    NETIF_F_GSO_PARTIAL;
-        dev->features |= NETIF_F_GSO_UDP_TUNNEL |
-                 NETIF_F_GSO_UDP_TUNNEL_CSUM |
-                 NETIF_F_GSO_PARTIAL;
-        dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
-    }
-
     /* MTU range: 68 - hw-specific max */
     dev->min_mtu = ETH_MIN_MTU;
     dev->max_mtu = priv->max_mtu;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 36a92b19e613..4d5ca302c067 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -772,9 +772,7 @@ static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv,

     /* Map fragments if any */
     for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) {
-        const struct skb_frag_struct *frag;
-
-        frag = &shinfo->frags[i_frag];
+        const skb_frag_t *frag = &shinfo->frags[i_frag];
         byte_count = skb_frag_size(frag);
         dma = skb_frag_dma_map(ddev, frag,
                        0, byte_count,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 309470ec0219..fce9b3a24347 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2292,23 +2292,31 @@ static int mlx4_init_fw(struct mlx4_dev *dev)
 static int mlx4_init_hca(struct mlx4_dev *dev)
 {
     struct mlx4_priv *priv = mlx4_priv(dev);
+    struct mlx4_init_hca_param *init_hca = NULL;
+    struct mlx4_dev_cap *dev_cap = NULL;
     struct mlx4_adapter adapter;
-    struct mlx4_dev_cap dev_cap;
     struct mlx4_profile profile;
-    struct mlx4_init_hca_param init_hca;
     u64 icm_size;
     struct mlx4_config_dev_params params;
     int err;

     if (!mlx4_is_slave(dev)) {
-        err = mlx4_dev_cap(dev, &dev_cap);
+        dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
+        init_hca = kzalloc(sizeof(*init_hca), GFP_KERNEL);
+
+        if (!dev_cap || !init_hca) {
+            err = -ENOMEM;
+            goto out_free;
+        }
+
+        err = mlx4_dev_cap(dev, dev_cap);
         if (err) {
             mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
-            return err;
+            goto out_free;
         }

-        choose_steering_mode(dev, &dev_cap);
-        choose_tunnel_offload_mode(dev, &dev_cap);
+        choose_steering_mode(dev, dev_cap);
+        choose_tunnel_offload_mode(dev, dev_cap);

         if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
             mlx4_is_master(dev))
@@ -2331,48 +2339,48 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
             MLX4_STEERING_MODE_DEVICE_MANAGED)
             profile.num_mcg = MLX4_FS_NUM_MCG;

-        icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
-                                     &init_hca);
+        icm_size = mlx4_make_profile(dev, &profile, dev_cap,
+                                     init_hca);
         if ((long long) icm_size < 0) {
             err = icm_size;
-            return err;
+            goto out_free;
         }

         dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;

         if (enable_4k_uar || !dev->persist->num_vfs) {
-            init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
+            init_hca->log_uar_sz = ilog2(dev->caps.num_uars) +
                         PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT;
-            init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
+            init_hca->uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
         } else {
-            init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
-            init_hca.uar_page_sz = PAGE_SHIFT - 12;
+            init_hca->log_uar_sz = ilog2(dev->caps.num_uars);
+            init_hca->uar_page_sz = PAGE_SHIFT - 12;
         }

-        init_hca.mw_enabled = 0;
+        init_hca->mw_enabled = 0;
         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
             dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
-            init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
+            init_hca->mw_enabled = INIT_HCA_TPT_MW_ENABLE;

-        err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
+        err = mlx4_init_icm(dev, dev_cap, init_hca, icm_size);
         if (err)
-            return err;
+            goto out_free;

-        err = mlx4_INIT_HCA(dev, &init_hca);
+        err = mlx4_INIT_HCA(dev, init_hca);
         if (err) {
             mlx4_err(dev, "INIT_HCA command failed, aborting\n");
             goto err_free_icm;
         }

-        if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
-            err = mlx4_query_func(dev, &dev_cap);
+        if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
+            err = mlx4_query_func(dev, dev_cap);
             if (err < 0) {
                 mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
                 goto err_close;
             } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
-                dev->caps.num_eqs = dev_cap.max_eqs;
-                dev->caps.reserved_eqs = dev_cap.reserved_eqs;
-                dev->caps.reserved_uars = dev_cap.reserved_uars;
+                dev->caps.num_eqs = dev_cap->max_eqs;
+                dev->caps.reserved_eqs = dev_cap->reserved_eqs;
+                dev->caps.reserved_uars = dev_cap->reserved_uars;
             }
         }
@@ -2381,14 +2389,13 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
          * read HCA frequency by QUERY_HCA command
          */
         if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
-            memset(&init_hca, 0, sizeof(init_hca));
-            err = mlx4_QUERY_HCA(dev, &init_hca);
+            err = mlx4_QUERY_HCA(dev, init_hca);
             if (err) {
                 mlx4_err(dev, "QUERY_HCA command failed, disable timestamp\n");
                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
             } else {
                 dev->caps.hca_core_clock =
-                    init_hca.hca_core_clock;
+                    init_hca->hca_core_clock;
             }

             /* In case we got HCA frequency 0 - disable timestamping
@@ -2464,7 +2471,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
     priv->eq_table.inta_pin = adapter.inta_pin;
     memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));

-    return 0;
+    err = 0;
+    goto out_free;

 unmap_bf:
     unmap_internal_clock(dev);
@@ -2483,6 +2491,10 @@ err_free_icm:
     if (!mlx4_is_slave(dev))
         mlx4_free_icms(dev);

+out_free:
+    kfree(dev_cap);
+    kfree(init_hca);
+
     return err;
 }
@@ -3919,26 +3931,43 @@ static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink)
     }
 }

-static int mlx4_devlink_reload(struct devlink *devlink,
-                               struct netlink_ext_ack *extack)
+static void mlx4_restart_one_down(struct pci_dev *pdev);
+static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload,
+                               struct devlink *devlink);
+
+static int mlx4_devlink_reload_down(struct devlink *devlink,
+                                    struct netlink_ext_ack *extack)
 {
     struct mlx4_priv *priv = devlink_priv(devlink);
     struct mlx4_dev *dev = &priv->dev;
     struct mlx4_dev_persistent *persist = dev->persist;
-    int err;

     if (persist->num_vfs)
         mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n");
-    err = mlx4_restart_one(persist->pdev, true, devlink);
+    mlx4_restart_one_down(persist->pdev);
+    return 0;
+}
+
+static int mlx4_devlink_reload_up(struct devlink *devlink,
+                                  struct netlink_ext_ack *extack)
+{
+    struct mlx4_priv *priv = devlink_priv(devlink);
+    struct mlx4_dev *dev = &priv->dev;
+    struct mlx4_dev_persistent *persist = dev->persist;
+    int err;
+
+    err = mlx4_restart_one_up(persist->pdev, true, devlink);
     if (err)
-        mlx4_err(persist->dev, "mlx4_restart_one failed, ret=%d\n", err);
+        mlx4_err(persist->dev, "mlx4_restart_one_up failed, ret=%d\n",
+             err);

     return err;
 }

 static const struct devlink_ops mlx4_devlink_ops = {
     .port_type_set  = mlx4_devlink_port_type_set,
-    .reload         = mlx4_devlink_reload,
+    .reload_down    = mlx4_devlink_reload_down,
+    .reload_up      = mlx4_devlink_reload_up,
 };

 static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -4151,7 +4180,13 @@ static int restore_current_port_types(struct mlx4_dev *dev,
     return err;
 }

-int mlx4_restart_one(struct pci_dev *pdev, bool reload, struct devlink *devlink)
+static void mlx4_restart_one_down(struct pci_dev *pdev)
+{
+    mlx4_unload_one(pdev);
+}
+
+static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload,
+                               struct devlink *devlink)
 {
     struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
     struct mlx4_dev *dev = persist->dev;
@@ -4163,7 +4198,6 @@ int mlx4_restart_one(struct pci_dev *pdev, bool reload, struct devlink *devlink)
     total_vfs = dev->persist->num_vfs;
     memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));

-    mlx4_unload_one(pdev);
     if (reload)
         mlx4_devlink_param_load_driverinit_values(devlink);
     err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
@@ -4182,6 +4216,12 @@ int mlx4_restart_one(struct pci_dev *pdev, bool reload, struct devlink *devlink)
     return err;
 }

+int mlx4_restart_one(struct pci_dev *pdev)
+{
+    mlx4_restart_one_down(pdev);
+    return mlx4_restart_one_up(pdev, false, NULL);
+}
+
 #define MLX_SP(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_FORCE_SENSE_PORT }
 #define MLX_VF(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_IS_VF }
 #define MLX_GN(id) { PCI_VDEVICE(MELLANOX, id), 0 }
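The mlx4_devlink_reload_down()/mlx4_devlink_reload_up() pair above implements the two-phase devlink reload contract that replaces the old single .reload callback. A minimal sketch of the same wiring for a hypothetical driver, using only the callback signatures visible in this diff (all foo_* names are illustrative, not part of the patch):

#include <net/devlink.h>

struct foo_dev;                                      /* hypothetical driver state */
static void foo_teardown_hw(struct foo_dev *fdev);   /* plays the role of mlx4_unload_one() */
static int foo_setup_hw(struct foo_dev *fdev);       /* plays the role of mlx4_load_one() */

static int foo_devlink_reload_down(struct devlink *devlink,
                                   struct netlink_ext_ack *extack)
{
    /* Tear down only; re-init is deferred to .reload_up. */
    foo_teardown_hw(devlink_priv(devlink));
    return 0;
}

static int foo_devlink_reload_up(struct devlink *devlink,
                                 struct netlink_ext_ack *extack)
{
    return foo_setup_hw(devlink_priv(devlink));
}

static const struct devlink_ops foo_devlink_ops = {
    .reload_down = foo_devlink_reload_down,
    .reload_up   = foo_devlink_reload_up,
};

The split lets the devlink core apply driverinit parameter values between the two phases, which is exactly why mlx4_restart_one_up() takes the reload/devlink arguments while the plain mlx4_restart_one() wrapper passes false/NULL.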
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 23f1b5b512c2..527b52e48276 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1043,8 +1043,7 @@ int mlx4_catas_init(struct mlx4_dev *dev);
 void mlx4_catas_end(struct mlx4_dev *dev);
 int mlx4_crdump_init(struct mlx4_dev *dev);
 void mlx4_crdump_end(struct mlx4_dev *dev);
-int mlx4_restart_one(struct pci_dev *pdev, bool reload,
-                     struct devlink *devlink);
+int mlx4_restart_one(struct pci_dev *pdev);
 int mlx4_register_device(struct mlx4_dev *dev);
 void mlx4_unregister_device(struct mlx4_dev *dev);
 void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 37fef8cd25e3..0dba272a5b2f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -10,6 +10,7 @@ config MLX5_CORE
     imply PTP_1588_CLOCK
     imply VXLAN
     imply MLXFW
+    imply PCI_HYPERV_INTERFACE
     default n
     ---help---
       Core driver for low level functionality of the ConnectX-4 and
@@ -32,7 +33,6 @@ config MLX5_FPGA
 config MLX5_CORE_EN
     bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support"
     depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
-    depends on IPV6=y || IPV6=n || MLX5_CORE=m
     select PAGE_POOL
     select DIMLIB
     default n
@@ -154,3 +154,10 @@ config MLX5_EN_TLS
       Build support for TLS cryptography-offload accelaration in the NIC.
       Note: Support for hardware with this capability needs to be selected
       for this option to become available.
+
+config MLX5_SW_STEERING
+    bool "Mellanox Technologies software-managed steering"
+    depends on MLX5_CORE_EN && MLX5_ESWITCH
+    default y
+    help
+      Build support for software-managed steering in the NIC.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 57d2cc666fe3..5708fcc079ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -15,7 +15,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
         health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
         transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
         fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
-        lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \
+        lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
         diag/fw_tracer.o diag/crdump.o devlink.o

 #
@@ -23,8 +23,9 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 #
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
         en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
-        en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o \
-        en/params.o en/xsk/umem.o en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o
+        en_selftest.o en/port.o en/monitor_stats.o en/health.o \
+        en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \
+        en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o

 #
 # Netdev extra
@@ -34,7 +35,8 @@ mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
 mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
                     lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
-                    en/tc_tun_geneve.o
+                    en/tc_tun_geneve.o diag/en_tc_tracepoint.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o

 #
 # Core extra
@@ -44,6 +46,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offlo
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
 mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o

 #
 # Ipoib netdev
@@ -64,3 +67,10 @@ mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \

 mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \
                    en_accel/ktls.o en_accel/ktls_tx.o
+
+mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \
+                    steering/dr_matcher.o steering/dr_rule.o \
+                    steering/dr_icm_pool.o steering/dr_crc32.o \
+                    steering/dr_ste.o steering/dr_send.o \
+                    steering/dr_cmd.o steering/dr_fw.o \
+                    steering/dr_action.o steering/fs_dr.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 8cdd7e66f8df..ea934cd02448 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -446,6 +446,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
     case MLX5_CMD_OP_CREATE_UMEM:
     case MLX5_CMD_OP_DESTROY_UMEM:
     case MLX5_CMD_OP_ALLOC_MEMIC:
+    case MLX5_CMD_OP_MODIFY_XRQ:
+    case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
         *status = MLX5_DRIVER_STATUS_ABORTED;
         *synd = MLX5_DRIVER_SYND;
         return -EIO;
@@ -637,6 +639,8 @@ const char *mlx5_command_str(int command)
     MLX5_COMMAND_STR_CASE(DESTROY_UCTX);
     MLX5_COMMAND_STR_CASE(CREATE_UMEM);
     MLX5_COMMAND_STR_CASE(DESTROY_UMEM);
+    MLX5_COMMAND_STR_CASE(RELEASE_XRQ_ERROR);
+    MLX5_COMMAND_STR_CASE(MODIFY_XRQ);
     default: return "unknown command opcode";
     }
 }
@@ -1368,49 +1372,19 @@ static void clean_debug_files(struct mlx5_core_dev *dev)
     debugfs_remove_recursive(dbg->dbg_root);
 }

-static int create_debugfs_files(struct mlx5_core_dev *dev)
+static void create_debugfs_files(struct mlx5_core_dev *dev)
 {
     struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
-    int err = -ENOMEM;
-
-    if (!mlx5_debugfs_root)
-        return 0;

     dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root);
-    if (!dbg->dbg_root)
-        return err;
-
-    dbg->dbg_in = debugfs_create_file("in", 0400, dbg->dbg_root,
-                                      dev, &dfops);
-    if (!dbg->dbg_in)
-        goto err_dbg;
-
-    dbg->dbg_out = debugfs_create_file("out", 0200, dbg->dbg_root,
-                                       dev, &dfops);
-    if (!dbg->dbg_out)
-        goto err_dbg;
-
-    dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root,
-                                          dev, &olfops);
-    if (!dbg->dbg_outlen)
-        goto err_dbg;
-
-    dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root,
-                                        &dbg->status);
-    if (!dbg->dbg_status)
-        goto err_dbg;
-
-    dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
-    if (!dbg->dbg_run)
-        goto err_dbg;
+    debugfs_create_file("in", 0400, dbg->dbg_root, dev, &dfops);
+    debugfs_create_file("out", 0200, dbg->dbg_root, dev, &dfops);
+    debugfs_create_file("out_len", 0600, dbg->dbg_root, dev, &olfops);
+    debugfs_create_u8("status", 0600, dbg->dbg_root, &dbg->status);
+    debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);

     mlx5_cmdif_debugfs_init(dev);
-
-    return 0;
-
-err_dbg:
-    clean_debug_files(dev);
-    return err;
 }

 static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
@@ -2007,17 +1981,10 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
         goto err_cache;
     }

-    err = create_debugfs_files(dev);
-    if (err) {
-        err = -ENOMEM;
-        goto err_wq;
-    }
+    create_debugfs_files(dev);

     return 0;

-err_wq:
-    destroy_workqueue(cmd->wq);
-
 err_cache:
     destroy_msg_cache(dev);
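The create_debugfs_files() conversion above relies on the debugfs API contract that creation failures are not fatal: the value a debugfs_create_*() call returns may be passed straight back in as a parent, and an ERR_PTR parent makes the child creations no-ops inside the debugfs core. A hedged sketch of the resulting idiom (foo_* names are illustrative, not part of the patch):

#include <linux/debugfs.h>

static struct dentry *foo_root;
static u8 foo_status;

static void foo_debugfs_init(void)
{
    /* No return-value checks: if this fails, foo_root holds an
     * ERR_PTR and the child creation below is silently skipped. */
    foo_root = debugfs_create_dir("foo", NULL);
    debugfs_create_u8("status", 0600, foo_root, &foo_status);
}

static void foo_debugfs_cleanup(void)
{
    debugfs_remove_recursive(foo_root);
}

That is why the init paths in this series drop their error labels entirely and why mlx5_cmd_init() no longer needs the err_wq unwind.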
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index a11e22d0b0cc..04854e5fbcd7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -92,8 +92,6 @@ EXPORT_SYMBOL(mlx5_debugfs_root);
 void mlx5_register_debugfs(void)
 {
     mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL);
-    if (IS_ERR_OR_NULL(mlx5_debugfs_root))
-        mlx5_debugfs_root = NULL;
 }

 void mlx5_unregister_debugfs(void)
@@ -101,45 +99,25 @@ void mlx5_unregister_debugfs(void)
     debugfs_remove(mlx5_debugfs_root);
 }

-int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
+void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
 {
-    if (!mlx5_debugfs_root)
-        return 0;
-
     atomic_set(&dev->num_qps, 0);

     dev->priv.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg_root);
-    if (!dev->priv.qp_debugfs)
-        return -ENOMEM;
-
-    return 0;
 }

 void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
-    if (!mlx5_debugfs_root)
-        return;
-
     debugfs_remove_recursive(dev->priv.qp_debugfs);
 }

-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
+void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
 {
-    if (!mlx5_debugfs_root)
-        return 0;
-
     dev->priv.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg_root);
-    if (!dev->priv.eq_debugfs)
-        return -ENOMEM;
-
-    return 0;
 }

 void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
-    if (!mlx5_debugfs_root)
-        return;
-
     debugfs_remove_recursive(dev->priv.eq_debugfs);
 }

@@ -183,85 +161,41 @@ static const struct file_operations stats_fops = {
     .write = average_write,
 };

-int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
+void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
 {
     struct mlx5_cmd_stats *stats;
     struct dentry **cmd;
     const char *namep;
-    int err;
     int i;

-    if (!mlx5_debugfs_root)
-        return 0;
-
     cmd = &dev->priv.cmdif_debugfs;
     *cmd = debugfs_create_dir("commands", dev->priv.dbg_root);
-    if (!*cmd)
-        return -ENOMEM;

     for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) {
         stats = &dev->cmd.stats[i];
         namep = mlx5_command_str(i);
         if (strcmp(namep, "unknown command opcode")) {
             stats->root = debugfs_create_dir(namep, *cmd);
-            if (!stats->root) {
-                mlx5_core_warn(dev, "failed adding command %d\n",
-                               i);
-                err = -ENOMEM;
-                goto out;
-            }
-
-            stats->avg = debugfs_create_file("average", 0400,
-                                             stats->root, stats,
-                                             &stats_fops);
-            if (!stats->avg) {
-                mlx5_core_warn(dev, "failed creating debugfs file\n");
-                err = -ENOMEM;
-                goto out;
-            }
-
-            stats->count = debugfs_create_u64("n", 0400,
-                                              stats->root,
-                                              &stats->n);
-            if (!stats->count) {
-                mlx5_core_warn(dev, "failed creating debugfs file\n");
-                err = -ENOMEM;
-                goto out;
-            }
+
+            debugfs_create_file("average", 0400, stats->root, stats,
+                                &stats_fops);
+            debugfs_create_u64("n", 0400, stats->root, &stats->n);
         }
     }
-
-    return 0;
-out:
-    debugfs_remove_recursive(dev->priv.cmdif_debugfs);
-    return err;
 }

 void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
-    if (!mlx5_debugfs_root)
-        return;
-
     debugfs_remove_recursive(dev->priv.cmdif_debugfs);
 }

-int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
+void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
 {
-    if (!mlx5_debugfs_root)
-        return 0;
-
     dev->priv.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg_root);
-    if (!dev->priv.cq_debugfs)
-        return -ENOMEM;
-
-    return 0;
 }

 void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
-    if (!mlx5_debugfs_root)
-        return;
-
     debugfs_remove_recursive(dev->priv.cq_debugfs);
 }

@@ -484,7 +418,6 @@ static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type,
 {
     struct mlx5_rsc_debug *d;
     char resn[32];
-    int err;
     int i;

     d = kzalloc(struct_size(d, fields, nfile), GFP_KERNEL);
@@ -496,30 +429,15 @@ static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type,
     d->type = type;
     sprintf(resn, "0x%x", rsn);
     d->root = debugfs_create_dir(resn, root);
-    if (!d->root) {
-        err = -ENOMEM;
-        goto out_free;
-    }

     for (i = 0; i < nfile; i++) {
         d->fields[i].i = i;
-        d->fields[i].dent = debugfs_create_file(field[i], 0400,
-                                                d->root, &d->fields[i],
-                                                &fops);
-        if (!d->fields[i].dent) {
-            err = -ENOMEM;
-            goto out_rem;
-        }
+        debugfs_create_file(field[i], 0400, d->root, &d->fields[i],
+                            &fops);
     }

     *dbg = d;
     return 0;
-
-out_rem:
-    debugfs_remove_recursive(d->root);
-
-out_free:
-    kfree(d);
-    return err;
 }

 static void rem_res_tree(struct mlx5_rsc_debug *d)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index a400f4430c28..381925c90d94 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -4,6 +4,7 @@
 #include <devlink.h>

 #include "mlx5_core.h"
+#include "fs_core.h"
 #include "eswitch.h"

 static int mlx5_devlink_flash_update(struct devlink *devlink,
@@ -107,12 +108,121 @@ void mlx5_devlink_free(struct devlink *devlink)
     devlink_free(devlink);
 }

+static int mlx5_devlink_fs_mode_validate(struct devlink *devlink, u32 id,
+                                         union devlink_param_value val,
+                                         struct netlink_ext_ack *extack)
+{
+    struct mlx5_core_dev *dev = devlink_priv(devlink);
+    char *value = val.vstr;
+    int err = 0;
+
+    if (!strcmp(value, "dmfs")) {
+        return 0;
+    } else if (!strcmp(value, "smfs")) {
+        u8 eswitch_mode;
+        bool smfs_cap;
+
+        eswitch_mode = mlx5_eswitch_mode(dev->priv.eswitch);
+        smfs_cap = mlx5_fs_dr_is_supported(dev);
+
+        if (!smfs_cap) {
+            err = -EOPNOTSUPP;
+            NL_SET_ERR_MSG_MOD(extack,
+                               "Software managed steering is not supported by current device");
+        }
not supported by current device"); + } + + else if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "Software managed steering is not supported when eswitch offloads enabled."); + err = -EOPNOTSUPP; + } + } else { + NL_SET_ERR_MSG_MOD(extack, + "Bad parameter: supported values are [\"dmfs\", \"smfs\"]"); + err = -EINVAL; + } + + return err; +} + +static int mlx5_devlink_fs_mode_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + enum mlx5_flow_steering_mode mode; + + if (!strcmp(ctx->val.vstr, "smfs")) + mode = MLX5_FLOW_STEERING_MODE_SMFS; + else + mode = MLX5_FLOW_STEERING_MODE_DMFS; + dev->priv.steering->mode = mode; + + return 0; +} + +static int mlx5_devlink_fs_mode_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) + strcpy(ctx->val.vstr, "smfs"); + else + strcpy(ctx->val.vstr, "dmfs"); + return 0; +} + +enum mlx5_devlink_param_id { + MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, +}; + +static const struct devlink_param mlx5_devlink_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, + "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set, + mlx5_devlink_fs_mode_validate), +}; + +static void mlx5_devlink_set_params_init_values(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + + if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS) + strcpy(value.vstr, "dmfs"); + else + strcpy(value.vstr, "smfs"); + devlink_param_driverinit_value_set(devlink, + MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, + value); +} + int mlx5_devlink_register(struct devlink *devlink, struct device *dev) { - return devlink_register(devlink, dev); + int err; + + err = devlink_register(devlink, dev); + if (err) + return err; + + err = devlink_params_register(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); + if (err) + goto params_reg_err; + mlx5_devlink_set_params_init_values(devlink); + devlink_params_publish(devlink); + return 0; + +params_reg_err: + devlink_unregister(devlink); + return err; } void mlx5_devlink_unregister(struct devlink *devlink) { + devlink_params_unregister(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); devlink_unregister(devlink); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h new file mode 100644 index 000000000000..1177860a2ee4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h
new file mode 100644
index 000000000000..1177860a2ee4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_EN_REP_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_EN_REP_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+#include "en_rep.h"
+
+TRACE_EVENT(mlx5e_rep_neigh_update,
+        TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, const u8 *ha,
+                 bool neigh_connected),
+        TP_ARGS(nhe, ha, neigh_connected),
+        TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name)
+                         __array(u8, ha, ETH_ALEN)
+                         __array(u8, v4, 4)
+                         __array(u8, v6, 16)
+                         __field(bool, neigh_connected)
+                         ),
+        TP_fast_assign(const struct mlx5e_neigh *mn = &nhe->m_neigh;
+                       struct in6_addr *pin6;
+                       __be32 *p32;
+
+                       __assign_str(devname, mn->dev->name);
+                       __entry->neigh_connected = neigh_connected;
+                       memcpy(__entry->ha, ha, ETH_ALEN);
+
+                       p32 = (__be32 *)__entry->v4;
+                       pin6 = (struct in6_addr *)__entry->v6;
+                       if (mn->family == AF_INET) {
+                           *p32 = mn->dst_ip.v4;
+                           ipv6_addr_set_v4mapped(*p32, pin6);
+                       } else if (mn->family == AF_INET6) {
+                           *pin6 = mn->dst_ip.v6;
+                       }
+                       ),
+        TP_printk("netdev: %s MAC: %pM IPv4: %pI4 IPv6: %pI6c neigh_connected=%d\n",
+                  __get_str(devname), __entry->ha,
+                  __entry->v4, __entry->v6, __entry->neigh_connected
+                  )
+);
+
+#endif /* _MLX5_EN_REP_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ./diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE en_rep_tracepoint
+#include <trace/define_trace.h>
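A TRACE_EVENT(mlx5e_rep_neigh_update, ...) definition like the one above expands into a trace_mlx5e_rep_neigh_update() static call point. A hedged sketch of how such a tracepoint is fired from driver code (the surrounding function is illustrative, not part of this patch):

#include "diag/en_rep_tracepoint.h"

static void example_neigh_update(struct mlx5e_neigh_hash_entry *nhe,
                                 const u8 *ha, bool neigh_connected)
{
    /* Compiles down to a static-key-guarded no-op unless the
     * mlx5:mlx5e_rep_neigh_update event is enabled at run time. */
    trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
}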
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c
new file mode 100644
index 000000000000..c5dc6c50fa87
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#define CREATE_TRACE_POINTS
+#include "en_tc_tracepoint.h"
+
+void put_ids_to_array(int *ids,
+                      const struct flow_action_entry *entries,
+                      unsigned int num)
+{
+    unsigned int i;
+
+    for (i = 0; i < num; i++)
+        ids[i] = entries[i].id;
+}
+
+#define NAME_SIZE 16
+
+static const char FLOWACT2STR[NUM_FLOW_ACTIONS][NAME_SIZE] = {
+    [FLOW_ACTION_ACCEPT]       = "ACCEPT",
+    [FLOW_ACTION_DROP]         = "DROP",
+    [FLOW_ACTION_TRAP]         = "TRAP",
+    [FLOW_ACTION_GOTO]         = "GOTO",
+    [FLOW_ACTION_REDIRECT]     = "REDIRECT",
+    [FLOW_ACTION_MIRRED]       = "MIRRED",
+    [FLOW_ACTION_VLAN_PUSH]    = "VLAN_PUSH",
+    [FLOW_ACTION_VLAN_POP]     = "VLAN_POP",
+    [FLOW_ACTION_VLAN_MANGLE]  = "VLAN_MANGLE",
+    [FLOW_ACTION_TUNNEL_ENCAP] = "TUNNEL_ENCAP",
+    [FLOW_ACTION_TUNNEL_DECAP] = "TUNNEL_DECAP",
+    [FLOW_ACTION_MANGLE]       = "MANGLE",
+    [FLOW_ACTION_ADD]          = "ADD",
+    [FLOW_ACTION_CSUM]         = "CSUM",
+    [FLOW_ACTION_MARK]         = "MARK",
+    [FLOW_ACTION_WAKE]         = "WAKE",
+    [FLOW_ACTION_QUEUE]        = "QUEUE",
+    [FLOW_ACTION_SAMPLE]       = "SAMPLE",
+    [FLOW_ACTION_POLICE]       = "POLICE",
+    [FLOW_ACTION_CT]           = "CT",
+};
+
+const char *parse_action(struct trace_seq *p,
+                         int *ids,
+                         unsigned int num)
+{
+    const char *ret = trace_seq_buffer_ptr(p);
+    unsigned int i;
+
+    for (i = 0; i < num; i++) {
+        if (ids[i] < NUM_FLOW_ACTIONS)
+            trace_seq_printf(p, "%s ", FLOWACT2STR[ids[i]]);
+        else
+            trace_seq_printf(p, "UNKNOWN ");
+    }
+
+    trace_seq_putc(p, 0);
+    return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
new file mode 100644
index 000000000000..d4e6cfaaade3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_TC_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_TC_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+#include <net/flow_offload.h>
+#include "en_rep.h"
+
+#define __parse_action(ids, num) parse_action(p, ids, num)
+
+void put_ids_to_array(int *ids,
+                      const struct flow_action_entry *entries,
+                      unsigned int num);
+
+const char *parse_action(struct trace_seq *p,
+                         int *ids,
+                         unsigned int num);
+
+DECLARE_EVENT_CLASS(mlx5e_flower_template,
+        TP_PROTO(const struct flow_cls_offload *f),
+        TP_ARGS(f),
+        TP_STRUCT__entry(__field(void *, cookie)
+                         __field(unsigned int, num)
+                         __dynamic_array(int, ids, f->rule ?
+                                         f->rule->action.num_entries : 0)
+                         ),
+        TP_fast_assign(__entry->cookie = (void *)f->cookie;
+                       __entry->num = (f->rule ?
+                                       f->rule->action.num_entries : 0);
+                       if (__entry->num)
+                           put_ids_to_array(__get_dynamic_array(ids),
+                                            f->rule->action.entries,
+                                            f->rule->action.num_entries);
+                       ),
+        TP_printk("cookie=%p actions= %s\n",
+                  __entry->cookie, __entry->num ?
+                  __parse_action(__get_dynamic_array(ids),
+                                 __entry->num) : "NULL"
+                  )
+);
+
+DEFINE_EVENT(mlx5e_flower_template, mlx5e_configure_flower,
+         TP_PROTO(const struct flow_cls_offload *f),
+         TP_ARGS(f)
+         );
+
+DEFINE_EVENT(mlx5e_flower_template, mlx5e_delete_flower,
+         TP_PROTO(const struct flow_cls_offload *f),
+         TP_ARGS(f)
+         );
+
+TRACE_EVENT(mlx5e_stats_flower,
+        TP_PROTO(const struct flow_cls_offload *f),
+        TP_ARGS(f),
+        TP_STRUCT__entry(__field(void *, cookie)
+                         __field(u64, bytes)
+                         __field(u64, packets)
+                         __field(u64, lastused)
+                         ),
+        TP_fast_assign(__entry->cookie = (void *)f->cookie;
+                       __entry->bytes = f->stats.bytes;
+                       __entry->packets = f->stats.pkts;
+                       __entry->lastused = f->stats.lastused;
+                       ),
+        TP_printk("cookie=%p bytes=%llu packets=%llu lastused=%llu\n",
+                  __entry->cookie, __entry->bytes,
+                  __entry->packets, __entry->lastused
+                  )
+);
+
+TRACE_EVENT(mlx5e_tc_update_neigh_used_value,
+        TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, bool neigh_used),
+        TP_ARGS(nhe, neigh_used),
+        TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name)
+                         __array(u8, v4, 4)
+                         __array(u8, v6, 16)
+                         __field(bool, neigh_used)
+                         ),
+        TP_fast_assign(const struct mlx5e_neigh *mn = &nhe->m_neigh;
+                       struct in6_addr *pin6;
+                       __be32 *p32;
+
+                       __assign_str(devname, mn->dev->name);
+                       __entry->neigh_used = neigh_used;
+
+                       p32 = (__be32 *)__entry->v4;
+                       pin6 = (struct in6_addr *)__entry->v6;
+                       if (mn->family == AF_INET) {
+                           *p32 = mn->dst_ip.v4;
+                           ipv6_addr_set_v4mapped(*p32, pin6);
+                       } else if (mn->family == AF_INET6) {
+                           *pin6 = mn->dst_ip.v6;
+                       }
+                       ),
+        TP_printk("netdev: %s IPv4: %pI4 IPv6: %pI6c neigh_used=%d\n",
+                  __get_str(devname), __entry->v4, __entry->v6,
+                  __entry->neigh_used
+                  )
+);
+
+#endif /* _MLX5_TC_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ./diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE en_tc_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index 8a4930c8bf62..94d7b69a95c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -546,16 +546,17 @@ static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer,
     trace_data->timestamp = timestamp;
     trace_data->lost = lost;
     trace_data->event_id = event_id;
-    strncpy(trace_data->msg, msg, TRACE_STR_MSG);
+    strscpy_pad(trace_data->msg, msg, TRACE_STR_MSG);

     tracer->st_arr.saved_traces_index =
         (tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1);
     mutex_unlock(&tracer->st_arr.lock);
 }

-static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
-                                    struct mlx5_core_dev *dev,
-                                    u64 trace_timestamp)
+static noinline
+void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
+                             struct mlx5_core_dev *dev,
+                             u64 trace_timestamp)
 {
     char tmp[512];
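The strncpy() to strscpy_pad() switch in fw_tracer.c above closes two holes at once: strncpy() can leave the destination unterminated when the source is too long, while strscpy_pad() always NUL-terminates and zero-fills the rest of the buffer. A small illustration of the expected semantics (the buffer and message are made up, not taken from this driver):

#include <linux/string.h>

static void example_copy(void)
{
    char dst[8];

    /* Truncates to 7 characters plus a terminating NUL, zero-pads
     * the tail of dst, and returns -E2BIG to signal truncation. */
    strscpy_pad(dst, "a very long trace message", sizeof(dst));
}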
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 65bec19a438f..8d76452cacdc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -54,6 +54,7 @@
 #include "mlx5_core.h"
 #include "en_stats.h"
 #include "en/fs.h"
+#include "lib/hv_vhca.h"

 extern const struct net_device_ops mlx5e_netdev_ops;
 struct page_pool;
@@ -162,6 +163,14 @@ enum mlx5e_rq_group {
 #define MLX5E_NUM_RQ_GROUPS(g) (1 + MLX5E_RQ_GROUP_##g)
 };

+static inline u8 mlx5e_get_num_lag_ports(struct mlx5_core_dev *mdev)
+{
+    if (mlx5_lag_is_lacp_owner(mdev))
+        return 1;
+
+    return clamp_t(u8, MLX5_CAP_GEN(mdev, num_lag_ports), 1, MLX5_MAX_PORTS);
+}
+
 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
 {
     switch (wq_type) {
@@ -300,6 +309,7 @@ struct mlx5e_dcbx_dp {
 enum {
     MLX5E_RQ_STATE_ENABLED,
+    MLX5E_RQ_STATE_RECOVERING,
     MLX5E_RQ_STATE_AM,
     MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
     MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
@@ -356,6 +366,7 @@ enum {
     MLX5E_SQ_STATE_IPSEC,
     MLX5E_SQ_STATE_AM,
     MLX5E_SQ_STATE_TLS,
+    MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
 };

 struct mlx5e_sq_wqe_info {
@@ -480,8 +491,6 @@ struct mlx5e_xdp_mpwqe {
     struct mlx5e_tx_wqe *wqe;
     u8 ds_count;
     u8 pkt_count;
-    u8 max_ds_count;
-    u8 complete;
     u8 inline_on;
 };
@@ -552,6 +561,8 @@ struct mlx5e_icosq {
     /* control path */
     struct mlx5_wq_ctrl wq_ctrl;
     struct mlx5e_channel *channel;
+
+    struct work_struct recover_work;
 } ____cacheline_aligned_in_smp;

 struct mlx5e_wqe_frag_info {
@@ -671,6 +682,8 @@ struct mlx5e_rq {
     struct zero_copy_allocator zca;
     struct xdp_umem *umem;

+    struct work_struct recover_work;
+
     /* control */
     struct mlx5_wq_ctrl wq_ctrl;
     __be32 mkey_be;
@@ -700,6 +713,7 @@ struct mlx5e_channel {
     struct net_device *netdev;
     __be32 mkey_be;
     u8 num_tc;
+    u8 lag_port;

     /* XDP_REDIRECT */
     struct mlx5e_xdpsq xdpsq;
@@ -778,6 +792,15 @@ struct mlx5e_modify_sq_param {
     int rl_index;
 };

+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+struct mlx5e_hv_vhca_stats_agent {
+    struct mlx5_hv_vhca_agent *agent;
+    struct delayed_work work;
+    u16 delay;
+    void *buf;
+};
+#endif
+
 struct mlx5e_xsk {
     /* UMEMs are stored separately from channels, because we don't want to
      * lose them when channels are recreated. The kernel also stores UMEMs,
@@ -804,7 +827,7 @@ struct mlx5e_priv {
     struct mlx5e_rq drop_rq;

     struct mlx5e_channels channels;
-    u32 tisn[MLX5E_MAX_NUM_TC];
+    u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC];
     struct mlx5e_rqt indir_rqt;
     struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
     struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
@@ -847,7 +870,11 @@ struct mlx5e_priv {
     struct mlx5e_tls *tls;
 #endif
     struct devlink_health_reporter *tx_reporter;
+    struct devlink_health_reporter *rx_reporter;
     struct mlx5e_xsk xsk;
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+    struct mlx5e_hv_vhca_stats_agent stats_agent;
+#endif
 };

 struct mlx5e_profile {
@@ -888,6 +915,26 @@
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
 void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);

+static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
+{
+    switch (rq->wq_type) {
+    case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+        return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+    default:
+        return mlx5_wq_cyc_get_size(&rq->wqe.wq);
+    }
+}
+
+static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
+{
+    switch (rq->wq_type) {
+    case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+        return rq->mpwqe.wq.cur_sz;
+    default:
+        return rq->wqe.wq.cur_sz;
+    }
+}
+
 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
 bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
                 struct mlx5e_params *params);
@@ -1006,18 +1053,18 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
 void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
 void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
                    struct mlx5e_params *params);
+int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
+void mlx5e_activate_rq(struct mlx5e_rq *rq);
+void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
+void mlx5e_activate_icosq(struct mlx5e_icosq *icosq);
+void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq);
 int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
             struct mlx5e_modify_sq_param *p);
 void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
 void mlx5e_tx_disable_queue(struct netdev_queue *txq);

-static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
-{
-    return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) &&
-        MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
-}
-
 static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
 {
     return MLX5_CAP_ETH(mdev, swp) &&
@@ -1063,6 +1110,7 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
 void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);

 int mlx5e_create_tises(struct mlx5e_priv *priv);
+void mlx5e_destroy_tises(struct mlx5e_priv *priv);
 int mlx5e_update_nic_rx(struct mlx5e_priv *priv);
 void mlx5e_update_carrier(struct mlx5e_priv *priv);
 int mlx5e_close(struct net_device *netdev);
@@ -1135,7 +1183,6 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
                struct mlx5e_params *params);
 void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
                 u16 num_channels);
-u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);

 void mlx5e_rx_dim_work(struct work_struct *work);
 void mlx5e_tx_dim_work(struct work_struct *work);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index be5961ff24cc..68d593074f6c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -10,11 +10,14 @@ enum {
 };

 struct mlx5e_tc_table {
+    /* protects flow table */
+    struct mutex t_lock;
     struct mlx5_flow_table *t;

     struct rhashtable ht;
-    DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+
+    struct mod_hdr_tbl mod_hdr;
+    struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
     DECLARE_HASHTABLE(hairpin_tbl, 8);

     struct notifier_block netdevice_nb;
@@ -92,9 +95,15 @@ struct mlx5e_tirc_config {
 enum mlx5e_tunnel_types {
     MLX5E_TT_IPV4_GRE,
     MLX5E_TT_IPV6_GRE,
+    MLX5E_TT_IPV4_IPIP,
+    MLX5E_TT_IPV6_IPIP,
+    MLX5E_TT_IPV4_IPV6,
+    MLX5E_TT_IPV6_IPV6,
     MLX5E_NUM_TUNNEL_TT,
 };

+bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
+
 /* L3/L4 traffic type classifier */
 struct mlx5e_ttc_table {
     struct mlx5e_flow_table ft;
@@ -132,12 +141,17 @@ struct mlx5e_ethtool_steering {
 void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv);
 void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv);
-int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd);
-int mlx5e_get_rxnfc(struct net_device *dev,
-                    struct ethtool_rxnfc *info, u32 *rule_locs);
+int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd);
+int mlx5e_ethtool_get_rxnfc(struct net_device *dev,
+                            struct ethtool_rxnfc *info, u32 *rule_locs);
 #else
 static inline void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)    { }
 static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { }
+static inline int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_ethtool_get_rxnfc(struct net_device *dev,
+                                          struct ethtool_rxnfc *info, u32 *rule_locs)
+{ return -EOPNOTSUPP; }
 #endif /* CONFIG_MLX5_EN_RXNFC */

 #ifdef CONFIG_MLX5_EN_ARFS
@@ -224,5 +238,8 @@ void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
 int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);

+bool mlx5e_tunnel_proto_supported(struct mlx5_core_dev *mdev, u8 proto_type);
+bool mlx5e_any_tunnel_proto_supported(struct mlx5_core_dev *mdev);
+
 #endif /* __MLX5E_FLOW_STEER_H__ */
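The stubs added to en/fs.h follow the usual compile-out pattern: when the config option is off, static inline stand-ins keep callers building and report -EOPNOTSUPP at run time. Sketched generically (CONFIG_FOO and foo_* are placeholders, not names from this patch):

#ifdef CONFIG_FOO
int foo_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd);
#else
static inline int foo_set_rxnfc(struct net_device *dev,
                                struct ethtool_rxnfc *cmd)
{ return -EOPNOTSUPP; }
#endif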
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
new file mode 100644
index 000000000000..1d6b58860da6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Mellanox Technologies.
+
+#include "health.h"
+#include "lib/eq.h"
+
+int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
+{
+    int err;
+
+    err = devlink_fmsg_pair_nest_start(fmsg, name);
+    if (err)
+        return err;
+
+    err = devlink_fmsg_obj_nest_start(fmsg);
+    if (err)
+        return err;
+
+    return 0;
+}
+
+int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg)
+{
+    int err;
+
+    err = devlink_fmsg_obj_nest_end(fmsg);
+    if (err)
+        return err;
+
+    err = devlink_fmsg_pair_nest_end(fmsg);
+    if (err)
+        return err;
+
+    return 0;
+}
+
+int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
+{
+    struct mlx5e_priv *priv = cq->channel->priv;
+    u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
+    u8 hw_status;
+    void *cqc;
+    int err;
+
+    err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out, sizeof(out));
+    if (err)
+        return err;
+
+    cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
+    hw_status = MLX5_GET(cqc, cqc, status);
+
+    err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
+    if (err)
+        return err;
+
+    err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
+    if (err)
+        return err;
+
+    err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
+    if (err)
+        return err;
+
+    err = mlx5e_reporter_named_obj_nest_end(fmsg);
+    if (err)
+        return err;
+
+    return 0;
+}
+
+int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
+{
+    u8 cq_log_stride;
+    u32 cq_sz;
+    int err;
+
+    cq_sz = mlx5_cqwq_get_size(&cq->wq);
+    cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
+
+    err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
+    if (err)
+        return err;
+
+    err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
+    if (err)
+        return err;
+
+    err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
+    if (err)
+        return err;
+
+    err = mlx5e_reporter_named_obj_nest_end(fmsg);
+    if (err)
+        return err;
+
+    return 0;
+}
+
+int mlx5e_health_create_reporters(struct mlx5e_priv *priv)
+{
+    int err;
+
+    err = mlx5e_reporter_tx_create(priv);
+    if (err)
+        return err;
+
+    err = mlx5e_reporter_rx_create(priv);
+    if (err)
+        return err;
+
+    return 0;
+}
+
+void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
+{
+    mlx5e_reporter_rx_destroy(priv);
+    mlx5e_reporter_tx_destroy(priv);
+}
+
+void mlx5e_health_channels_update(struct mlx5e_priv *priv)
+{
+    if (priv->tx_reporter)
+        devlink_health_reporter_state_update(priv->tx_reporter,
+                                             DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+    if (priv->rx_reporter)
+        devlink_health_reporter_state_update(priv->rx_reporter,
+                                             DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+}
+
+int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn)
+{
+    struct mlx5_core_dev *mdev = channel->mdev;
+    struct net_device *dev = channel->netdev;
+    struct mlx5e_modify_sq_param msp = {};
+    int err;
+
+    msp.curr_state = MLX5_SQC_STATE_ERR;
+    msp.next_state = MLX5_SQC_STATE_RST;
+
+    err = mlx5e_modify_sq(mdev, sqn, &msp);
+    if (err) {
+        netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
+        return err;
+    }
+
+    memset(&msp, 0, sizeof(msp));
+    msp.curr_state = MLX5_SQC_STATE_RST;
+    msp.next_state = MLX5_SQC_STATE_RDY;
+
+    err = mlx5e_modify_sq(mdev, sqn, &msp);
+    if (err) {
+        netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);
+        return err;
+    }
+
+    return 0;
+}
+
+int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
+{
+    int err = 0;
+
+    rtnl_lock();
+    mutex_lock(&priv->state_lock);
+
+    if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+        goto out;
+
+    err = mlx5e_safe_reopen_channels(priv);
+
+out:
+    mutex_unlock(&priv->state_lock);
+    rtnl_unlock();
+
+    return err;
+}
+
+int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel)
+{
+    u32 eqe_count;
+
+    netdev_err(channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
+               eq->core.eqn, eq->core.cons_index, eq->core.irqn);
+
+    eqe_count = mlx5_eq_poll_irq_disabled(eq);
+    if (!eqe_count)
+        return -EIO;
+
+    netdev_err(channel->netdev, "Recovered %d eqes on EQ 0x%x\n",
+               eqe_count, eq->core.eqn);
+
+    channel->stats->eq_rearm++;
+    return 0;
+}
+
+int mlx5e_health_report(struct mlx5e_priv *priv,
+                        struct devlink_health_reporter *reporter, char *err_str,
+                        struct mlx5e_err_ctx *err_ctx)
+{
+    if (!reporter) {
+        netdev_err(priv->netdev, err_str);
+        return err_ctx->recover(&err_ctx->ctx);
+    }
+    return devlink_health_report(reporter, err_str, err_ctx);
+}
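mlx5e_health_report() above takes a struct mlx5e_err_ctx so each caller can bundle a recovery callback with its context; when no devlink reporter exists it simply invokes the callback directly instead of routing through devlink_health_report(). A sketch of a caller, modeled on mlx5e_reporter_icosq_cqe_err() from this patch (the recover body is elided, the example_* names are illustrative):

static int example_recover(void *ctx)
{
    struct mlx5e_rq *rq = ctx;

    /* ... move the queue back to a healthy state ... */
    return 0;
}

static void example_report(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
{
    char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
    struct mlx5e_err_ctx err_ctx = {};

    err_ctx.ctx = rq;
    err_ctx.recover = example_recover;
    sprintf(err_str, "ERR CQE on RQ: 0x%x", rq->rqn);

    mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
}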
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
new file mode 100644
index 000000000000..d3693fa547ac
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5E_EN_HEALTH_H
+#define __MLX5E_EN_HEALTH_H
+
+#include "en.h"
+
+#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
+
+static inline bool cqe_syndrome_needs_recover(u8 syndrome)
+{
+    return syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR ||
+           syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+           syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
+           syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+}
+
+int mlx5e_reporter_tx_create(struct mlx5e_priv *priv);
+void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv);
+void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq);
+int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq);
+
+int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
+int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
+int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name);
+int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg);
+
+int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
+void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
+void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
+void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
+
+#define MLX5E_REPORTER_PER_Q_MAX_LEN 256
+
+struct mlx5e_err_ctx {
+    int (*recover)(void *ctx);
+    void *ctx;
+};
+
+int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn);
+int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel);
+int mlx5e_health_recover_channels(struct mlx5e_priv *priv);
+int mlx5e_health_report(struct mlx5e_priv *priv,
+                        struct devlink_health_reporter *reporter, char *err_str,
+                        struct mlx5e_err_ctx *err_ctx);
+int mlx5e_health_create_reporters(struct mlx5e_priv *priv);
+void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv);
+void mlx5e_health_channels_update(struct mlx5e_priv *priv);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
new file mode 100644
index 000000000000..b3a249b2a482
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include "en.h"
+#include "en/hv_vhca_stats.h"
+#include "lib/hv_vhca.h"
+#include "lib/hv.h"
+
+struct mlx5e_hv_vhca_per_ring_stats {
+    u64 rx_packets;
+    u64 rx_bytes;
+    u64 tx_packets;
+    u64 tx_bytes;
+};
+
+static void
+mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch,
+                              struct mlx5e_hv_vhca_per_ring_stats *data)
+{
+    struct mlx5e_channel_stats *stats;
+    int tc;
+
+    stats = &priv->channel_stats[ch];
+    data->rx_packets = stats->rq.packets;
+    data->rx_bytes = stats->rq.bytes;
+
+    for (tc = 0; tc < priv->max_opened_tc; tc++) {
+        data->tx_packets += stats->sq[tc].packets;
+        data->tx_bytes += stats->sq[tc].bytes;
+    }
+}
+
+static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, void *data,
+                                     int buf_len)
+{
+    int ch, i = 0;
+
+    for (ch = 0; ch < priv->max_nch; ch++) {
+        void *buf = data + i;
+
+        if (WARN_ON_ONCE(buf +
+                         sizeof(struct mlx5e_hv_vhca_per_ring_stats) >
+                         data + buf_len))
+            return;
+
+        mlx5e_hv_vhca_fill_ring_stats(priv, ch, buf);
+        i += sizeof(struct mlx5e_hv_vhca_per_ring_stats);
+    }
+}
+
+static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv)
+{
+    return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) *
+            priv->max_nch);
+}
+
+static void mlx5e_hv_vhca_stats_work(struct work_struct *work)
+{
+    struct mlx5e_hv_vhca_stats_agent *sagent;
+    struct mlx5_hv_vhca_agent *agent;
+    struct delayed_work *dwork;
+    struct mlx5e_priv *priv;
+    int buf_len, rc;
+    void *buf;
+
+    dwork = to_delayed_work(work);
+    sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work);
+    priv = container_of(sagent, struct mlx5e_priv, stats_agent);
+    buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+    agent = sagent->agent;
+    buf = sagent->buf;
+
+    memset(buf, 0, buf_len);
+    mlx5e_hv_vhca_fill_stats(priv, buf, buf_len);
+
+    rc = mlx5_hv_vhca_agent_write(agent, buf, buf_len);
+    if (rc) {
+        mlx5_core_err(priv->mdev,
+                      "%s: Failed to write stats, err = %d\n",
+                      __func__, rc);
+        return;
+    }
+
+    if (sagent->delay)
+        queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+enum {
+    MLX5_HV_VHCA_STATS_VERSION = 1,
+    MLX5_HV_VHCA_STATS_UPDATE_ONCE = 0xFFFF,
+};
+
+static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent,
+                                        struct mlx5_hv_vhca_control_block *block)
+{
+    struct mlx5e_hv_vhca_stats_agent *sagent;
+    struct mlx5e_priv *priv;
+
+    priv = mlx5_hv_vhca_agent_priv(agent);
+    sagent = &priv->stats_agent;
+
+    block->version = MLX5_HV_VHCA_STATS_VERSION;
+    block->rings = priv->max_nch;
+
+    if (!block->command) {
+        cancel_delayed_work_sync(&priv->stats_agent.work);
+        return;
+    }
+
+    sagent->delay = block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE ? 0 :
+                    msecs_to_jiffies(block->command * 100);
+
+    queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
+{
+    struct mlx5e_priv *priv = mlx5_hv_vhca_agent_priv(agent);
+
+    cancel_delayed_work_sync(&priv->stats_agent.work);
+}
+
+int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+    int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+    struct mlx5_hv_vhca_agent *agent;
+
+    priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
+    if (!priv->stats_agent.buf)
+        return -ENOMEM;
+
+    agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
+                                      MLX5_HV_VHCA_AGENT_STATS,
+                                      mlx5e_hv_vhca_stats_control, NULL,
+                                      mlx5e_hv_vhca_stats_cleanup,
+                                      priv);
+
+    if (IS_ERR_OR_NULL(agent)) {
+        if (IS_ERR(agent))
+            netdev_warn(priv->netdev,
+                        "Failed to create hv vhca stats agent, err = %ld\n",
+                        PTR_ERR(agent));
+
+        kfree(priv->stats_agent.buf);
+        return IS_ERR_OR_NULL(agent);
+    }
+
+    priv->stats_agent.agent = agent;
+    INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
+
+    return 0;
+}
+
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+    if (IS_ERR_OR_NULL(priv->stats_agent.agent))
+        return;
+
+    mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent);
+    kfree(priv->stats_agent.buf);
+}
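Worth noting how mlx5e_hv_vhca_stats_control() above encodes the polling cadence in the control block's command field: 0 cancels the delayed work, MLX5_HV_VHCA_STATS_UPDATE_ONCE (0xFFFF) requests a single immediate dump (delay 0), and any other value sets a periodic dump of command * 100 ms, converted with msecs_to_jiffies(). mlx5e_hv_vhca_stats_work() then re-queues itself only while a non-zero delay is set.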
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
new file mode 100644
index 000000000000..664463faf77b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_STATS_VHCA_H__
+#define __MLX5_EN_STATS_VHCA_H__
+#include "en.h"
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
+
+#else
+
+static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+    return 0;
+}
+
+static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+}
+#endif
+
+#endif /* __MLX5_EN_STATS_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 79301d116667..eb2e1f2138e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -25,18 +25,33 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
     return headroom;
 }

-u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
-                                struct mlx5e_xsk_param *xsk)
+u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
+                             struct mlx5e_xsk_param *xsk)
 {
     u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
     u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
-    u32 frag_sz = linear_rq_headroom + hw_mtu;
+
+    return linear_rq_headroom + hw_mtu;
+}
+
+u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
+                                struct mlx5e_xsk_param *xsk)
+{
+    u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk);

     /* AF_XDP doesn't build SKBs in place. */
     if (!xsk)
         frag_sz = MLX5_SKB_FRAG_SZ(frag_sz);

-    /* XDP in mlx5e doesn't support multiple packets per page. */
+    /* XDP in mlx5e doesn't support multiple packets per page. AF_XDP is a
+     * special case. It can run with frames smaller than a page, as it
+     * doesn't allocate pages dynamically. However, here we pretend that
+     * fragments are page-sized: it allows to treat XSK frames like pages
+     * by redirecting alloc and free operations to XSK rings and by using
+     * the fact there are no multiple packets per "page" (which is a frame).
+     * The latter is important, because frames may come in a random order,
+     * and we will have trouble assemblying a real page of multiple frames.
+     */
     if (mlx5e_rx_is_xdp(params, xsk))
         frag_sz = max_t(u32, frag_sz, PAGE_SIZE);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 3a615d663d84..989d8f429438 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -76,6 +76,8 @@ static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile,

 u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
                  struct mlx5e_xsk_param *xsk);
+u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
+                 struct mlx5e_xsk_param *xsk);
 u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
                 struct mlx5e_xsk_param *xsk);
 u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
*/ - -#ifndef __MLX5E_EN_REPORTER_H -#define __MLX5E_EN_REPORTER_H - -#include <linux/mlx5/driver.h> -#include "en.h" - -int mlx5e_tx_reporter_create(struct mlx5e_priv *priv); -void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv); -void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq); -int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq); - -#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c new file mode 100644 index 000000000000..b860569d4247 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Mellanox Technologies. + +#include "health.h" +#include "params.h" + +static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) +{ + int outlen = MLX5_ST_SZ_BYTES(query_rq_out); + void *out; + void *rqc; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_rq(dev, rqn, out); + if (err) + goto out; + + rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context); + *state = MLX5_GET(rqc, rqc, state); + +out: + kvfree(out); + return err; +} + +static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + + while (time_before(jiffies, exp_time)) { + if (icosq->cc == icosq->pc) + return 0; + + msleep(20); + } + + netdev_err(icosq->channel->netdev, + "Wait for ICOSQ 0x%x flush timeout (cc = 0x%x, pc = 0x%x)\n", + icosq->sqn, icosq->cc, icosq->pc); + + return -ETIMEDOUT; +} + +static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) +{ + WARN_ONCE(icosq->cc != icosq->pc, "ICOSQ 0x%x: cc (0x%x) != pc (0x%x)\n", + icosq->sqn, icosq->cc, icosq->pc); + icosq->cc = 0; + icosq->pc = 0; +} + +static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) +{ + struct mlx5_core_dev *mdev; + struct mlx5e_icosq *icosq; + struct net_device *dev; + struct mlx5e_rq *rq; + u8 state; + int err; + + icosq = ctx; + rq = &icosq->channel->rq; + mdev = icosq->channel->mdev; + dev = icosq->channel->netdev; + err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query ICOSQ 0x%x state. 
err = %d\n", + icosq->sqn, err); + goto out; + } + + if (state != MLX5_SQC_STATE_ERR) + goto out; + + mlx5e_deactivate_rq(rq); + err = mlx5e_wait_for_icosq_flush(icosq); + if (err) + goto out; + + mlx5e_deactivate_icosq(icosq); + + /* At this point, both the rq and the icosq are disabled */ + + err = mlx5e_health_sq_to_ready(icosq->channel, icosq->sqn); + if (err) + goto out; + + mlx5e_reset_icosq_cc_pc(icosq); + mlx5e_free_rx_descs(rq); + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mlx5e_activate_icosq(icosq); + mlx5e_activate_rq(rq); + + rq->stats->recover++; + return 0; +out: + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + return err; +} + +void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) +{ + struct mlx5e_priv *priv = icosq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = icosq; + err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover; + sprintf(err_str, "ERR CQE on ICOSQ: 0x%x", icosq->sqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) +{ + struct net_device *dev = rq->netdev; + int err; + + err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST); + if (err) { + netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); + return err; + } + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) { + netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); + return err; + } + + return 0; +} + +static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) +{ + struct mlx5_core_dev *mdev; + struct net_device *dev; + struct mlx5e_rq *rq; + u8 state; + int err; + + rq = ctx; + mdev = rq->mdev; + dev = rq->netdev; + err = mlx5e_query_rq_state(mdev, rq->rqn, &state); + if (err) { + netdev_err(dev, "Failed to query RQ 0x%x state. 
err = %d\n", + rq->rqn, err); + goto out; + } + + if (state != MLX5_RQC_STATE_ERR) + goto out; + + mlx5e_deactivate_rq(rq); + mlx5e_free_rx_descs(rq); + + err = mlx5e_rq_to_ready(rq, MLX5_RQC_STATE_ERR); + if (err) + goto out; + + clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); + mlx5e_activate_rq(rq); + rq->stats->recover++; + return 0; +out: + clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); + return err; +} + +void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq) +{ + struct mlx5e_priv *priv = rq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover; + sprintf(err_str, "ERR CQE on RQ: 0x%x", rq->rqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +static int mlx5e_rx_reporter_timeout_recover(void *ctx) +{ + struct mlx5e_icosq *icosq; + struct mlx5_eq_comp *eq; + struct mlx5e_rq *rq; + int err; + + rq = ctx; + icosq = &rq->channel->icosq; + eq = rq->cq.mcq.eq; + err = mlx5e_health_channel_eq_recover(eq, rq->channel); + if (err) + clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); + + return err; +} + +void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *icosq = &rq->channel->icosq; + struct mlx5e_priv *priv = rq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_timeout_recover; + sprintf(err_str, "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n", + icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) +{ + return err_ctx->recover(err_ctx->ctx); +} + +static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter, + void *context) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_rx_reporter_recover_from_ctx(err_ctx) : + mlx5e_health_recover_channels(priv); +} + +static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = rq->channel->priv; + struct mlx5e_params *params; + struct mlx5e_icosq *icosq; + u8 icosq_hw_state; + int wqes_sz; + u8 hw_state; + u16 wq_head; + int err; + + params = &priv->channels.params; + icosq = &rq->channel->icosq; + err = mlx5e_query_rq_state(priv->mdev, rq->rqn, &hw_state); + if (err) + return err; + + err = mlx5_core_query_sq_state(priv->mdev, icosq->sqn, &icosq_hw_state); + if (err) + return err; + + wqes_sz = mlx5e_rqwq_get_cur_sz(rq); + wq_head = params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? 
+ rq->mpwqe.wq.head : mlx5_wq_cyc_get_head(&rq->wqe.wq); + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->channel->ix); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "SW state", rq->state); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", wq_head); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "ICOSQ HW state", icosq_hw_state); + if (err) + return err; + + err = mlx5e_reporter_cq_diagnose(&rq->cq, fmsg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_rq *generic_rq; + u32 rq_stride, rq_sz; + int i, err = 0; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + generic_rq = &priv->channels.c[0]->rq; + rq_sz = mlx5e_rqwq_get_size(generic_rq); + rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL)); + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common config"); + if (err) + goto unlock; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "RQ"); + if (err) + goto unlock; + + err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type); + if (err) + goto unlock; + + err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride); + if (err) + goto unlock; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz); + if (err) + goto unlock; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + goto unlock; + + err = mlx5e_reporter_cq_common_diagnose(&generic_rq->cq, fmsg); + if (err) + goto unlock; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + goto unlock; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); + if (err) + goto unlock; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_rq *rq = &priv->channels.c[i]->rq; + + err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg); + if (err) + goto unlock; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + goto unlock; +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { + .name = "rx", + .recover = mlx5e_rx_reporter_recover, + .diagnose = mlx5e_rx_reporter_diagnose, +}; + +#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 + +int mlx5e_reporter_rx_create(struct mlx5e_priv *priv) +{ + struct devlink *devlink = priv_to_devlink(priv->mdev); + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_create(devlink, + &mlx5_rx_reporter_ops, + MLX5E_REPORTER_RX_GRACEFUL_PERIOD, + true, priv); + if (IS_ERR(reporter)) { + netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", + PTR_ERR(reporter)); + return PTR_ERR(reporter); + } + priv->rx_reporter = reporter; + return 0; +} + +void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv) +{ + if (!priv->rx_reporter) + return; + + devlink_health_reporter_destroy(priv->rx_reporter); +} diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index c7f86453c638..bfed558637c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -1,16 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2019 Mellanox Technologies. */ -#include <net/devlink.h> -#include "reporter.h" -#include "lib/eq.h" - -#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256 - -struct mlx5e_tx_err_ctx { - int (*recover)(struct mlx5e_txqsq *sq); - struct mlx5e_txqsq *sq; -}; +#include "health.h" static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) { @@ -40,41 +31,20 @@ static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) sq->pc = 0; } -static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) +static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) { - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - struct mlx5e_modify_sq_param msp = {0}; + struct mlx5_core_dev *mdev; + struct net_device *dev; + struct mlx5e_txqsq *sq; + u8 state; int err; - msp.curr_state = curr_state; - msp.next_state = MLX5_SQC_STATE_RST; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); - return err; - } - - memset(&msp, 0, sizeof(msp)); - msp.curr_state = MLX5_SQC_STATE_RST; - msp.next_state = MLX5_SQC_STATE_RDY; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); - return err; - } - - return 0; -} + sq = ctx; + mdev = sq->channel->mdev; + dev = sq->channel->netdev; -static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) -{ - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - u8 state; - int err; + if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + return 0; err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); if (err) { @@ -97,7 +67,7 @@ static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) * pending WQEs. SQ can safely reset the SQ. 
*/ - err = mlx5e_sq_to_ready(sq, state); + err = mlx5e_health_sq_to_ready(sq->channel, sq->sqn); if (err) goto out; @@ -112,115 +82,98 @@ out: return err; } -static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter, - char *err_str, - struct mlx5e_tx_err_ctx *err_ctx) +void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) { - if (IS_ERR_OR_NULL(tx_reporter)) { - netdev_err(err_ctx->sq->channel->netdev, err_str); - return err_ctx->recover(err_ctx->sq); - } - - return devlink_health_report(tx_reporter, err_str, err_ctx); -} + struct mlx5e_priv *priv = sq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {0}; -void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq) -{ - char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; - struct mlx5e_tx_err_ctx err_ctx = {0}; - - err_ctx.sq = sq; - err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; + err_ctx.ctx = sq; + err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn); - mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str, - &err_ctx); + mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); } -static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq) +static int mlx5e_tx_reporter_timeout_recover(void *ctx) { - struct mlx5_eq_comp *eq = sq->cq.mcq.eq; - u32 eqe_count; - - netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", - eq->core.eqn, eq->core.cons_index, eq->core.irqn); + struct mlx5_eq_comp *eq; + struct mlx5e_txqsq *sq; + int err; - eqe_count = mlx5_eq_poll_irq_disabled(eq); - if (!eqe_count) { + sq = ctx; + eq = sq->cq.mcq.eq; + err = mlx5e_health_channel_eq_recover(eq, sq->channel); + if (err) clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - return -EIO; - } - netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n", - eqe_count, eq->core.eqn); - sq->channel->stats->eq_rearm++; - return 0; + return err; } -int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq) +int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) { - char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; - struct mlx5e_tx_err_ctx err_ctx; + struct mlx5e_priv *priv = sq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx; - err_ctx.sq = sq; - err_ctx.recover = mlx5e_tx_reporter_timeout_recover; + err_ctx.ctx = sq; + err_ctx.recover = mlx5e_tx_reporter_timeout_recover; sprintf(err_str, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, jiffies_to_usecs(jiffies - sq->txq->trans_start)); - return mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str, - &err_ctx); + return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); } /* state lock cannot be grabbed within this function. * It can cause a dead lock or a read-after-free. 
*/ -static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx) -{ - return err_ctx->recover(err_ctx->sq); -} - -static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) +static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) { - int err = 0; - - rtnl_lock(); - mutex_lock(&priv->state_lock); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto out; - - err = mlx5e_safe_reopen_channels(priv); - -out: - mutex_unlock(&priv->state_lock); - rtnl_unlock(); - - return err; + return err_ctx->recover(err_ctx->ctx); } static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, void *context) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); - struct mlx5e_tx_err_ctx *err_ctx = context; + struct mlx5e_err_ctx *err_ctx = context; return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : - mlx5e_tx_reporter_recover_all(priv); + mlx5e_health_recover_channels(priv); } static int mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, - u32 sqn, u8 state, bool stopped) + struct mlx5e_txqsq *sq, int tc) { + struct mlx5e_priv *priv = sq->channel->priv; + bool stopped = netif_xmit_stopped(sq->txq); + u8 state; int err; + err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); + if (err) + return err; + err = devlink_fmsg_obj_nest_start(fmsg); if (err) return err; - err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sqn); + err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn); if (err) return err; @@ -232,6 +185,18 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, if (err) return err; + err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc); + if (err) + return err; + + err = mlx5e_reporter_cq_diagnose(&sq->cq, fmsg); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); if (err) return err; @@ -243,31 +208,61 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); - int i, err = 0; + struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; + u32 sq_stride, sq_sz; + + int i, tc, err = 0; mutex_lock(&priv->state_lock); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) goto unlock; + sq_sz = mlx5_wq_cyc_get_size(&generic_sq->wq); + sq_stride = MLX5_SEND_WQE_BB; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common Config"); + if (err) + goto unlock; + + err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ"); + if (err) + goto unlock; + + err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride); + if (err) + goto unlock; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz); + if (err) + goto unlock; + + err = mlx5e_reporter_cq_common_diagnose(&generic_sq->cq, fmsg); + if (err) + goto unlock; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + goto unlock; + + err = mlx5e_reporter_named_obj_nest_end(fmsg); + if (err) + goto unlock; + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); if (err) goto unlock; - for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; - i++) { - struct mlx5e_txqsq *sq = priv->txq2sq[i]; - u8 state; + for (i = 0; i < priv->channels.num; i++) { + struct 
mlx5e_channel *c = priv->channels.c[i]; - err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); - if (err) - goto unlock; + for (tc = 0; tc < priv->channels.params.num_tc; tc++) { + struct mlx5e_txqsq *sq = &c->sq[tc]; - err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq->sqn, - state, - netif_xmit_stopped(sq->txq)); - if (err) - goto unlock; + err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc); + if (err) + goto unlock; + } } err = devlink_fmsg_arr_pair_nest_end(fmsg); if (err) @@ -286,25 +281,30 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 -int mlx5e_tx_reporter_create(struct mlx5e_priv *priv) +int mlx5e_reporter_tx_create(struct mlx5e_priv *priv) { + struct devlink_health_reporter *reporter; struct mlx5_core_dev *mdev = priv->mdev; - struct devlink *devlink = priv_to_devlink(mdev); + struct devlink *devlink; - priv->tx_reporter = + devlink = priv_to_devlink(mdev); + reporter = devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, MLX5_REPORTER_TX_GRACEFUL_PERIOD, true, priv); - if (IS_ERR(priv->tx_reporter)) + if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create tx reporter, err = %ld\n", - PTR_ERR(priv->tx_reporter)); - return IS_ERR_OR_NULL(priv->tx_reporter); + PTR_ERR(reporter)); + return PTR_ERR(reporter); + } + priv->tx_reporter = reporter; + return 0; } -void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv) +void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) { - if (IS_ERR_OR_NULL(priv->tx_reporter)) + if (!priv->tx_reporter) return; devlink_health_reporter_destroy(priv->tx_reporter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index a6a52806be45..f8ee18b4da6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -31,29 +31,36 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev; uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - uplink_upper = netdev_master_upper_dev_get(uplink_dev); + + rcu_read_lock(); + uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev); + /* mlx5_lag_is_sriov() is a blocking function which can't be called + * while holding rcu read lock. Take the net_device for correctness + * sake. 
+ */ + if (uplink_upper) + dev_hold(uplink_upper); + rcu_read_unlock(); + dst_is_lag_dev = (uplink_upper && netif_is_lag_master(uplink_upper) && real_dev == uplink_upper && mlx5_lag_is_sriov(priv->mdev)); + if (uplink_upper) + dev_put(uplink_upper); /* if the egress device isn't on the same HW e-switch or * it's a LAG device, use the uplink */ + *route_dev = dev; if (!netdev_port_same_parent_id(priv->netdev, real_dev) || - dst_is_lag_dev) { - *route_dev = dev; + dst_is_lag_dev || is_vlan_dev(*route_dev)) *out_dev = uplink_dev; - } else { - *route_dev = dev; - if (is_vlan_dev(*route_dev)) - *out_dev = uplink_dev; - else if (mlx5e_eswitch_rep(dev) && - mlx5e_is_valid_eswitch_fwd_dev(priv, dev)) - *out_dev = *route_dev; - else - return -EOPNOTSUPP; - } + else if (mlx5e_eswitch_rep(dev) && + mlx5e_is_valid_eswitch_fwd_dev(priv, dev)) + *out_dev = *route_dev; + else + return -EOPNOTSUPP; if (!(mlx5e_eswitch_rep(*out_dev) && mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) @@ -284,14 +291,14 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, */ goto out; } - - err = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - ipv4_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv4_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); goto destroy_neigh_entry; + } e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); @@ -400,13 +407,14 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, goto out; } - err = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - ipv6_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv6_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); goto destroy_neigh_entry; + } e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index ddfe19adb3d9..87be96747902 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -6,7 +6,7 @@ #include "en.h" -#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS +#define MLX5E_SQ_NOPS_ROOM (MLX5_SEND_WQE_MAX_WQEBBS - 1) #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ MLX5E_SQ_NOPS_ROOM) @@ -117,9 +117,27 @@ mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, mlx5_write64((__be32 *)ctrl, uar_map); } -static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5e_tx_wqe *wqe) +static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg) { - return !!wqe->ctrl.tisn; + return cseg && !!cseg->tisn; +} + +static inline u8 +mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg, + struct sk_buff *skb) +{ + u8 mode; + + if (mlx5e_transport_inline_tx_wqe(cseg)) + return MLX5_INLINE_MODE_TCP_UDP; + + mode = sq->min_inline_mode; + + if (skb_vlan_tag_present(skb) && + test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state)) + mode = max_t(u8, MLX5_INLINE_MODE_L2, mode); + + return mode; } static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index b0b982cf69bb..f049e0ac308a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -122,6 +122,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, void *va, u16 *rx_headroom, u32 *len, bool xsk) { struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); + struct xdp_umem *umem = rq->umem; struct xdp_buff xdp; u32 act; int err; @@ -138,8 +139,11 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, xdp.rxq = &rq->xdp_rxq; act = bpf_prog_run_xdp(prog, &xdp); - if (xsk) - xdp.handle += xdp.data - xdp.data_hard_start; + if (xsk) { + u64 off = xdp.data - xdp.data_hard_start; + + xdp.handle = xsk_umem_adjust_offset(umem, xdp.handle, off); + } switch (act) { case XDP_PASS: *rx_headroom = xdp.data - xdp.data_hard_start; @@ -179,33 +183,19 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; struct mlx5_wq_cyc *wq = &sq->wq; - u8 wqebbs; - u16 pi; - - mlx5e_xdpsq_fetch_wqe(sq, &session->wqe); - - prefetchw(session->wqe->data); - session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; - session->pkt_count = 0; - session->complete = 0; + u16 pi, contig_wqebbs; pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); -/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS - * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. - * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a - * full-session WQE be cache-aligned. - */ -#if L1_CACHE_BYTES < 128 -#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) -#else -#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) -#endif + if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS)) + mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs); - wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi), - MLX5E_XDP_MPW_MAX_WQEBBS); + session->wqe = mlx5e_xdpsq_fetch_wqe(sq, &pi); - session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs; + prefetchw(session->wqe->data); + session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; + session->pkt_count = 0; mlx5e_xdp_update_inline_state(sq); @@ -244,7 +234,7 @@ static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) { if (unlikely(!sq->mpwqe.wqe)) { if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5_SEND_WQE_MAX_WQEBBS))) { + MLX5E_XDPSQ_STOP_ROOM))) { /* SQ is full, ring doorbell */ mlx5e_xmit_xdp_doorbell(sq); sq->stats->full++; @@ -285,8 +275,8 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); - if (unlikely(session->complete || - session->ds_count == session->max_ds_count)) + if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) || + session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS)) mlx5e_xdp_mpwqe_complete(sq); mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index b90923932668..36ac1e3816b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -40,6 +40,26 @@ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) +#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM) + +#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) 
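For concreteness on the threshold just defined: struct mlx5_wqe_inline_seg is a single __be32 byte-count header, so MLX5E_XDP_INLINE_WQE_SZ_THRSD works out to 252 bytes, and with 16-byte data segments (MLX5_SEND_WQE_DS) the MAX_DS_CNT define that follows evaluates to 16. A compile-time sanity check of that arithmetic (an illustrative sketch, not part of the patch; it assumes the standard mlx5 segment sizes):

	/* 256 - sizeof(__be32) = 252; DIV_ROUND_UP(252, 16) = 16 */
	static_assert(MLX5E_XDP_INLINE_WQE_SZ_THRSD == 252,
		      "inline threshold changed");
	static_assert(DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD,
				   MLX5_SEND_WQE_DS) == 16,
		      "inline DS bound changed");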
+#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \ + DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS) + +/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS + * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. + * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a + * full-session WQE be cache-aligned. + */ +#if L1_CACHE_BYTES < 128 +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) +#else +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) +#endif + +#define MLX5E_XDP_MPW_MAX_NUM_DS \ + (MLX5E_XDP_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) + struct mlx5e_xsk_param; int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, @@ -114,6 +134,30 @@ static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq) session->inline_on = 1; } +static inline bool +mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session) +{ + return session->inline_on && + session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS; +} + +static inline void +mlx5e_fill_xdpsq_frag_edge(struct mlx5e_xdpsq *sq, struct mlx5_wq_cyc *wq, + u16 pi, u16 nnops) +{ + struct mlx5e_xdp_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; + + edge_wi = wi + nnops; + /* fill sq frag edge with nops to avoid wqe wrapping two pages */ + for (; wi < edge_wi; wi++) { + wi->num_wqebbs = 1; + wi->num_pkts = 0; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + + sq->stats->nops += nnops; +} + static inline void mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd, @@ -126,20 +170,12 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, session->pkt_count++; -#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) - if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) { struct mlx5_wqe_inline_seg *inline_dseg = (struct mlx5_wqe_inline_seg *)dseg; u16 ds_len = sizeof(*inline_dseg) + dma_len; u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS); - if (unlikely(session->ds_count + ds_cnt > session->max_ds_count)) { - /* Not enough space for inline wqe, send with memory pointer */ - session->complete = true; - goto no_inline; - } - inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG); memcpy(inline_dseg->data, xdptxd->data, dma_len); @@ -148,21 +184,23 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, return; } -no_inline: dseg->addr = cpu_to_be64(xdptxd->dma_addr); dseg->byte_count = cpu_to_be32(dma_len); dseg->lkey = sq->mkey_be; session->ds_count++; } -static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, - struct mlx5e_tx_wqe **wqe) +static inline struct mlx5e_tx_wqe * +mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, u16 *pi) { struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_tx_wqe *wqe; + + *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + wqe = mlx5_wq_cyc_get_wqe(wq, *pi); + memset(wqe, 0, sizeof(*wqe)); - *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - memset(*wqe, 0, sizeof(**wqe)); + return wqe; } static inline void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 6a55573ec8f2..475b6bd5d29b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -24,7 +24,8 @@ int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, if (!xsk_umem_peek_addr_rq(umem, &handle)) return -ENOMEM; - dma_info->xsk.handle = 
handle + rq->buff.umem_headroom; + dma_info->xsk.handle = xsk_umem_adjust_offset(umem, handle, + rq->buff.umem_headroom); dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle); /* No need to add headroom to the DMA address. In striding RQ case, we @@ -104,7 +105,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, /* head_offset is not used in this function, because di->xsk.data and * di->addr point directly to the necessary place. Furthermore, in the - * current implementation, one page = one packet = one frame, so + * current implementation, UMR pages are mapped to XSK frames, so * head_offset should always be 0. */ WARN_ON_ONCE(head_offset); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h index 307b923a1361..cab0e93497ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -5,6 +5,7 @@ #define __MLX5_EN_XSK_RX_H__ #include "en.h" +#include <net/xdp_sock.h> /* RX data path */ @@ -24,4 +25,17 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); +static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err) +{ + if (!xsk_umem_uses_need_wakeup(rq->umem)) + return alloc_err; + + if (unlikely(alloc_err)) + xsk_set_rx_need_wakeup(rq->umem); + else + xsk_clear_rx_need_wakeup(rq->umem); + + return false; +} + #endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 7f78c004d12f..631af8dee517 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -4,18 +4,23 @@ #include "setup.h" #include "en/params.h" +/* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may + * change unexpectedly, and mlx5e has a minimum valid stride size for striding + * RQ, keep this check in the driver. + */ +#define MLX5E_MIN_XSK_CHUNK_SIZE 2048 + bool mlx5e_validate_xsk_param(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5_core_dev *mdev) { - /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current - * mlx5e XDP implementation doesn't support multiple packets per page. - */ - if (xsk->chunk_size != PAGE_SIZE) + /* AF_XDP doesn't support frames larger than PAGE_SIZE. */ + if (xsk->chunk_size > PAGE_SIZE || + xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) return false; /* Current MTU and XSK headroom don't allow packets to fit the frames. 
*/ - if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size) + if (mlx5e_rx_get_min_frag_sz(params, xsk) > xsk->chunk_size) return false; /* frag_sz is different for regular and XSK RQs, so ensure that linear @@ -60,24 +65,28 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, struct mlx5e_channel *c) { - struct mlx5e_channel_param cparam = {}; + struct mlx5e_channel_param *cparam; struct dim_cq_moder icocq_moder = {}; int err; if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) return -EINVAL; - mlx5e_build_xsk_cparam(priv, params, xsk, &cparam); + cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); + if (!cparam) + return -ENOMEM; + + mlx5e_build_xsk_cparam(priv, params, xsk, cparam); - err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq); + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->xskrq.cq); if (unlikely(err)) - return err; + goto err_free_cparam; - err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq); + err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq); if (unlikely(err)) goto err_close_rx_cq; - err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq); + err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xsksq.cq); if (unlikely(err)) goto err_close_rq; @@ -87,21 +96,23 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, * is disabled and then reenabled, but the SQ continues receiving CQEs * from the old UMEM. */ - err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true); + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true); if (unlikely(err)) goto err_close_tx_cq; - err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq); + err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->xskicosq.cq); if (unlikely(err)) goto err_close_sq; /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be * triggered and NAPI to be called on the correct CPU. */ - err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->xskicosq); if (unlikely(err)) goto err_close_icocq; + kvfree(cparam); + spin_lock_init(&c->xskicosq_lock); set_bit(MLX5E_CHANNEL_STATE_XSK, c->state); @@ -123,6 +134,9 @@ err_close_rq: err_close_rx_cq: mlx5e_close_cq(&c->xskrq.cq); +err_free_cparam: + kvfree(cparam); + return err; } @@ -141,6 +155,7 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) void mlx5e_activate_xsk(struct mlx5e_channel *c) { + mlx5e_activate_icosq(&c->xskicosq); set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); /* TX queue is created active. */ @@ -153,6 +168,7 @@ void mlx5e_deactivate_xsk(struct mlx5e_channel *c) { mlx5e_deactivate_rq(&c->xskrq); /* TX queue is disabled on close. 
*/ + mlx5e_deactivate_icosq(&c->xskicosq); } static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c index 35e188cf4ea4..87827477d38c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -7,7 +7,7 @@ #include "en/params.h" #include <net/xdp_sock.h> -int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid) +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_params *params = &priv->channels.params; @@ -26,6 +26,13 @@ int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid) return -ENXIO; if (!napi_if_scheduled_mark_missed(&c->napi)) { + /* To avoid WQE overrun, don't post a NOP if XSKICOSQ is not + * active and not polled by NAPI. Return 0, because the upcoming + * activate will trigger the IRQ for us. + */ + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->xskicosq.state))) + return 0; + spin_lock(&c->xskicosq_lock); mlx5e_trigger_irq(&c->xskicosq); spin_unlock(&c->xskicosq_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h index 7add18bf78d8..79b487d89757 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -5,11 +5,23 @@ #define __MLX5_EN_XSK_TX_H__ #include "en.h" +#include <net/xdp_sock.h> /* TX data path */ -int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid); +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags); bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); +static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq) +{ + if (!xsk_umem_uses_need_wakeup(sq->umem)) + return; + + if (sq->pc != sq->cc) + xsk_clear_tx_need_wakeup(sq->umem); + else + xsk_set_tx_need_wakeup(sq->umem); +} + #endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 7833ddef0427..d195366461c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -256,8 +256,7 @@ struct mlx5e_dump_wqe { }; static int -tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb, - skb_frag_t *frag, u32 tisn, bool first) +tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool first) { struct mlx5_wqe_ctrl_seg *cseg; struct mlx5_wqe_data_seg *dseg; @@ -371,8 +370,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, tx_post_resync_params(sq, priv_tx, info.rcd_sn); for (i = 0; i < info.nr_frags; i++) - if (tx_post_resync_dump(sq, skb, info.frags[i], - priv_tx->tisn, !i)) + if (tx_post_resync_dump(sq, info.frags[i], priv_tx->tisn, !i)) goto err_out; /* If no dump WQE was sent, we need to have a fence NOP WQE before the @@ -408,7 +406,7 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, goto out; tls_ctx = tls_get_ctx(skb->sk); - if (unlikely(WARN_ON_ONCE(tls_ctx->netdev != netdev))) + if (WARN_ON_ONCE(tls_ctx->netdev != netdev)) goto err_out; priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 1539cf3de5dc..f7890e0ce96c 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -180,15 +180,3 @@ out: return err; } - -u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev) -{ - u8 min_inline_mode; - - mlx5_query_min_inline(mdev, &min_inline_mode); - if (min_inline_mode == MLX5_INLINE_MODE_NONE && - !MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - min_inline_mode = MLX5_INLINE_MODE_L2; - - return min_inline_mode; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 8dd31b5c740c..01f2918063af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1101,7 +1101,7 @@ void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) static void mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv, struct mlx5e_params *params) { - params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(priv->mdev); + mlx5_query_min_inline(priv->mdev, ¶ms->tx_min_inline_mode); if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP && params->tx_min_inline_mode == MLX5_INLINE_MODE_L2) params->tx_min_inline_mode = MLX5_INLINE_MODE_IP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 20e628c907e5..c5a9c20d7f00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1431,7 +1431,7 @@ static __u32 mlx5e_get_wol_supported(struct mlx5_core_dev *mdev) return ret; } -static __u32 mlx5e_refomrat_wol_mode_mlx5_to_linux(u8 mode) +static __u32 mlx5e_reformat_wol_mode_mlx5_to_linux(u8 mode) { __u32 ret = 0; @@ -1459,7 +1459,7 @@ static __u32 mlx5e_refomrat_wol_mode_mlx5_to_linux(u8 mode) return ret; } -static u8 mlx5e_refomrat_wol_mode_linux_to_mlx5(__u32 mode) +static u8 mlx5e_reformat_wol_mode_linux_to_mlx5(__u32 mode) { u8 ret = 0; @@ -1505,7 +1505,7 @@ static void mlx5e_get_wol(struct net_device *netdev, if (err) return; - wol->wolopts = mlx5e_refomrat_wol_mode_mlx5_to_linux(mlx5_wol_mode); + wol->wolopts = mlx5e_reformat_wol_mode_mlx5_to_linux(mlx5_wol_mode); } static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) @@ -1521,7 +1521,7 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) if (wol->wolopts & ~wol_supported) return -EINVAL; - mlx5_wol_mode = mlx5e_refomrat_wol_mode_linux_to_mlx5(wol->wolopts); + mlx5_wol_mode = mlx5e_reformat_wol_mode_linux_to_mlx5(wol->wolopts); return mlx5_set_port_wol(mdev, mlx5_wol_mode); } @@ -1958,21 +1958,27 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) return priv->channels.params.pflags; } -#ifndef CONFIG_MLX5_EN_RXNFC -/* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS - * otherwise this function will be defined from en_fs_ethtool.c - */ static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) { struct mlx5e_priv *priv = netdev_priv(dev); - if (info->cmd != ETHTOOL_GRXRINGS) - return -EOPNOTSUPP; - /* ring_count is needed by ethtool -x */ - info->data = priv->channels.params.num_channels; - return 0; + /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part + * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc, + * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc + * is compiled out via CONFIG_MLX5_EN_RXNFC=n. 
+ */ + if (info->cmd == ETHTOOL_GRXRINGS) { + info->data = priv->channels.params.num_channels; + return 0; + } + + return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs); +} + +static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + return mlx5e_ethtool_set_rxnfc(dev, cmd); } -#endif const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, @@ -1993,9 +1999,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, .get_rxnfc = mlx5e_get_rxnfc, -#ifdef CONFIG_MLX5_EN_RXNFC .set_rxnfc = mlx5e_set_rxnfc, -#endif .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 76cc10e44080..15b7f0f1427c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -747,8 +747,55 @@ static struct mlx5e_etype_proto ttc_tunnel_rules[] = { .etype = ETH_P_IPV6, .proto = IPPROTO_GRE, }, + [MLX5E_TT_IPV4_IPIP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPIP, + }, + [MLX5E_TT_IPV6_IPIP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPIP, + }, + [MLX5E_TT_IPV4_IPV6] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPV6, + }, + [MLX5E_TT_IPV6_IPV6] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPV6, + }, + }; +bool mlx5e_tunnel_proto_supported(struct mlx5_core_dev *mdev, u8 proto_type) +{ + switch (proto_type) { + case IPPROTO_GRE: + return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); + case IPPROTO_IPIP: + case IPPROTO_IPV6: + return MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip); + default: + return false; + } +} + +bool mlx5e_any_tunnel_proto_supported(struct mlx5_core_dev *mdev) +{ + int tt; + + for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { + if (mlx5e_tunnel_proto_supported(mdev, ttc_tunnel_rules[tt].proto)) + return true; + } + return false; +} + +bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) +{ + return (mlx5e_any_tunnel_proto_supported(mdev) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); +} + static u8 mlx5e_etype_to_ipv(u16 ethertype) { if (ethertype == ETH_P_IP) @@ -838,6 +885,9 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv, dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = params->inner_ttc->ft.t; for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { + if (!mlx5e_tunnel_proto_supported(priv->mdev, + ttc_tunnel_rules[tt].proto)) + continue; rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, ttc_tunnel_rules[tt].etype, ttc_tunnel_rules[tt].proto); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 94304abc49e9..eed7101e8bb7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -888,10 +888,10 @@ static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, return 0; } -int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { - int err = 0; struct mlx5e_priv *priv = netdev_priv(dev); + int err = 0; switch (cmd->cmd) { case ETHTOOL_SRXCLSRLINS: @@ -911,16 +911,13 @@ int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) return err; } -int mlx5e_get_rxnfc(struct net_device *dev, - struct ethtool_rxnfc *info, u32 *rule_locs) +int 
mlx5e_ethtool_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *info, u32 *rule_locs) { struct mlx5e_priv *priv = netdev_priv(dev); int err = 0; switch (info->cmd) { - case ETHTOOL_GRXRINGS: - info->data = priv->channels.params.num_channels; - break; case ETHTOOL_GRXCLSRLCNT: info->rule_cnt = priv->fs.ethtool.tot_num_rules; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9d5f6e56188f..7569287f8f3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -56,12 +56,13 @@ #include "en/xdp.h" #include "lib/eq.h" #include "en/monitor_stats.h" -#include "en/reporter.h" +#include "en/health.h" #include "en/params.h" #include "en/xsk/umem.h" #include "en/xsk/setup.h" #include "en/xsk/rx.h" #include "en/xsk/tx.h" +#include "en/hv_vhca_stats.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) @@ -247,26 +248,6 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } -static u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq) -{ - switch (rq->wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return mlx5_wq_ll_get_size(&rq->mpwqe.wq); - default: - return mlx5_wq_cyc_get_size(&rq->wqe.wq); - } -} - -static u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq) -{ - switch (rq->wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return rq->mpwqe.wq.cur_sz; - default: - return rq->wqe.wq.cur_sz; - } -} - static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, struct mlx5e_channel *c) { @@ -382,6 +363,13 @@ static void mlx5e_free_di_list(struct mlx5e_rq *rq) kvfree(rq->wqe.di); } +static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_rq *rq = container_of(recover_work, struct mlx5e_rq, recover_work); + + mlx5e_reporter_rq_cqe_err(rq); +} + static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, @@ -418,6 +406,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->stats = &c->priv->channel_stats[c->ix].xskrq; else rq->stats = &c->priv->channel_stats[c->ix].rq; + INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work); rq->xdp_prog = params->xdp_prog ? 
bpf_prog_inc(params->xdp_prog) : NULL; if (IS_ERR(rq->xdp_prog)) { @@ -720,8 +709,7 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq, return err; } -static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, - int next_state) +int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) { struct mlx5_core_dev *mdev = rq->mdev; @@ -829,10 +817,11 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes); + mlx5e_reporter_rx_timeout(rq); return -ETIMEDOUT; } -static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) +void mlx5e_free_rx_descs(struct mlx5e_rq *rq) { __be16 wqe_ix_be; u16 wqe_ix; @@ -911,7 +900,7 @@ err_free_rq: return err; } -static void mlx5e_activate_rq(struct mlx5e_rq *rq) +void mlx5e_activate_rq(struct mlx5e_rq *rq) { set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); mlx5e_trigger_irq(&rq->channel->icosq); @@ -926,6 +915,8 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); + cancel_work_sync(&rq->channel->icosq.recover_work); + cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); mlx5e_free_rq(rq); @@ -1042,6 +1033,14 @@ static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa) return 0; } +static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, + recover_work); + + mlx5e_reporter_icosq_cqe_err(sq); +} + static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) @@ -1064,6 +1063,8 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; + INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work); + return 0; err_sq_wq_destroy: @@ -1130,6 +1131,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; sq->stop_room = MLX5E_SQ_STOP_ROOM; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); + if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (mlx5_accel_is_tls_device(c->priv->mdev)) { @@ -1312,7 +1315,6 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c, return 0; err_free_txqsq: - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); mlx5e_free_txqsq(sq); return err; @@ -1377,7 +1379,7 @@ static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, recover_work); - mlx5e_tx_reporter_err_cqe(sq); + mlx5e_reporter_tx_err_cqe(sq); } int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, @@ -1393,7 +1395,6 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = params->tx_min_inline_mode; - set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); if (err) goto err_free_icosq; @@ -1401,18 +1402,27 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, return 0; err_free_icosq: - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); mlx5e_free_icosq(sq); return err; } -void mlx5e_close_icosq(struct mlx5e_icosq *sq) +void mlx5e_activate_icosq(struct mlx5e_icosq *icosq) { - struct 
mlx5e_channel *c = sq->channel; + set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); +} - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); +void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) +{ + struct mlx5e_channel *c = icosq->channel; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); napi_synchronize(&c->napi); +} + +void mlx5e_close_icosq(struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = sq->channel; mlx5e_destroy_sq(c->mdev, sq->sqn); mlx5e_free_icosq(sq); @@ -1430,7 +1440,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, return err; csp.tis_lst_sz = 1; - csp.tisn = c->priv->tisn[0]; /* tc = 0 */ + csp.tisn = c->priv->tisn[c->lag_port][0]; /* tc = 0 */ csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; @@ -1680,7 +1690,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, for (tc = 0; tc < params->num_tc; tc++) { int txq_ix = c->ix + tc * priv->max_nch; - err = mlx5e_open_txqsq(c, c->priv->tisn[tc], txq_ix, + err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, params, &cparam->sq, &c->sq[tc], tc); if (err) goto err_close_sqs; @@ -1914,6 +1924,13 @@ static void mlx5e_close_queues(struct mlx5e_channel *c) mlx5e_close_cq(&c->icosq.cq); } +static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix) +{ + u16 port_aff_bias = mlx5_core_is_pf(mdev) ? 0 : MLX5_CAP_GEN(mdev, vhca_id); + + return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev); +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, struct mlx5e_channel_param *cparam, @@ -1948,6 +1965,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; c->irq_desc = irq_to_desc(irq); + c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); err = mlx5e_alloc_xps_cpumask(c, params); if (err) @@ -1989,6 +2007,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->sq[tc]); + mlx5e_activate_icosq(&c->icosq); mlx5e_activate_rq(&c->rq); netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix); @@ -2004,6 +2023,7 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) mlx5e_deactivate_xsk(c); mlx5e_deactivate_rq(&c->rq); + mlx5e_deactivate_icosq(&c->icosq); for (tc = 0; tc < c->num_tc; tc++) mlx5e_deactivate_txqsq(&c->sq[tc]); } @@ -2321,10 +2341,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, goto err_close_channels; } - if (!IS_ERR_OR_NULL(priv->tx_reporter)) - devlink_health_reporter_state_update(priv->tx_reporter, - DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); - + mlx5e_health_channels_update(priv); kvfree(cparam); return 0; @@ -3168,40 +3185,58 @@ void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn) mlx5_core_destroy_tis(mdev, tisn); } +void mlx5e_destroy_tises(struct mlx5e_priv *priv) +{ + int tc, i; + + for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) + for (tc = 0; tc < priv->profile->max_tc; tc++) + mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]); +} + +static bool mlx5e_lag_should_assign_affinity(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, lag_tx_port_affinity) && mlx5e_get_num_lag_ports(mdev) > 1; +} + int mlx5e_create_tises(struct mlx5e_priv *priv) { + int tc, i; int err; - int tc; - for (tc = 0; tc < priv->profile->max_tc; tc++) { - u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; - void *tisc; + for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) { + for (tc = 0; tc < priv->profile->max_tc; 
tc++) { + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; - tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); - MLX5_SET(tisc, tisc, prio, tc << 1); + MLX5_SET(tisc, tisc, prio, tc << 1); - err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[tc]); - if (err) - goto err_close_tises; + if (mlx5e_lag_should_assign_affinity(priv->mdev)) + MLX5_SET(tisc, tisc, lag_tx_port_affinity, i + 1); + + err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[i][tc]); + if (err) + goto err_close_tises; + } } return 0; err_close_tises: - for (tc--; tc >= 0; tc--) - mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); + for (; i >= 0; i--) { + for (tc--; tc >= 0; tc--) + mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]); + tc = priv->profile->max_tc; + } return err; } static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { - int tc; - - mlx5e_tx_reporter_destroy(priv); - for (tc = 0; tc < priv->profile->max_tc; tc++) - mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); + mlx5e_destroy_tises(priv); } static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv, @@ -3422,7 +3457,7 @@ out: #ifdef CONFIG_MLX5_ESWITCH static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, struct flow_cls_offload *cls_flower, - int flags) + unsigned long flags) { switch (cls_flower->command) { case FLOW_CLS_REPLACE: @@ -3442,12 +3477,12 @@ static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { + unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD); struct mlx5e_priv *priv = cb_priv; switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS | - MLX5E_TC_NIC_OFFLOAD); + return mlx5e_setup_tc_cls_flower(priv, type_data, flags); default: return -EOPNOTSUPP; } @@ -3463,11 +3498,15 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { #ifdef CONFIG_MLX5_ESWITCH - case TC_SETUP_BLOCK: + case TC_SETUP_BLOCK: { + struct flow_block_offload *f = type_data; + + f->unlocked_driver_cb = true; return flow_block_cb_setup_simple(type_data, &mlx5e_block_cb_list, mlx5e_setup_tc_block_cb, priv, priv, true); + } #endif case TC_SETUP_QDISC_MQPRIO: return mlx5e_setup_tc_mqprio(priv, type_data); @@ -3640,7 +3679,7 @@ static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); - if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) { + if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; @@ -3781,9 +3820,10 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); } if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) { - features &= ~NETIF_F_LRO; - if (params->lro_en) + if (features & NETIF_F_LRO) { netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); + features &= ~NETIF_F_LRO; + } } if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { @@ -3950,7 +3990,8 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: /* Disable CQE compression */ - netdev_warn(priv->netdev, "Disabling cqe compression"); + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) + netdev_warn(priv->netdev, 
"Disabling RX cqe compression\n"); err = mlx5e_modify_rx_cqe_compression_locked(priv, false); if (err) { netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); @@ -4202,6 +4243,8 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, switch (proto) { case IPPROTO_GRE: + case IPPROTO_IPIP: + case IPPROTO_IPV6: return features; case IPPROTO_UDP: udph = udp_hdr(skb); @@ -4267,7 +4310,7 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) if (!netif_xmit_stopped(dev_queue)) continue; - if (mlx5e_tx_reporter_timeout(sq)) + if (mlx5e_reporter_tx_timeout(sq)) report_failed = true; } @@ -4535,7 +4578,7 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_tx_timeout = mlx5e_tx_timeout, .ndo_bpf = mlx5e_xdp, .ndo_xdp_xmit = mlx5e_xdp_xmit, - .ndo_xsk_async_xmit = mlx5e_xsk_async_xmit, + .ndo_xsk_wakeup = mlx5e_xsk_wakeup, #ifdef CONFIG_MLX5_EN_ARFS .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif @@ -4768,7 +4811,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); /* TX inline */ - params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); /* RSS */ mlx5e_build_rss_params(rss_params, params->num_channels); @@ -4838,7 +4881,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) || - MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + mlx5e_any_tunnel_proto_supported(mdev)) { netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= NETIF_F_TSO6; @@ -4853,7 +4896,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } - if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + if (mlx5e_tunnel_proto_supported(mdev, IPPROTO_GRE)) { netdev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM; netdev->hw_enc_features |= NETIF_F_GSO_GRE | @@ -4862,6 +4905,15 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) NETIF_F_GSO_GRE_CSUM; } + if (mlx5e_tunnel_proto_supported(mdev, IPPROTO_IPIP)) { + netdev->hw_features |= NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6; + netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6; + netdev->gso_partial_features |= NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6; + } + netdev->hw_features |= NETIF_F_GSO_PARTIAL; netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4; netdev->hw_features |= NETIF_F_GSO_UDP_L4; @@ -4965,12 +5017,14 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); mlx5e_build_nic_netdev(netdev); mlx5e_build_tc2txq_maps(priv); + mlx5e_health_create_reporters(priv); return 0; } static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { + mlx5e_health_destroy_reporters(priv); mlx5e_tls_cleanup(priv); mlx5e_ipsec_cleanup(priv); mlx5e_netdev_cleanup(priv->netdev, priv); @@ -5073,7 +5127,6 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_initialize(priv); #endif - mlx5e_tx_reporter_create(priv); return 0; } @@ -5097,6 +5150,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_init(priv); + mlx5e_hv_vhca_stats_create(priv); if (netdev->reg_state != NETREG_REGISTERED) return; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -5129,6 
+5183,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_hv_vhca_stats_destroy(priv); if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_cleanup(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index d0684fdb69e1..95892a3b63a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -38,6 +38,7 @@ #include <net/netevent.h> #include <net/arp.h> #include <net/devlink.h> +#include <net/ipv6_stubs.h> #include "eswitch.h" #include "en.h" @@ -46,6 +47,8 @@ #include "en/tc_tun.h" #include "fs_core.h" #include "lib/port_tun.h" +#define CREATE_TRACE_POINTS +#include "diag/en_rep_tracepoint.h" #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) @@ -389,24 +392,17 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, }; -static int mlx5e_rep_get_port_parent_id(struct net_device *dev, - struct netdev_phys_item_id *ppid) +static void mlx5e_rep_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { - struct mlx5_eswitch *esw; struct mlx5e_priv *priv; u64 parent_id; priv = netdev_priv(dev); - esw = priv->mdev->priv.eswitch; - - if (esw->mode == MLX5_ESWITCH_NONE) - return -EOPNOTSUPP; parent_id = mlx5_query_nic_system_image_guid(priv->mdev); ppid->id_len = sizeof(parent_id); memcpy(ppid->id, &parent_id, sizeof(parent_id)); - - return 0; } static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, @@ -504,16 +500,18 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) mlx5e_sqs2vport_stop(esw, rep); } +static unsigned long mlx5e_rep_ipv6_interval(void) +{ + if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl) + return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME); + + return ~0UL; +} + static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) { -#if IS_ENABLED(CONFIG_IPV6) - unsigned long ipv6_interval = NEIGH_VAR(&nd_tbl.parms, - DELAY_PROBE_TIME); -#else - unsigned long ipv6_interval = ~0UL; -#endif - unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, - DELAY_PROBE_TIME); + unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + unsigned long ipv6_interval = mlx5e_rep_ipv6_interval(); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); @@ -531,47 +529,97 @@ void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) neigh_update->min_interval); } +static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) +{ + return refcount_inc_not_zero(&nhe->refcnt); +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe); + +static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) +{ + if (refcount_dec_and_test(&nhe->refcnt)) { + mlx5e_rep_neigh_entry_remove(nhe); + kfree_rcu(nhe, rcu); + } +} + +static struct mlx5e_neigh_hash_entry * +mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh_hash_entry *next = NULL; + + rcu_read_lock(); + + for (next = nhe ? 
+ list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, + &nhe->neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list) : + list_first_or_null_rcu(&rpriv->neigh_update.neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list); + next; + next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, + &next->neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list)) + if (mlx5e_rep_neigh_entry_hold(next)) + break; + + rcu_read_unlock(); + + if (nhe) + mlx5e_rep_neigh_entry_release(nhe); + + return next; +} + static void mlx5e_rep_neigh_stats_work(struct work_struct *work) { struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, neigh_update.neigh_stats_work.work); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_neigh_hash_entry *nhe; + struct mlx5e_neigh_hash_entry *nhe = NULL; rtnl_lock(); if (!list_empty(&rpriv->neigh_update.neigh_list)) mlx5e_rep_queue_neigh_stats_work(priv); - list_for_each_entry(nhe, &rpriv->neigh_update.neigh_list, neigh_list) + while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL) mlx5e_tc_update_neigh_used_value(nhe); rtnl_unlock(); } -static void mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) -{ - refcount_inc(&nhe->refcnt); -} - -static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) -{ - if (refcount_dec_and_test(&nhe->refcnt)) - kfree(nhe); -} - static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e, bool neigh_connected, unsigned char ha[ETH_ALEN]) { struct ethhdr *eth = (struct ethhdr *)e->encap_header; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + bool encap_connected; + LIST_HEAD(flow_list); ASSERT_RTNL(); + /* wait for encap to be fully initialized */ + wait_for_completion(&e->res_ready); + + mutex_lock(&esw->offloads.encap_tbl_lock); + encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); + if (e->compl_result || (encap_connected == neigh_connected && + ether_addr_equal(e->h_dest, ha))) + goto unlock; + + mlx5e_take_all_encap_flows(e, &flow_list); + if ((e->flags & MLX5_ENCAP_ENTRY_VALID) && (!neigh_connected || !ether_addr_equal(e->h_dest, ha))) - mlx5e_tc_encap_flows_del(priv, e); + mlx5e_tc_encap_flows_del(priv, e, &flow_list); if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { ether_addr_copy(e->h_dest, ha); @@ -581,8 +629,11 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, */ ether_addr_copy(eth->h_source, e->route_dev->dev_addr); - mlx5e_tc_encap_flows_add(priv, e); + mlx5e_tc_encap_flows_add(priv, e, &flow_list); } +unlock: + mutex_unlock(&esw->offloads.encap_tbl_lock); + mlx5e_put_encap_flow_list(priv, &flow_list); } static void mlx5e_rep_neigh_update(struct work_struct *work) @@ -594,7 +645,6 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) unsigned char ha[ETH_ALEN]; struct mlx5e_priv *priv; bool neigh_connected; - bool encap_connected; u8 nud_state, dead; rtnl_lock(); @@ -612,13 +662,15 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) neigh_connected = (nud_state & NUD_VALID) && !dead; + trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); + list_for_each_entry(e, &nhe->encap_list, encap_list) { - encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); - priv = netdev_priv(e->out_dev); + if (!mlx5e_encap_take(e)) + continue; - if (encap_connected != neigh_connected || - !ether_addr_equal(e->h_dest, ha)) - mlx5e_rep_update_flows(priv, e, neigh_connected, ha); + priv = netdev_priv(e->out_dev); + mlx5e_rep_update_flows(priv, 
e, neigh_connected, ha); + mlx5e_encap_put(priv, e); } mlx5e_rep_neigh_entry_release(nhe); rtnl_unlock(); @@ -659,8 +711,8 @@ mlx5e_rep_indr_offload(struct net_device *netdev, struct flow_cls_offload *flower, struct mlx5e_rep_indr_block_priv *indr_priv) { + unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); - int flags = MLX5E_TC_EGRESS | MLX5E_TC_ESW_OFFLOAD; int err = 0; switch (flower->command) { @@ -714,6 +766,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; + f->unlocked_driver_cb = true; f->driver_block_list = &mlx5e_block_cb_list; switch (f->command) { @@ -722,10 +775,6 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, if (indr_priv) return -EEXIST; - if (flow_block_cb_is_busy(mlx5e_rep_indr_setup_block_cb, - indr_priv, &mlx5e_block_cb_list)) - return -EBUSY; - indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL); if (!indr_priv) return -ENOMEM; @@ -785,9 +834,9 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, { int err; - err = __tc_indr_block_cb_register(netdev, rpriv, - mlx5e_rep_indr_setup_tc_cb, - rpriv); + err = __flow_indr_block_cb_register(netdev, rpriv, + mlx5e_rep_indr_setup_tc_cb, + rpriv); if (err) { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); @@ -800,8 +849,8 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, struct net_device *netdev) { - __tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, - rpriv); + __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, + rpriv); } static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, @@ -827,6 +876,28 @@ static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, return NOTIFY_OK; } +static void +mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe, + struct neighbour *n) +{ + /* Take a reference to ensure the neighbour and mlx5 encap + * entry won't be destructed until we drop the reference in + * delayed work. + */ + neigh_hold(n); + + /* This assignment is valid as long as the neigh reference + * is taken + */ + nhe->n = n; + + if (!queue_work(priv->wq, &nhe->neigh_update_work)) { + mlx5e_rep_neigh_entry_release(nhe); + neigh_release(n); + } +} + static struct mlx5e_neigh_hash_entry * mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, struct mlx5e_neigh *m_neigh); @@ -849,7 +920,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, case NETEVENT_NEIGH_UPDATE: n = ptr; #if IS_ENABLED(CONFIG_IPV6) - if (n->tbl != &nd_tbl && n->tbl != &arp_tbl) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) #else if (n->tbl != &arp_tbl) #endif @@ -859,34 +930,13 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, m_neigh.family = n->ops->family; memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - /* We are in atomic context and can't take RTNL mutex, so use - * spin_lock_bh to lookup the neigh table. bh is used since - * netevent can be called from a softirq context.
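The helper just added pins both the neighbour and the hash entry before queueing the update work, and unwinds the references when queue_work() reports the item was already pending. Below is a minimal sketch of that take-then-queue pattern; the pinned_obj type and its helpers are hypothetical, not part of the driver.

    #include <linux/workqueue.h>
    #include <linux/refcount.h>
    #include <linux/slab.h>

    struct pinned_obj {                     /* hypothetical refcounted object */
            refcount_t refcnt;
            struct work_struct work;
    };

    static void pinned_obj_put(struct pinned_obj *obj)
    {
            if (refcount_dec_and_test(&obj->refcnt))
                    kfree(obj);
    }

    static bool pinned_obj_queue(struct workqueue_struct *wq,
                                 struct pinned_obj *obj)
    {
            /* pin the object for the lifetime of the queued work */
            if (!refcount_inc_not_zero(&obj->refcnt))
                    return false;

            /* queue_work() returns false if the work item was already
             * pending; the reference taken above must then be dropped,
             * mirroring the neigh_release()/entry_release() pair in
             * mlx5e_rep_queue_neigh_update_work() above.
             */
            if (!queue_work(wq, &obj->work)) {
                    pinned_obj_put(obj);
                    return false;
            }
            return true;
    }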
- */ - spin_lock_bh(&neigh_update->encap_lock); + rcu_read_lock(); nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); - if (!nhe) { - spin_unlock_bh(&neigh_update->encap_lock); + rcu_read_unlock(); + if (!nhe) return NOTIFY_DONE; - } - - /* This assignment is valid as long as the the neigh reference - * is taken - */ - nhe->n = n; - - /* Take a reference to ensure the neighbour and mlx5 encap - * entry won't be destructed until we drop the reference in - * delayed work. - */ - neigh_hold(n); - mlx5e_rep_neigh_entry_hold(nhe); - if (!queue_work(priv->wq, &nhe->neigh_update_work)) { - mlx5e_rep_neigh_entry_release(nhe); - neigh_release(n); - } - spin_unlock_bh(&neigh_update->encap_lock); + mlx5e_rep_queue_neigh_update_work(priv, nhe, n); break; case NETEVENT_DELAY_PROBE_TIME_UPDATE: @@ -897,25 +947,21 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, * done per device delay prob time parameter. */ #if IS_ENABLED(CONFIG_IPV6) - if (!p->dev || (p->tbl != &nd_tbl && p->tbl != &arp_tbl)) + if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) #else if (!p->dev || p->tbl != &arp_tbl) #endif return NOTIFY_DONE; - /* We are in atomic context and can't take RTNL mutex, - * so use spin_lock_bh to walk the neigh list and look for - * the relevant device. bh is used since netevent can be - * called from a softirq context. - */ - spin_lock_bh(&neigh_update->encap_lock); - list_for_each_entry(nhe, &neigh_update->neigh_list, neigh_list) { + rcu_read_lock(); + list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, + neigh_list) { if (p->dev == nhe->m_neigh.dev) { found = true; break; } } - spin_unlock_bh(&neigh_update->encap_lock); + rcu_read_unlock(); if (!found) return NOTIFY_DONE; @@ -946,7 +992,7 @@ static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) return err; INIT_LIST_HEAD(&neigh_update->neigh_list); - spin_lock_init(&neigh_update->encap_lock); + mutex_init(&neigh_update->encap_lock); INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, mlx5e_rep_neigh_stats_work); mlx5e_rep_neigh_update_init_interval(rpriv); @@ -973,6 +1019,7 @@ static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); + mutex_destroy(&neigh_update->encap_lock); rhashtable_destroy(&neigh_update->neigh_ht); } @@ -988,28 +1035,27 @@ static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, if (err) return err; - list_add(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); + list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); return err; } -static void mlx5e_rep_neigh_entry_remove(struct mlx5e_priv *priv, - struct mlx5e_neigh_hash_entry *nhe) +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe) { - struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv; - spin_lock_bh(&rpriv->neigh_update.encap_lock); + mutex_lock(&rpriv->neigh_update.encap_lock); - list_del(&nhe->neigh_list); + list_del_rcu(&nhe->neigh_list); rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, &nhe->rhash_node, mlx5e_neigh_ht_params); - spin_unlock_bh(&rpriv->neigh_update.encap_lock); + mutex_unlock(&rpriv->neigh_update.encap_lock); } -/* This function must only be called under RTNL lock or under the - * representor's encap_lock in case RTNL mutex can't be held. +/* This function must only be called under the representor's encap_lock or + * inside rcu read lock section. 
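The locking contract in this comment pairs RCU with refcount_inc_not_zero(): an entry found under rcu_read_lock() may already be losing its last reference, so the lookup hands it out only if it can still be pinned. A condensed sketch of the pattern, with a hypothetical cached_entry type:

    #include <linux/rcupdate.h>
    #include <linux/refcount.h>
    #include <linux/rhashtable.h>

    struct cached_entry {                   /* hypothetical table entry */
            struct rhash_head node;
            refcount_t refcnt;
    };

    static struct cached_entry *
    entry_lookup_and_hold(struct rhashtable *ht, const void *key,
                          const struct rhashtable_params params)
    {
            struct cached_entry *e;

            rcu_read_lock();
            e = rhashtable_lookup_fast(ht, key, params);
            /* a concurrent release may be dropping the last reference;
             * only hand the entry out if it can still be pinned
             */
            if (e && !refcount_inc_not_zero(&e->refcnt))
                    e = NULL;
            rcu_read_unlock();

            return e;
    }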
*/ static struct mlx5e_neigh_hash_entry * mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, @@ -1017,9 +1063,11 @@ mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_neigh_hash_entry *nhe; - return rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, - mlx5e_neigh_ht_params); + nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, + mlx5e_neigh_ht_params); + return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL; } static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, @@ -1032,8 +1080,10 @@ static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, if (!*nhe) return -ENOMEM; + (*nhe)->priv = priv; memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); + spin_lock_init(&(*nhe)->encap_list_lock); INIT_LIST_HEAD(&(*nhe)->encap_list); refcount_set(&(*nhe)->refcnt, 1); @@ -1047,19 +1097,6 @@ out_free: return err; } -static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv, - struct mlx5e_neigh_hash_entry *nhe) -{ - /* The neigh hash entry must be removed from the hash table regardless - * of the reference count value, so it won't be found by the next - * neigh notification call. The neigh hash entry reference count is - * incremented only during creation and neigh notification calls and - * protects from freeing the nhe struct. - */ - mlx5e_rep_neigh_entry_remove(priv, nhe); - mlx5e_rep_neigh_entry_release(nhe); -} - int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { @@ -1072,16 +1109,26 @@ int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type); if (err) return err; + + mutex_lock(&rpriv->neigh_update.encap_lock); nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); if (!nhe) { err = mlx5e_rep_neigh_entry_create(priv, e, &nhe); if (err) { + mutex_unlock(&rpriv->neigh_update.encap_lock); mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); return err; } } - list_add(&e->encap_list, &nhe->encap_list); + + e->nhe = nhe; + spin_lock(&nhe->encap_list_lock); + list_add_rcu(&e->encap_list, &nhe->encap_list); + spin_unlock(&nhe->encap_list_lock); + + mutex_unlock(&rpriv->neigh_update.encap_lock); + return 0; } @@ -1091,13 +1138,16 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; - struct mlx5e_neigh_hash_entry *nhe; - list_del(&e->encap_list); - nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + if (!e->nhe) + return; + + spin_lock(&e->nhe->encap_list_lock); + list_del_rcu(&e->encap_list); + spin_unlock(&e->nhe->encap_list_lock); - if (list_empty(&nhe->encap_list)) - mlx5e_rep_neigh_entry_destroy(priv, nhe); + mlx5e_rep_neigh_entry_release(e->nhe); + e->nhe = NULL; mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); } @@ -1160,15 +1210,34 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, } } +static +int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + switch (ma->command) { + case TC_CLSMATCHALL_REPLACE: + return mlx5e_tc_configure_matchall(priv, ma); + case TC_CLSMATCHALL_DESTROY: + return mlx5e_tc_delete_matchall(priv, ma); + case TC_CLSMATCHALL_STATS: + 
mlx5e_tc_stats_matchall(priv, ma); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { + unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); struct mlx5e_priv *priv = cb_priv; switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS | - MLX5E_TC_ESW_OFFLOAD); + return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags); + case TC_SETUP_CLSMATCHALL: + return mlx5e_rep_setup_tc_cls_matchall(priv, type_data); default: return -EOPNOTSUPP; } @@ -1180,9 +1249,11 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + struct flow_block_offload *f = type_data; switch (type) { case TC_SETUP_BLOCK: + f->unlocked_driver_cb = true; return flow_block_cb_setup_simple(type_data, &mlx5e_rep_block_cb_list, mlx5e_rep_setup_tc_cb, @@ -1553,7 +1624,7 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_rep_uplink_priv *uplink_priv; - int tc, err; + int err; err = mlx5e_create_tises(priv); if (err) { @@ -1564,6 +1635,7 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { uplink_priv = &rpriv->uplink_priv; + mutex_init(&uplink_priv->unready_flows_lock); INIT_LIST_HEAD(&uplink_priv->unready_flows); /* init shared tc flow table */ @@ -1588,18 +1660,15 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) tc_esw_cleanup: mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht); destroy_tises: - for (tc = 0; tc < priv->profile->max_tc; tc++) - mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); + mlx5e_destroy_tises(priv); return err; } static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; - int tc; - for (tc = 0; tc < priv->profile->max_tc; tc++) - mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); + mlx5e_destroy_tises(priv); if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { /* clean indirect TC block notifications */ @@ -1608,6 +1677,7 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) /* delete shared tc flow table */ mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); + mutex_destroy(&rpriv->uplink_priv.unready_flows_lock); } } @@ -1731,37 +1801,46 @@ is_devlink_port_supported(const struct mlx5_core_dev *dev, mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport); } +static unsigned int +vport_to_devlink_port_index(const struct mlx5_core_dev *dev, u16 vport_num) +{ + return (MLX5_CAP_GEN(dev, vhca_id) << 16) | vport_num; +} + static int register_devlink_port(struct mlx5_core_dev *dev, struct mlx5e_rep_priv *rpriv) { struct devlink *devlink = priv_to_devlink(dev); struct mlx5_eswitch_rep *rep = rpriv->rep; struct netdev_phys_item_id ppid = {}; - int ret; + unsigned int dl_port_index = 0; if (!is_devlink_port_supported(dev, rpriv)) return 0; - ret = mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid); - if (ret) - return ret; + mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid); - if (rep->vport == MLX5_VPORT_UPLINK) + if (rep->vport == MLX5_VPORT_UPLINK) { devlink_port_attrs_set(&rpriv->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, PCI_FUNC(dev->pdev->devfn), false, 0, &ppid.id[0], ppid.id_len); - else if (rep->vport == MLX5_VPORT_PF) + dl_port_index = vport_to_devlink_port_index(dev, rep->vport); + } else if (rep->vport == MLX5_VPORT_PF) { devlink_port_attrs_pci_pf_set(&rpriv->dl_port, &ppid.id[0], 
ppid.id_len, dev->pdev->devfn); - else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport)) + dl_port_index = rep->vport; + } else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, + rpriv->rep->vport)) { devlink_port_attrs_pci_vf_set(&rpriv->dl_port, &ppid.id[0], ppid.id_len, dev->pdev->devfn, rep->vport - 1); + dl_port_index = vport_to_devlink_port_index(dev, rep->vport); + } - return devlink_port_register(devlink, &rpriv->dl_port, rep->vport); + return devlink_port_register(devlink, &rpriv->dl_port, dl_port_index); } static void unregister_devlink_port(struct mlx5_core_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index c56e6ee4350c..31f83c8adcc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -35,6 +35,7 @@ #include <net/ip_tunnels.h> #include <linux/rhashtable.h> +#include <linux/mutex.h> #include "eswitch.h" #include "en.h" #include "lib/port_tun.h" @@ -48,7 +49,7 @@ struct mlx5e_neigh_update_table { */ struct list_head neigh_list; /* protect lookup/remove operations */ - spinlock_t encap_lock; + struct mutex encap_lock; struct notifier_block netevent_nb; struct delayed_work neigh_stats_work; unsigned long min_interval; /* jiffies */ @@ -75,6 +76,8 @@ struct mlx5_rep_uplink_priv { struct mlx5_tun_entropy tun_entropy; + /* protects unready_flows */ + struct mutex unready_flows_lock; struct list_head unready_flows; struct work_struct reoffload_flows_work; }; @@ -86,6 +89,7 @@ struct mlx5e_rep_priv { struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ + struct rtnl_link_stats64 prev_vf_vport_stats; struct devlink_port dl_port; }; @@ -107,6 +111,7 @@ struct mlx5e_neigh { struct mlx5e_neigh_hash_entry { struct rhash_head rhash_node; struct mlx5e_neigh m_neigh; + struct mlx5e_priv *priv; /* Save the neigh hash entry in a list on the representor in * addition to the hash table. In order to iterate easily over the @@ -114,6 +119,8 @@ struct mlx5e_neigh_hash_entry { */ struct list_head neigh_list; + /* protects encap list */ + spinlock_t encap_list_lock; /* encap list sharing the same neigh */ struct list_head encap_list; @@ -134,6 +141,8 @@ struct mlx5e_neigh_hash_entry { * 'used' value and avoid neigh deleting by the kernel. 
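Iterating this list to refresh the 'used' value has to cope with entries being freed concurrently, which is why mlx5e_get_next_nhe() earlier in en_rep.c holds each element across one loop step and releases it on the next. A simplified version of that RCU list cursor, with a hypothetical walk_item type:

    #include <linux/rculist.h>
    #include <linux/refcount.h>
    #include <linux/slab.h>

    struct walk_item {                      /* hypothetical list element */
            struct list_head list;
            refcount_t refcnt;
            struct rcu_head rcu;
    };

    static void walk_item_put(struct walk_item *item)
    {
            if (refcount_dec_and_test(&item->refcnt))
                    kfree_rcu(item, rcu);
    }

    static struct walk_item *walk_next(struct list_head *head,
                                       struct walk_item *cur)
    {
            struct walk_item *next;

            rcu_read_lock();
            next = cur ? list_next_or_null_rcu(head, &cur->list,
                                               struct walk_item, list)
                       : list_first_or_null_rcu(head, struct walk_item, list);
            /* skip elements whose last reference is already gone */
            while (next && !refcount_inc_not_zero(&next->refcnt))
                    next = list_next_or_null_rcu(head, &next->list,
                                                 struct walk_item, list);
            rcu_read_unlock();

            if (cur)
                    walk_item_put(cur);     /* drop the ref from the last step */
            return next;
    }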
*/ unsigned long reported_lastuse; + + struct rcu_head rcu; }; enum { @@ -142,6 +151,8 @@ enum { }; struct mlx5e_encap_entry { + /* attached neigh hash entry */ + struct mlx5e_neigh_hash_entry *nhe; /* neigh hash entry list of encaps sharing the same neigh */ struct list_head encap_list; struct mlx5e_neigh m_neigh; @@ -150,7 +161,7 @@ struct mlx5e_encap_entry { */ struct hlist_node encap_hlist; struct list_head flows; - u32 encap_id; + struct mlx5_pkt_reformat *pkt_reformat; const struct ip_tunnel_info *tun_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ @@ -161,6 +172,10 @@ struct mlx5e_encap_entry { u8 flags; char *encap_header; int encap_size; + refcount_t refcnt; + struct completion res_ready; + int compl_result; + struct rcu_head rcu; }; struct mlx5e_rep_sq { @@ -168,7 +183,6 @@ struct mlx5e_rep_sq { struct list_head list; }; -void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev); void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev); void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev); bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ac6e586d403d..d6a547238de0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -48,6 +48,7 @@ #include "lib/clock.h" #include "en/xdp.h" #include "en/xsk/rx.h" +#include "en/health.h" static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) { @@ -615,6 +616,8 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { netdev_WARN_ONCE(cq->channel->netdev, "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); + if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + queue_work(cq->channel->priv->wq, &sq->recover_work); break; } do { @@ -692,8 +695,11 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk; rq->mpwqe.actual_wq_head = head; - /* If XSK Fill Ring doesn't have enough frames, busy poll by - * rescheduling the NAPI poll. + /* If XSK Fill Ring doesn't have enough frames, report the error, so + * that one of the actions can be performed: + * 1. If need_wakeup is used, signal that the application has to kick + * the driver when it refills the Fill Ring. + * 2. Otherwise, busy poll by rescheduling the NAPI poll. */ if (unlikely(alloc_err == -ENOMEM && rq->umem)) return true; @@ -859,13 +865,24 @@ tail_padding_csum(struct sk_buff *skb, int offset, } static void -mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto, - struct mlx5e_rq_stats *stats) +mlx5e_skb_csum_fixup(struct sk_buff *skb, int network_depth, __be16 proto, + struct mlx5e_rq_stats *stats) { struct ipv6hdr *ip6; struct iphdr *ip4; int pkt_len; + /* Fixup vlan headers, if any */ + if (network_depth > ETH_HLEN) + /* CQE csum is calculated from the IP header and does + * not cover VLAN headers (if present). This will add + * the checksum manually. 
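For CHECKSUM_COMPLETE, the device starts summing at the IP header, so with one 802.1Q tag network_depth equals ETH_HLEN + VLAN_HLEN and those four tag bytes are missing from skb->csum. Restated as a standalone helper (name hypothetical), the fixup is:

    #include <linux/if_ether.h>
    #include <linux/if_vlan.h>
    #include <linux/skbuff.h>
    #include <net/checksum.h>

    /* fold the VLAN tag bytes, which sit between the Ethernet header
     * and the IP header, into the device-provided checksum
     */
    static void csum_fold_in_vlan(struct sk_buff *skb, int network_depth)
    {
            if (network_depth > ETH_HLEN)
                    skb->csum = csum_partial(skb->data + ETH_HLEN,
                                             network_depth - ETH_HLEN,
                                             skb->csum);
    }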
+ */ + skb->csum = csum_partial(skb->data + ETH_HLEN, + network_depth - ETH_HLEN, + skb->csum); + + /* Fixup tail padding, if any */ switch (proto) { case htons(ETH_P_IP): ip4 = (struct iphdr *)(skb->data + network_depth); @@ -931,16 +948,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; /* CQE csum covers all received bytes */ /* csum might need some fixups ...*/ - if (network_depth > ETH_HLEN) - /* CQE csum is calculated from the IP header and does - * not cover VLAN headers (if present). This will add - * the checksum manually. - */ - skb->csum = csum_partial(skb->data + ETH_HLEN, - network_depth - ETH_HLEN, - skb->csum); - - mlx5e_skb_padding_csum(skb, network_depth, proto, stats); + mlx5e_skb_csum_fixup(skb, network_depth, proto, stats); return; } @@ -1065,11 +1073,6 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, prefetchw(va); /* xdp_frame data area */ prefetch(data); - if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { - rq->stats->wqe_err++; - return NULL; - } - rcu_read_lock(); consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, false); rcu_read_unlock(); @@ -1097,11 +1100,6 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, u16 byte_cnt = cqe_bcnt - headlen; struct sk_buff *skb; - if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { - rq->stats->wqe_err++; - return NULL; - } - /* XDP is not supported in this configuration, as incoming packets * might spread among multiple pages. */ @@ -1135,6 +1133,15 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, return skb; } +static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe; + + if (cqe_syndrome_needs_recover(err_cqe->syndrome) && + !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state)) + queue_work(rq->channel->priv->wq, &rq->recover_work); +} + void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct mlx5_wq_cyc *wq = &rq->wqe.wq; @@ -1147,6 +1154,12 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + trigger_report(rq, cqe); + rq->stats->wqe_err++; + goto free_wqe; + } + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, mlx5e_skb_from_cqe_linear, mlx5e_skb_from_cqe_nonlinear, @@ -1188,6 +1201,11 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + rq->stats->wqe_err++; + goto free_wqe; + } + skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); if (!skb) { /* probably for XDP */ @@ -1322,7 +1340,8 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi->consumed_strides += cstrides; - if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + trigger_report(rq, cqe); rq->stats->wqe_err++; goto mpwrq_cqe_out; } @@ -1498,6 +1517,11 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + rq->stats->wqe_err++; + goto wq_free_wqe; + } + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, mlx5e_skb_from_cqe_linear, mlx5e_skb_from_cqe_nonlinear, @@ -1533,26 +1557,27 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = 
be32_to_cpu(cqe->byte_cnt); + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + rq->stats->wqe_err++; + goto wq_free_wqe; + } + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, mlx5e_skb_from_cqe_linear, mlx5e_skb_from_cqe_nonlinear, rq, cqe, wi, cqe_bcnt); - if (unlikely(!skb)) { - /* a DROP, save the page-reuse checks */ - mlx5e_free_rx_wqe(rq, wi, true); - goto wq_cyc_pop; - } + if (unlikely(!skb)) /* a DROP, save the page-reuse checks */ + goto wq_free_wqe; + skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb, &cqe_bcnt); - if (unlikely(!skb)) { - mlx5e_free_rx_wqe(rq, wi, true); - goto wq_cyc_pop; - } + if (unlikely(!skb)) + goto wq_free_wqe; mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); napi_gro_receive(rq->cq.napi, skb); +wq_free_wqe: mlx5e_free_rx_wqe(rq, wi, true); -wq_cyc_pop: mlx5_wq_cyc_pop(wq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 57f9f346d213..ac6fdcda7019 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -74,6 +74,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_mpwqe) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_nops) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) }, @@ -90,6 +91,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_mpwqe) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_nops) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) }, @@ -107,6 +109,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_events) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) }, @@ -200,6 +203,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_xdp_tx_xmit += xdpsq_stats->xmit; s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe; s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw; + s->rx_xdp_tx_nops += xdpsq_stats->nops; s->rx_xdp_tx_full += xdpsq_stats->full; s->rx_xdp_tx_err += xdpsq_stats->err; s->rx_xdp_tx_cqe += xdpsq_stats->cqes; @@ -217,6 +221,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_cache_waive += rq_stats->cache_waive; s->rx_congst_umr += rq_stats->congst_umr; s->rx_arfs_err += rq_stats->arfs_err; + s->rx_recover += rq_stats->recover; s->ch_events += ch_stats->events; s->ch_poll += ch_stats->poll; s->ch_arm += ch_stats->arm; @@ -227,6 +232,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_xdp_xmit += xdpsq_red_stats->xmit; s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe; s->tx_xdp_inlnw += xdpsq_red_stats->inlnw; + s->tx_xdp_nops += xdpsq_red_stats->nops; s->tx_xdp_full += 
xdpsq_red_stats->full; s->tx_xdp_err += xdpsq_red_stats->err; s->tx_xdp_cqes += xdpsq_red_stats->cqes; @@ -363,17 +369,27 @@ static void mlx5e_grp_q_update_stats(struct mlx5e_priv *priv) } #define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c) -static const struct counter_desc vnic_env_stats_desc[] = { +static const struct counter_desc vnic_env_stats_steer_desc[] = { { "rx_steer_missed_packets", VNIC_ENV_OFF(vport_env.nic_receive_steering_discard) }, }; -#define NUM_VNIC_ENV_COUNTERS ARRAY_SIZE(vnic_env_stats_desc) +static const struct counter_desc vnic_env_stats_dev_oob_desc[] = { + { "dev_internal_queue_oob", + VNIC_ENV_OFF(vport_env.internal_rq_out_of_buffer) }, +}; + +#define NUM_VNIC_ENV_STEER_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, nic_receive_steering_discard) ? \ + ARRAY_SIZE(vnic_env_stats_steer_desc) : 0) +#define NUM_VNIC_ENV_DEV_OOB_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, vnic_env_int_rq_oob) ? \ + ARRAY_SIZE(vnic_env_stats_dev_oob_desc) : 0) static int mlx5e_grp_vnic_env_get_num_stats(struct mlx5e_priv *priv) { - return MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard) ? - NUM_VNIC_ENV_COUNTERS : 0; + return NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev) + + NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); } static int mlx5e_grp_vnic_env_fill_strings(struct mlx5e_priv *priv, u8 *data, @@ -381,12 +397,13 @@ static int mlx5e_grp_vnic_env_fill_strings(struct mlx5e_priv *priv, u8 *data, { int i; - if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) - return idx; + for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vnic_env_stats_steer_desc[i].format); - for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++) + for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - vnic_env_stats_desc[i].format); + vnic_env_stats_dev_oob_desc[i].format); return idx; } @@ -395,12 +412,13 @@ static int mlx5e_grp_vnic_env_fill_stats(struct mlx5e_priv *priv, u64 *data, { int i; - if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) - return idx; - - for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++) + for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++) data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out, - vnic_env_stats_desc, i); + vnic_env_stats_steer_desc, i); + + for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_dev_oob_desc, i); return idx; } @@ -963,6 +981,147 @@ static void mlx5e_grp_pcie_update_stats(struct mlx5e_priv *priv) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); } +#define PPORT_PER_TC_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_tc_prio_grp_data_layout.c##_high) + +static const struct counter_desc pport_per_tc_prio_stats_desc[] = { + { "rx_prio%d_buf_discard", PPORT_PER_TC_PRIO_OFF(no_buffer_discard_uc) }, +}; + +#define NUM_PPORT_PER_TC_PRIO_COUNTERS ARRAY_SIZE(pport_per_tc_prio_stats_desc) + +#define PPORT_PER_TC_CONGEST_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_tc_congest_prio_grp_data_layout.c##_high) + +static const struct counter_desc pport_per_tc_congest_prio_stats_desc[] = { + { "rx_prio%d_cong_discard", PPORT_PER_TC_CONGEST_PRIO_OFF(wred_discard) }, + { "rx_prio%d_marked", PPORT_PER_TC_CONGEST_PRIO_OFF(ecn_marked_tc) }, +}; + +#define NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS \ + ARRAY_SIZE(pport_per_tc_congest_prio_stats_desc) + +static int mlx5e_grp_per_tc_prio_get_num_stats(struct 
mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + return NUM_PPORT_PER_TC_PRIO_COUNTERS * NUM_PPORT_PRIO; +} + +static int mlx5e_grp_per_port_buffer_congest_fill_strings(struct mlx5e_priv *priv, + u8 *data, int idx) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int i, prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return idx; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_tc_prio_stats_desc[i].format, prio); + for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_tc_congest_prio_stats_desc[i].format, prio); + } + + return idx; +} + +static int mlx5e_grp_per_port_buffer_congest_fill_stats(struct mlx5e_priv *priv, + u64 *data, int idx) +{ + struct mlx5e_pport_stats *pport = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + int i, prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return idx; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&pport->per_tc_prio_counters[prio], + pport_per_tc_prio_stats_desc, i); + for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS ; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&pport->per_tc_congest_prio_counters[prio], + pport_per_tc_congest_prio_stats_desc, i); + } + + return idx; +} + +static void mlx5e_grp_per_tc_prio_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + int prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return; + + MLX5_SET(ppcnt_reg, in, pnat, 2); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_tc_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + } +} + +static int mlx5e_grp_per_tc_congest_prio_get_num_stats(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + return NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS * NUM_PPORT_PRIO; +} + +static void mlx5e_grp_per_tc_congest_prio_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + int prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return; + + MLX5_SET(ppcnt_reg, in, pnat, 2); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_TRAFFIC_CLASS_CONGESTION_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_tc_congest_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + } +} + +static int mlx5e_grp_per_port_buffer_congest_get_num_stats(struct mlx5e_priv *priv) +{ + return mlx5e_grp_per_tc_prio_get_num_stats(priv) + + mlx5e_grp_per_tc_congest_prio_get_num_stats(priv); +} + +static void mlx5e_grp_per_port_buffer_congest_update_stats(struct mlx5e_priv *priv) +{ + mlx5e_grp_per_tc_prio_update_stats(priv); + mlx5e_grp_per_tc_congest_prio_update_stats(priv); +} + #define PPORT_PER_PRIO_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ 
counter_set.eth_per_prio_grp_data_layout.c##_high) @@ -1294,6 +1453,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) }, }; static const struct counter_desc sq_stats_desc[] = { @@ -1331,6 +1491,7 @@ static const struct counter_desc rq_xdpsq_stats_desc[] = { { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, @@ -1340,6 +1501,7 @@ static const struct counter_desc xdpsq_stats_desc[] = { { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, @@ -1589,7 +1751,13 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = { .get_num_stats = mlx5e_grp_channels_get_num_stats, .fill_strings = mlx5e_grp_channels_fill_strings, .fill_stats = mlx5e_grp_channels_fill_stats, - } + }, + { + .get_num_stats = mlx5e_grp_per_port_buffer_congest_get_num_stats, + .fill_strings = mlx5e_grp_per_port_buffer_congest_fill_strings, + .fill_stats = mlx5e_grp_per_port_buffer_congest_fill_stats, + .update_stats = mlx5e_grp_per_port_buffer_congest_update_stats, + }, }; const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 76ac111e14d0..79f261bf86ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -81,6 +81,7 @@ struct mlx5e_sw_stats { u64 rx_xdp_tx_xmit; u64 rx_xdp_tx_mpwqe; u64 rx_xdp_tx_inlnw; + u64 rx_xdp_tx_nops; u64 rx_xdp_tx_full; u64 rx_xdp_tx_err; u64 rx_xdp_tx_cqe; @@ -97,6 +98,7 @@ struct mlx5e_sw_stats { u64 tx_xdp_xmit; u64 tx_xdp_mpwqe; u64 tx_xdp_inlnw; + u64 tx_xdp_nops; u64 tx_xdp_full; u64 tx_xdp_err; u64 tx_xdp_cqes; @@ -114,6 +116,7 @@ struct mlx5e_sw_stats { u64 rx_cache_waive; u64 rx_congst_umr; u64 rx_arfs_err; + u64 rx_recover; u64 ch_events; u64 ch_poll; u64 ch_arm; @@ -204,6 +207,8 @@ struct mlx5e_pport_stats { __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_tc_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_tc_congest_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; }; #define PCIE_PERF_GET(pcie_stats, c) \ @@ -247,6 +252,7 @@ struct mlx5e_rq_stats { u64 cache_waive; u64 congst_umr; u64 arfs_err; + u64 recover; }; struct mlx5e_sq_stats { @@ -288,6 +294,7 @@ struct mlx5e_xdpsq_stats { u64 xmit; u64 mpwqe; u64 inlnw; + u64 nops; u64 full; u64 err; /* dirtied @completion */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 00b2d4a86159..da7555fdb4d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -38,6 +38,8 @@ #include <linux/mlx5/fs.h> #include <linux/mlx5/device.h> #include <linux/rhashtable.h> +#include <linux/refcount.h> +#include <linux/completion.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> #include <net/tc_act/tc_tunnel_key.h> @@ -54,30 +56,32 @@ #include "en/tc_tun.h" #include "lib/devcom.h" #include "lib/geneve.h" +#include "diag/en_tc_tracepoint.h" struct mlx5_nic_flow_attr { u32 action; u32 flow_tag; - u32 mod_hdr_id; + struct mlx5_modify_hdr *modify_hdr; u32 hairpin_tirn; u8 match_level; struct mlx5_flow_table *hairpin_ft; struct mlx5_fc *counter; }; -#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1) +#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1) enum { - MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS, - MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS, - MLX5E_TC_FLOW_ESWITCH = MLX5E_TC_ESW_OFFLOAD, - MLX5E_TC_FLOW_NIC = MLX5E_TC_NIC_OFFLOAD, - MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE), - MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 1), - MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2), - MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 3), - MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 4), - MLX5E_TC_FLOW_NOT_READY = BIT(MLX5E_TC_FLOW_BASE + 5), + MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT, + MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT, + MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE, + MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1, + MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2, + MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3, + MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, + MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, + MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, }; #define MLX5E_TC_MAX_SPLITS 1 @@ -100,6 +104,7 @@ enum { * container_of(helper item, containing struct type, helper field[index]) */ struct encap_flow_item { + struct mlx5e_encap_entry *e; /* attached encap instance */ struct list_head list; int index; }; @@ -108,7 +113,7 @@ struct mlx5e_tc_flow { struct rhash_head node; struct mlx5e_priv *priv; u64 cookie; - u16 flags; + unsigned long flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; /* Flow can be associated with multiple encap IDs. 
* The number of encaps is bounded by the number of supported @@ -116,10 +121,17 @@ struct mlx5e_tc_flow { */ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5e_tc_flow *peer_flow; + struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */ struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ + struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ struct list_head hairpin; /* flows sharing the same hairpin */ struct list_head peer; /* flows with peer flow */ struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */ + int tmp_efi_index; + struct list_head tmp_list; /* temporary flow list used by neigh update */ + refcount_t refcnt; + struct rcu_head rcu_head; + struct completion init_done; union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@ -157,12 +169,20 @@ struct mlx5e_hairpin_entry { /* a node of a hash table which keeps all the hairpin entries */ struct hlist_node hairpin_hlist; + /* protects flows list */ + spinlock_t flows_lock; /* flows sharing the same hairpin */ struct list_head flows; + /* hpe's that were not fully initialized when dead peer update event + * function traversed them. + */ + struct list_head dead_peer_wait_list; u16 peer_vhca_id; u8 prio; struct mlx5e_hairpin *hp; + refcount_t refcnt; + struct completion res_ready; }; struct mod_hdr_key { @@ -174,16 +194,93 @@ struct mlx5e_mod_hdr_entry { /* a node of a hash table which keeps all the mod_hdr entries */ struct hlist_node mod_hdr_hlist; + /* protects flows list */ + spinlock_t flows_lock; /* flows sharing the same mod_hdr entry */ struct list_head flows; struct mod_hdr_key key; - u32 mod_hdr_id; + struct mlx5_modify_hdr *modify_hdr; + + refcount_t refcnt; + struct completion res_ready; + int compl_result; }; #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + +static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow) +{ + if (!flow || !refcount_inc_not_zero(&flow->refcnt)) + return ERR_PTR(-EINVAL); + return flow; +} + +static void mlx5e_flow_put(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + if (refcount_dec_and_test(&flow->refcnt)) { + mlx5e_tc_del_flow(priv, flow); + kfree_rcu(flow, rcu_head); + } +} + +static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before setting bit. */ + smp_mb__before_atomic(); + set_bit(flag, &flow->flags); +} + +#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag) + +static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow, + unsigned long flag) +{ + /* test_and_set_bit() provides all necessary barriers */ + return test_and_set_bit(flag, &flow->flags); +} + +#define flow_flag_test_and_set(flow, flag) \ + __flow_flag_test_and_set(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before clearing bit. */ + smp_mb__before_atomic(); + clear_bit(flag, &flow->flags); +} + +#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + bool ret = test_bit(flag, &flow->flags); + + /* Read fields of flow structure only after checking flags. 
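These barrier comments describe a publish/consume pairing: a writer finishes its stores to the flow before setting a flag, and a reader checks the flag before touching the fields. Assuming hypothetical wrapper functions around the helpers above, the intended usage looks like:

    /* writer side: fill in the flow, then publish it */
    static void flow_publish_rule(struct mlx5e_tc_flow *flow,
                                  struct mlx5_flow_handle *rule)
    {
            flow->rule[0] = rule;           /* plain store ...               */
            flow_flag_set(flow, OFFLOADED); /* ... ordered before set_bit() */
    }

    /* reader side: observe the flag, then read the fields */
    static struct mlx5_flow_handle *
    flow_peek_rule(struct mlx5e_tc_flow *flow)
    {
            if (!flow_flag_test(flow, OFFLOADED)) /* test_bit() + rmb pairing */
                    return NULL;
            return flow->rule[0];           /* sees the writer's store */
    }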
*/ + smp_mb__after_atomic(); + return ret; +} + +#define flow_flag_test(flow, flag) __flow_flag_test(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, ESWITCH); +} + +static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, OFFLOADED); +} + static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key) { return jhash(key->actions, @@ -199,15 +296,62 @@ static inline int cmp_mod_hdr_info(struct mod_hdr_key *a, return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ); } +static struct mod_hdr_tbl * +get_mod_hdr_table(struct mlx5e_priv *priv, int namespace) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr : + &priv->fs.tc.mod_hdr; +} + +static struct mlx5e_mod_hdr_entry * +mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key) +{ + struct mlx5e_mod_hdr_entry *mh, *found = NULL; + + hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) { + if (!cmp_mod_hdr_info(&mh->key, key)) { + refcount_inc(&mh->refcnt); + found = mh; + break; + } + } + + return found; +} + +static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv, + struct mlx5e_mod_hdr_entry *mh, + int namespace) +{ + struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace); + + if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock)) + return; + hash_del(&mh->mod_hdr_hlist); + mutex_unlock(&tbl->lock); + + WARN_ON(!list_empty(&mh->flows)); + if (mh->compl_result > 0) + mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr); + + kfree(mh); +} + +static int get_flow_name_space(struct mlx5e_tc_flow *flow) +{ + return mlx5e_is_eswitch_flow(flow) ? 
+ MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL; +} static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5e_tc_flow_parse_attr *parse_attr) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int num_actions, actions_size, namespace, err; struct mlx5e_mod_hdr_entry *mh; + struct mod_hdr_tbl *tbl; struct mod_hdr_key key; - bool found = false; u32 hash_key; num_actions = parse_attr->num_mod_hdr_actions; @@ -218,80 +362,82 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, hash_key = hash_mod_hdr_info(&key); - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - namespace = MLX5_FLOW_NAMESPACE_FDB; - hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh, - mod_hdr_hlist, hash_key) { - if (!cmp_mod_hdr_info(&mh->key, &key)) { - found = true; - break; - } - } - } else { - namespace = MLX5_FLOW_NAMESPACE_KERNEL; - hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh, - mod_hdr_hlist, hash_key) { - if (!cmp_mod_hdr_info(&mh->key, &key)) { - found = true; - break; - } - } - } + namespace = get_flow_name_space(flow); + tbl = get_mod_hdr_table(priv, namespace); + + mutex_lock(&tbl->lock); + mh = mlx5e_mod_hdr_get(tbl, &key, hash_key); + if (mh) { + mutex_unlock(&tbl->lock); + wait_for_completion(&mh->res_ready); - if (found) + if (mh->compl_result < 0) { + err = -EREMOTEIO; + goto attach_header_err; + } goto attach_flow; + } mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL); - if (!mh) + if (!mh) { + mutex_unlock(&tbl->lock); return -ENOMEM; + } mh->key.actions = (void *)mh + sizeof(*mh); memcpy(mh->key.actions, key.actions, actions_size); mh->key.num_actions = num_actions; + spin_lock_init(&mh->flows_lock); INIT_LIST_HEAD(&mh->flows); - - err = mlx5_modify_header_alloc(priv->mdev, namespace, - mh->key.num_actions, - mh->key.actions, - &mh->mod_hdr_id); - if (err) - goto out_err; - - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) - hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); - else - hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); + refcount_set(&mh->refcnt, 1); + init_completion(&mh->res_ready); + + hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key); + mutex_unlock(&tbl->lock); + + mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace, + mh->key.num_actions, + mh->key.actions); + if (IS_ERR(mh->modify_hdr)) { + err = PTR_ERR(mh->modify_hdr); + mh->compl_result = err; + goto alloc_header_err; + } + mh->compl_result = 1; + complete_all(&mh->res_ready); attach_flow: + flow->mh = mh; + spin_lock(&mh->flows_lock); list_add(&flow->mod_hdr, &mh->flows); - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) - flow->esw_attr->mod_hdr_id = mh->mod_hdr_id; + spin_unlock(&mh->flows_lock); + if (mlx5e_is_eswitch_flow(flow)) + flow->esw_attr->modify_hdr = mh->modify_hdr; else - flow->nic_attr->mod_hdr_id = mh->mod_hdr_id; + flow->nic_attr->modify_hdr = mh->modify_hdr; return 0; -out_err: - kfree(mh); +alloc_header_err: + complete_all(&mh->res_ready); +attach_header_err: + mlx5e_mod_hdr_put(priv, mh, namespace); return err; } static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - struct list_head *next = flow->mod_hdr.next; + /* flow wasn't fully initialized */ + if (!flow->mh) + return; + spin_lock(&flow->mh->flows_lock); list_del(&flow->mod_hdr); + spin_unlock(&flow->mh->flows_lock); - if (list_empty(next)) { - struct mlx5e_mod_hdr_entry *mh; - - mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows); - - mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id); - 
hash_del(&mh->mod_hdr_hlist); - kfree(mh); - } + mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow)); + flow->mh = NULL; } static @@ -555,13 +701,35 @@ static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe, hairpin_hlist, hash_key) { - if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) + if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) { + refcount_inc(&hpe->refcnt); return hpe; + } } return NULL; } +static void mlx5e_hairpin_put(struct mlx5e_priv *priv, + struct mlx5e_hairpin_entry *hpe) +{ + /* no more hairpin flows for us, release the hairpin pair */ + if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock)) + return; + hash_del(&hpe->hairpin_hlist); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + + if (!IS_ERR_OR_NULL(hpe->hp)) { + netdev_dbg(priv->netdev, "del hairpin: peer %s\n", + dev_name(hpe->hp->pair->peer_mdev->device)); + + mlx5e_hairpin_destroy(hpe->hp); + } + + WARN_ON(!list_empty(&hpe->flows)); + kfree(hpe); +} + #define UNKNOWN_MATCH_PRIO 8 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, @@ -627,17 +795,37 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, extack); if (err) return err; + + mutex_lock(&priv->fs.tc.hairpin_tbl_lock); hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); - if (hpe) + if (hpe) { + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + wait_for_completion(&hpe->res_ready); + + if (IS_ERR(hpe->hp)) { + err = -EREMOTEIO; + goto out_err; + } goto attach_flow; + } hpe = kzalloc(sizeof(*hpe), GFP_KERNEL); - if (!hpe) + if (!hpe) { + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); return -ENOMEM; + } + spin_lock_init(&hpe->flows_lock); INIT_LIST_HEAD(&hpe->flows); + INIT_LIST_HEAD(&hpe->dead_peer_wait_list); hpe->peer_vhca_id = peer_id; hpe->prio = match_prio; + refcount_set(&hpe->refcnt, 1); + init_completion(&hpe->res_ready); + + hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, + hash_hairpin_info(peer_id, match_prio)); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); params.log_data_size = 15; params.log_data_size = min_t(u8, params.log_data_size, @@ -659,9 +847,11 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, params.num_channels = link_speed64; hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); + hpe->hp = hp; + complete_all(&hpe->res_ready); if (IS_ERR(hp)) { err = PTR_ERR(hp); - goto create_hairpin_err; + goto out_err; } netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", @@ -669,46 +859,39 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, dev_name(hp->pair->peer_mdev->device), hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); - hpe->hp = hp; - hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, - hash_hairpin_info(peer_id, match_prio)); - attach_flow: if (hpe->hp->num_channels > 1) { - flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS; + flow_flag_set(flow, HAIRPIN_RSS); flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; } else { flow->nic_attr->hairpin_tirn = hpe->hp->tirn; } + + flow->hpe = hpe; + spin_lock(&hpe->flows_lock); list_add(&flow->hairpin, &hpe->flows); + spin_unlock(&hpe->flows_lock); return 0; -create_hairpin_err: - kfree(hpe); +out_err: + mlx5e_hairpin_put(priv, hpe); return err; } static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - struct list_head *next = flow->hairpin.next; + /* flow wasn't fully initialized */ + if (!flow->hpe) + return; + 
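/* Detach from the hairpin entry under its flows_lock, then drop the
+	 * reference; the last reference releases the hairpin pair itself.
+	 */
+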
spin_lock(&flow->hpe->flows_lock); list_del(&flow->hairpin); + spin_unlock(&flow->hpe->flows_lock); - /* no more hairpin flows for us, release the hairpin pair */ - if (list_empty(next)) { - struct mlx5e_hairpin_entry *hpe; - - hpe = list_entry(next, struct mlx5e_hairpin_entry, flows); - - netdev_dbg(priv->netdev, "del hairpin: peer %s\n", - dev_name(hpe->hp->pair->peer_mdev->device)); - - mlx5e_hairpin_destroy(hpe->hp); - hash_del(&hpe->hairpin_hlist); - kfree(hpe); - } + mlx5e_hairpin_put(priv, flow->hpe); + flow->hpe = NULL; } static int @@ -723,22 +906,20 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, - .reformat_id = 0, .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; - bool table_created = false; int err, dest_ix = 0; flow_context->flags |= FLOW_CONTEXT_HAS_TAG; flow_context->flow_tag = attr->flow_tag; - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { + if (flow_flag_test(flow, HAIRPIN)) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); - if (err) { - goto err_add_hairpin_flow; - } - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) { + if (err) + return err; + + if (flow_flag_test(flow, HAIRPIN_RSS)) { dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[dest_ix].ft = attr->hairpin_ft; } else { @@ -754,10 +935,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); - goto err_fc_create; - } + if (IS_ERR(counter)) + return PTR_ERR(counter); + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[dest_ix].counter_id = mlx5_fc_id(counter); dest_ix++; @@ -766,12 +946,13 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - flow_act.modify_id = attr->mod_hdr_id; + flow_act.modify_hdr = attr->modify_hdr; kfree(parse_attr->mod_hdr_actions); if (err) - goto err_create_mod_hdr_id; + return err; } + mutex_lock(&priv->fs.tc.t_lock); if (IS_ERR_OR_NULL(priv->fs.tc.t)) { int tc_grp_size, tc_tbl_size; u32 max_flow_counter; @@ -791,15 +972,13 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, MLX5E_TC_TABLE_NUM_GROUPS, MLX5E_TC_FT_LEVEL, 0); if (IS_ERR(priv->fs.tc.t)) { + mutex_unlock(&priv->fs.tc.t_lock); NL_SET_ERR_MSG_MOD(extack, "Failed to create tc offload table\n"); netdev_err(priv->netdev, "Failed to create tc offload table\n"); - err = PTR_ERR(priv->fs.tc.t); - goto err_create_ft; + return PTR_ERR(priv->fs.tc.t); } - - table_created = true; } if (attr->match_level != MLX5_MATCH_NONE) @@ -807,29 +986,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, &flow_act, dest, dest_ix); + mutex_unlock(&priv->fs.tc.t_lock); - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - goto err_add_rule; - } - - return 0; - -err_add_rule: - if (table_created) { - mlx5_destroy_flow_table(priv->fs.tc.t); - priv->fs.tc.t = NULL; - } -err_create_ft: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5e_detach_mod_hdr(priv, flow); -err_create_mod_hdr_id: - mlx5_fc_destroy(dev, counter); -err_fc_create: - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) - mlx5e_hairpin_flow_del(priv, flow); -err_add_hairpin_flow: - return err; + return PTR_ERR_OR_ZERO(flow->rule[0]); } static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, @@ -839,18 +998,21 @@ static 
void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, struct mlx5_fc *counter = NULL; counter = attr->counter; - mlx5_del_flow_rules(flow->rule[0]); + if (!IS_ERR_OR_NULL(flow->rule[0])) + mlx5_del_flow_rules(flow->rule[0]); mlx5_fc_destroy(priv->mdev, counter); - if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD) && priv->fs.tc.t) { + mutex_lock(&priv->fs.tc.t_lock); + if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } + mutex_unlock(&priv->fs.tc.t_lock); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) + if (flow_flag_test(flow, HAIRPIN)) mlx5e_hairpin_flow_del(priv, flow); } @@ -885,7 +1047,6 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, } } - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; return rule; } @@ -894,7 +1055,7 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_esw_flow_attr *attr) { - flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + flow_flag_clear(flow, OFFLOADED); if (attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); @@ -917,7 +1078,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) - flow->flags |= MLX5E_TC_FLOW_SLOW; + flow_flag_set(flow, SLOW); return rule; } @@ -932,7 +1093,26 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, slow_attr->split_count = 0; slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); - flow->flags &= ~MLX5E_TC_FLOW_SLOW; + flow_flag_clear(flow, SLOW); +} + +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. + */ +static void unready_flow_add(struct mlx5e_tc_flow *flow, + struct list_head *unready_flows) +{ + flow_flag_set(flow, NOT_READY); + list_add_tail(&flow->unready, unready_flows); +} + +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. 
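+ * It undoes unready_flow_add(): the flow is unlinked and its NOT_READY
+ * flag is cleared.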
+ */ +static void unready_flow_del(struct mlx5e_tc_flow *flow) +{ + list_del(&flow->unready); + flow_flag_clear(flow, NOT_READY); } static void add_unready_flow(struct mlx5e_tc_flow *flow) @@ -945,14 +1125,24 @@ static void add_unready_flow(struct mlx5e_tc_flow *flow) rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); uplink_priv = &rpriv->uplink_priv; - flow->flags |= MLX5E_TC_FLOW_NOT_READY; - list_add_tail(&flow->unready, &uplink_priv->unready_flows); + mutex_lock(&uplink_priv->unready_flows_lock); + unready_flow_add(flow, &uplink_priv->unready_flows); + mutex_unlock(&uplink_priv->unready_flows_lock); } static void remove_unready_flow(struct mlx5e_tc_flow *flow) { - list_del(&flow->unready); - flow->flags &= ~MLX5E_TC_FLOW_NOT_READY; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &rpriv->uplink_priv; + + mutex_lock(&uplink_priv->unready_flows_lock); + unready_flow_del(flow); + mutex_unlock(&uplink_priv->unready_flows_lock); } static int @@ -980,14 +1170,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (attr->chain > max_chain) { NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); - err = -EOPNOTSUPP; - goto err_max_prio_chain; + return -EOPNOTSUPP; } if (attr->prio > max_prio) { NL_SET_ERR_MSG(extack, "Requested priority is out of supported range"); - err = -EOPNOTSUPP; - goto err_max_prio_chain; + return -EOPNOTSUPP; } for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { @@ -1002,7 +1190,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, err = mlx5e_attach_encap(priv, flow, out_dev, out_index, extack, &encap_dev, &encap_valid); if (err) - goto err_attach_encap; + return err; out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; @@ -1012,21 +1200,19 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) - goto err_add_vlan; + return err; if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); kfree(parse_attr->mod_hdr_actions); if (err) - goto err_mod_hdr; + return err; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(attr->counter_dev, true); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); - goto err_create_counter; - } + if (IS_ERR(counter)) + return PTR_ERR(counter); attr->counter = counter; } @@ -1044,27 +1230,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); } - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - goto err_add_rule; - } + if (IS_ERR(flow->rule[0])) + return PTR_ERR(flow->rule[0]); + else + flow_flag_set(flow, OFFLOADED); return 0; - -err_add_rule: - mlx5_fc_destroy(attr->counter_dev, counter); -err_create_counter: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5e_detach_mod_hdr(priv, flow); -err_mod_hdr: - mlx5_eswitch_del_vlan_action(esw, attr); -err_add_vlan: - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) - if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) - mlx5e_detach_encap(priv, flow, out_index); -err_attach_encap: -err_max_prio_chain: - return err; } static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) @@ -1088,14 +1259,14 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr slow_attr; int out_index; - if (flow->flags & 
MLX5E_TC_FLOW_NOT_READY) { + if (flow_flag_test(flow, NOT_READY)) { remove_unready_flow(flow); kvfree(attr->parse_attr); return; } - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - if (flow->flags & MLX5E_TC_FLOW_SLOW) + if (mlx5e_is_offloaded_flow(flow)) { + if (flow_flag_test(flow, SLOW)) mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); else mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); @@ -1119,39 +1290,39 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e) + struct mlx5e_encap_entry *e, + struct list_head *flow_list) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr slow_attr, *esw_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; - struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err; - err = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - e->encap_size, e->encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) { - mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n", - err); + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + e->encap_size, e->encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n", + PTR_ERR(e->pkt_reformat)); return; } e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(priv); - list_for_each_entry(efi, &e->flows, list) { + list_for_each_entry(flow, flow_list, tmp_list) { bool all_flow_encaps_valid = true; int i; - flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + if (!mlx5e_is_offloaded_flow(flow)) + continue; esw_attr = flow->esw_attr; spec = &esw_attr->parse_attr->spec; - esw_attr->dests[efi->index].encap_id = e->encap_id; - esw_attr->dests[efi->index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat; + esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; /* Flow can be associated with multiple encap entries. * Before offloading the flow verify that all of them have * a valid neighbour. 
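* If any destination is still unresolved, the flow is left on the slow path.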
@@ -1177,30 +1348,32 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, } mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */ flow->rule[0] = rule; + /* was unset when slow path rule removed */ + flow_flag_set(flow, OFFLOADED); } } void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e) + struct mlx5e_encap_entry *e, + struct list_head *flow_list) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr slow_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; - struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err; - list_for_each_entry(efi, &e->flows, list) { - flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + list_for_each_entry(flow, flow_list, tmp_list) { + if (!mlx5e_is_offloaded_flow(flow)) + continue; spec = &flow->esw_attr->parse_attr->spec; /* update from encap rule to slow path rule */ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr); /* mark the flow's encap dest as non-valid */ - flow->esw_attr->dests[efi->index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; + flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1210,28 +1383,102 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, } mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr); - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */ flow->rule[0] = rule; + /* was unset when fast path rule removed */ + flow_flag_set(flow, OFFLOADED); } /* we know that the encap is valid */ e->flags &= ~MLX5_ENCAP_ENTRY_VALID; - mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); } static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (mlx5e_is_eswitch_flow(flow)) return flow->esw_attr->counter; else return flow->nic_attr->counter; } +/* Takes reference to all flows attached to encap and adds the flows to + * flow_list using 'tmp_list' list_head in mlx5e_tc_flow. + */ +void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list) +{ + struct encap_flow_item *efi; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(efi, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + wait_for_completion(&flow->init_done); + + flow->tmp_efi_index = efi->index; + list_add(&flow->tmp_list, flow_list); + } +} + +/* Iterate over tmp_list of flows attached to flow_list head. */ +void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) +{ + struct mlx5e_tc_flow *flow, *tmp; + + list_for_each_entry_safe(flow, tmp, flow_list, tmp_list) + mlx5e_flow_put(priv, flow); +} + +static struct mlx5e_encap_entry * +mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_encap_entry *next = NULL; + +retry: + rcu_read_lock(); + + /* find encap with non-zero reference counter value */ + for (next = e ? 
+ list_next_or_null_rcu(&nhe->encap_list, + &e->encap_list, + struct mlx5e_encap_entry, + encap_list) : + list_first_or_null_rcu(&nhe->encap_list, + struct mlx5e_encap_entry, + encap_list); + next; + next = list_next_or_null_rcu(&nhe->encap_list, + &next->encap_list, + struct mlx5e_encap_entry, + encap_list)) + if (mlx5e_encap_take(next)) + break; + + rcu_read_unlock(); + + /* release starting encap */ + if (e) + mlx5e_encap_put(netdev_priv(e->out_dev), e); + if (!next) + return next; + + /* wait for encap to be fully initialized */ + wait_for_completion(&next->res_ready); + /* continue searching if encap entry is not in valid state after completion */ + if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) { + e = next; + goto retry; + } + + return next; +} + void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_neigh *m_neigh = &nhe->m_neigh; + struct mlx5e_encap_entry *e = NULL; struct mlx5e_tc_flow *flow; - struct mlx5e_encap_entry *e; struct mlx5_fc *counter; struct neigh_table *tbl; bool neigh_used = false; @@ -1242,19 +1489,30 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) tbl = &arp_tbl; #if IS_ENABLED(CONFIG_IPV6) else if (m_neigh->family == AF_INET6) - tbl = &nd_tbl; + tbl = ipv6_stub->nd_tbl; #endif else return; - list_for_each_entry(e, &nhe->encap_list, encap_list) { - struct encap_flow_item *efi; - if (!(e->flags & MLX5_ENCAP_ENTRY_VALID)) - continue; - list_for_each_entry(efi, &e->flows, list) { + /* mlx5e_get_next_valid_encap() releases previous encap before returning + * next one. + */ + while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) { + struct mlx5e_priv *priv = netdev_priv(e->out_dev); + struct encap_flow_item *efi, *tmp; + struct mlx5_eswitch *esw; + LIST_HEAD(flow_list); + + esw = priv->mdev->priv.eswitch; + mutex_lock(&esw->offloads.encap_tbl_lock); + list_for_each_entry_safe(efi, tmp, &e->flows, list) { flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + list_add(&flow->tmp_list, &flow_list); + + if (mlx5e_is_offloaded_flow(flow)) { counter = mlx5e_tc_get_counter(flow); lastuse = mlx5_fc_query_lastuse(counter); if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { @@ -1263,10 +1521,18 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) } } } - if (neigh_used) + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_put_encap_flow_list(priv, &flow_list); + if (neigh_used) { + /* release current encap before breaking the loop */ + mlx5e_encap_put(priv, e); break; + } } + trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used); + if (neigh_used) { nhe->reported_lastuse = jiffies; @@ -1282,40 +1548,69 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) } } -static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index) +static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { - struct list_head *next = flow->encaps[out_index].list.next; + WARN_ON(!list_empty(&e->flows)); - list_del(&flow->encaps[out_index].list); - if (list_empty(next)) { - struct mlx5e_encap_entry *e; - - e = list_entry(next, struct mlx5e_encap_entry, flows); + if (e->compl_result > 0) { mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); if (e->flags & MLX5_ENCAP_ENTRY_VALID) - mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + 
} - hash_del_rcu(&e->encap_hlist); - kfree(e->encap_header); - kfree(e); + kfree(e->encap_header); + kfree_rcu(e, rcu); +} + +void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock)) + return; + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); +} + +static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, int out_index) +{ + struct mlx5e_encap_entry *e = flow->encaps[out_index].e; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + /* flow wasn't fully initialized */ + if (!e) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + list_del(&flow->encaps[out_index].list); + flow->encaps[out_index].e = NULL; + if (!refcount_dec_and_test(&e->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; } + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); } static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; - if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || - !(flow->flags & MLX5E_TC_FLOW_DUP)) + if (!flow_flag_test(flow, ESWITCH) || + !flow_flag_test(flow, DUP)) return; mutex_lock(&esw->offloads.peer_mutex); list_del(&flow->peer); mutex_unlock(&esw->offloads.peer_mutex); - flow->flags &= ~MLX5E_TC_FLOW_DUP; + flow_flag_clear(flow, DUP); mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); kvfree(flow->peer_flow); @@ -1339,7 +1634,7 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + if (mlx5e_is_eswitch_flow(flow)) { mlx5e_tc_del_fdb_peer_flow(flow); mlx5e_tc_del_fdb_flow(priv, flow); } else { @@ -1596,7 +1891,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *match_level = MLX5_MATCH_L2; } } else if (*match_level != MLX5_MATCH_NONE) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); + /* cvlan_tag enabled in match criteria and + * disabled in match value means both S & C tags + * don't exist (untagged of both) + */ MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); *match_level = MLX5_MATCH_L2; } @@ -1840,6 +2138,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep; + bool is_eswitch_flow; int err; inner_match_level = MLX5_MATCH_NONE; @@ -1850,7 +2149,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? 
outer_match_level : inner_match_level; - if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { + is_eswitch_flow = mlx5e_is_eswitch_flow(flow); + if (!err && is_eswitch_flow) { rep = rpriv->rep; if (rep->vport != MLX5_VPORT_UPLINK && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && @@ -1864,7 +2164,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, } } - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + if (is_eswitch_flow) { flow->esw_attr->inner_match_level = inner_match_level; flow->esw_attr->outer_match_level = outer_match_level; } else { @@ -2385,14 +2685,15 @@ static bool actions_match_supported(struct mlx5e_priv *priv, { u32 actions; - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (mlx5e_is_eswitch_flow(flow)) actions = flow->esw_attr->action; else actions = flow->nic_attr->action; - if (flow->flags & MLX5E_TC_FLOW_EGRESS && + if (flow_flag_test(flow, EGRESS) && !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP))) + (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || + (actions & MLX5_FLOW_CONTEXT_ACTION_DROP))) return false; if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) @@ -2542,7 +2843,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, if (priv->netdev->netdev_ops == peer_dev->netdev_ops && same_hw_devs(priv, netdev_priv(peer_dev))) { parse_attr->mirred_ifindex[0] = peer_dev->ifindex; - flow->flags |= MLX5E_TC_FLOW_HAIRPIN; + flow_flag_set(flow, HAIRPIN); action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; } else { @@ -2629,6 +2930,31 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, +bool mlx5e_encap_take(struct mlx5e_encap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + +static struct mlx5e_encap_entry * +mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_entry *e; + struct encap_key e_key; + + hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, + encap_hlist, hash_key) { + e_key.ip_tun_key = &e->tun_info->key; + e_key.tc_tunnel = e->tunnel; + if (!cmp_encap_info(&e_key, key) && + mlx5e_encap_take(e)) + return e; + } + + return NULL; +} + static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct net_device *mirred_dev, @@ -2641,11 +2967,10 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; const struct ip_tunnel_info *tun_info; - struct encap_key key, e_key; + struct encap_key key; struct mlx5e_encap_entry *e; unsigned short family; uintptr_t hash_key; - bool found = false; int err = 0; parse_attr = attr->parse_attr; @@ -2660,57 +2985,78 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, hash_key = hash_encap_info(&key); - hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, - encap_hlist, hash_key) { - e_key.ip_tun_key = &e->tun_info->key; - e_key.tc_tunnel = e->tunnel; - if (!cmp_encap_info(&e_key, &key)) { - found = true; - break; - } - } + mutex_lock(&esw->offloads.encap_tbl_lock); + e = mlx5e_encap_get(priv, &key, hash_key); /* must verify if encap is valid or not */ - if (found) + if (e) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + wait_for_completion(&e->res_ready); + + /* Protect against concurrent neigh update. 
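Neigh update
+	 * flips MLX5_ENCAP_ENTRY_VALID and replaces e->pkt_reformat under
+	 * encap_tbl_lock, so the lock is re-taken before compl_result is read.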
*/ + mutex_lock(&esw->offloads.encap_tbl_lock); + if (e->compl_result < 0) { + err = -EREMOTEIO; + goto out_err; + } goto attach_flow; + } e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) - return -ENOMEM; + if (!e) { + err = -ENOMEM; + goto out_err; + } + + refcount_set(&e->refcnt, 1); + init_completion(&e->res_ready); e->tun_info = tun_info; err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); - if (err) + if (err) { + kfree(e); + e = NULL; goto out_err; + } INIT_LIST_HEAD(&e->flows); + hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + mutex_unlock(&esw->offloads.encap_tbl_lock); if (family == AF_INET) err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); - if (err) + /* Protect against concurrent neigh update. */ + mutex_lock(&esw->offloads.encap_tbl_lock); + complete_all(&e->res_ready); + if (err) { + e->compl_result = err; goto out_err; - - hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + } + e->compl_result = 1; attach_flow: + flow->encaps[out_index].e = e; list_add(&flow->encaps[out_index].list, &e->flows); flow->encaps[out_index].index = out_index; *encap_dev = e->out_dev; if (e->flags & MLX5_ENCAP_ENTRY_VALID) { - attr->dests[out_index].encap_id = e->encap_id; + attr->dests[out_index].pkt_reformat = e->pkt_reformat; attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; *encap_valid = true; } else { *encap_valid = false; } + mutex_unlock(&esw->offloads.encap_tbl_lock); return err; out_err: - kfree(e); + mutex_unlock(&esw->offloads.encap_tbl_lock); + if (e) + mlx5e_encap_put(priv, e); return err; } @@ -2890,12 +3236,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (netdev_port_same_parent_id(priv->netdev, out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); + struct net_device *uplink_upper; + rcu_read_lock(); + uplink_upper = + netdev_master_upper_dev_get_rcu(uplink_dev); if (uplink_upper && netif_is_lag_master(uplink_upper) && uplink_upper == out_dev) out_dev = uplink_dev; + rcu_read_unlock(); if (is_vlan_dev(out_dev)) { err = add_vlan_push_action(priv, attr, @@ -3066,19 +3416,19 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return 0; } -static void get_flags(int flags, u16 *flow_flags) +static void get_flags(int flags, unsigned long *flow_flags) { - u16 __flow_flags = 0; + unsigned long __flow_flags = 0; - if (flags & MLX5E_TC_INGRESS) - __flow_flags |= MLX5E_TC_FLOW_INGRESS; - if (flags & MLX5E_TC_EGRESS) - __flow_flags |= MLX5E_TC_FLOW_EGRESS; + if (flags & MLX5_TC_FLAG(INGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS); + if (flags & MLX5_TC_FLAG(EGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS); - if (flags & MLX5E_TC_ESW_OFFLOAD) - __flow_flags |= MLX5E_TC_FLOW_ESWITCH; - if (flags & MLX5E_TC_NIC_OFFLOAD) - __flow_flags |= MLX5E_TC_FLOW_NIC; + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); + if (flags & MLX5_TC_FLAG(NIC_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); *flow_flags = __flow_flags; } @@ -3090,12 +3440,13 @@ static const struct rhashtable_params tc_ht_params = { .automatic_shrinking = true, }; -static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags) +static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, + unsigned long flags) { 
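	/* ESW offload flows live in the uplink representor's tc_ht; NIC offload
	 * flows live in the per-netdev priv->fs.tc.ht.
	 */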
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *uplink_rpriv; - if (flags & MLX5E_TC_ESW_OFFLOAD) { + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); return &uplink_rpriv->uplink_priv.tc_ht; } else /* NIC offload */ @@ -3106,7 +3457,7 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) { struct mlx5_esw_flow_attr *attr = flow->esw_attr; bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && - flow->flags & MLX5E_TC_FLOW_INGRESS; + flow_flag_test(flow, INGRESS); bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, @@ -3125,13 +3476,13 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, - struct flow_cls_offload *f, u16 flow_flags, + struct flow_cls_offload *f, unsigned long flow_flags, struct mlx5e_tc_flow_parse_attr **__parse_attr, struct mlx5e_tc_flow **__flow) { struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; - int err; + int out_index, err; flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); @@ -3143,6 +3494,12 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, flow->cookie = f->cookie; flow->flags = flow_flags; flow->priv = priv; + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + INIT_LIST_HEAD(&flow->encaps[out_index].list); + INIT_LIST_HEAD(&flow->mod_hdr); + INIT_LIST_HEAD(&flow->hairpin); + refcount_set(&flow->refcnt, 1); + init_completion(&flow->init_done); *__flow = flow; *__parse_attr = parse_attr; @@ -3182,7 +3539,7 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, static struct mlx5e_tc_flow * __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5_eswitch_rep *in_rep, struct mlx5_core_dev *in_mdev) @@ -3193,7 +3550,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow; int attr_size, err; - flow_flags |= MLX5E_TC_FLOW_ESWITCH; + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); attr_size = sizeof(struct mlx5_esw_flow_attr); err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, &parse_attr, &flow); @@ -3215,6 +3572,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, goto err_free; err = mlx5e_tc_add_fdb_flow(priv, flow, extack); + complete_all(&flow->init_done); if (err) { if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev))) goto err_free; @@ -3225,15 +3583,14 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, return flow; err_free: - kfree(flow); - kvfree(parse_attr); + mlx5e_flow_put(priv, flow); out: return ERR_PTR(err); } static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, struct mlx5e_tc_flow *flow, - u16 flow_flags) + unsigned long flow_flags) { struct mlx5e_priv *priv = flow->priv, *peer_priv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; @@ -3271,7 +3628,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, } flow->peer_flow = peer_flow; - flow->flags |= MLX5E_TC_FLOW_DUP; + flow_flag_set(flow, DUP); mutex_lock(&esw->offloads.peer_mutex); list_add_tail(&flow->peer, &esw->offloads.peer_flows); mutex_unlock(&esw->offloads.peer_mutex); @@ -3284,7 +3641,7 @@ out: static int mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, 
struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { @@ -3318,7 +3675,7 @@ out: static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { @@ -3332,7 +3689,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) return -EOPNOTSUPP; - flow_flags |= MLX5E_TC_FLOW_NIC; + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); attr_size = sizeof(struct mlx5_nic_flow_attr); err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, &parse_attr, &flow); @@ -3353,14 +3710,14 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, if (err) goto err_free; - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + flow_flag_set(flow, OFFLOADED); kvfree(parse_attr); *__flow = flow; return 0; err_free: - kfree(flow); + mlx5e_flow_put(priv, flow); kvfree(parse_attr); out: return err; @@ -3369,12 +3726,12 @@ out: static int mlx5e_tc_add_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - int flags, + unsigned long flags, struct net_device *filter_dev, struct mlx5e_tc_flow **flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - u16 flow_flags; + unsigned long flow_flags; int err; get_flags(flags, &flow_flags); @@ -3393,14 +3750,16 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, } int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct netlink_ext_ack *extack = f->common.extack; struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; int err = 0; - flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + rcu_read_lock(); + flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); + rcu_read_unlock(); if (flow) { NL_SET_ERR_MSG_MOD(extack, "flow cookie already exists, ignoring"); @@ -3411,55 +3770,68 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, goto out; } + trace_mlx5e_configure_flower(f); err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow); if (err) goto out; - err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); + err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params); if (err) goto err_free; return 0; err_free: - mlx5e_tc_del_flow(priv, flow); - kfree(flow); + mlx5e_flow_put(priv, flow); out: return err; } -#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS) -#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS) - static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) { - if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK)) - return true; + bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS)); + bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS)); - return false; + return flow_flag_test(flow, INGRESS) == dir_ingress && + flow_flag_test(flow, EGRESS) == dir_egress; } int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; + int err; + rcu_read_lock(); flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (!flow || !same_flow_direction(flow, flags)) - return -EINVAL; + if (!flow || !same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + } + /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag + * set. 
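+	 * flow_flag_test_and_set() makes concurrent delete requests idempotent:
+	 * only the first caller removes the flow from the table.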
+ */ + if (flow_flag_test_and_set(flow, DELETED)) { + err = -EINVAL; + goto errout; + } rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params); + rcu_read_unlock(); - mlx5e_tc_del_flow(priv, flow); - - kfree(flow); + trace_mlx5e_delete_flower(f); + mlx5e_flow_put(priv, flow); return 0; + +errout: + rcu_read_unlock(); + return err; } int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct rhashtable *tc_ht = get_tc_ht(priv, flags); @@ -3469,15 +3841,24 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, u64 lastuse = 0; u64 packets = 0; u64 bytes = 0; + int err = 0; - flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (!flow || !same_flow_direction(flow, flags)) - return -EINVAL; + rcu_read_lock(); + flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie, + tc_ht_params)); + rcu_read_unlock(); + if (IS_ERR(flow)) + return PTR_ERR(flow); - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + if (!same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + } + + if (mlx5e_is_offloaded_flow(flow)) { counter = mlx5e_tc_get_counter(flow); if (!counter) - return 0; + goto errout; mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); } @@ -3489,8 +3870,8 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, if (!peer_esw) goto out; - if ((flow->flags & MLX5E_TC_FLOW_DUP) && - (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) { + if (flow_flag_test(flow, DUP) && + flow_flag_test(flow->peer_flow, OFFLOADED)) { u64 bytes2; u64 packets2; u64 lastuse2; @@ -3509,15 +3890,118 @@ no_peer_counter: mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); out: flow_stats_update(&f->stats, bytes, packets, lastuse); + trace_mlx5e_stats_flower(f); +errout: + mlx5e_flow_put(priv, flow); + return err; +} + +static int apply_police_params(struct mlx5e_priv *priv, u32 rate, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch *esw; + u16 vport_num; + u32 rate_mbps; + int err; + + esw = priv->mdev->priv.eswitch; + /* rate is given in bytes/sec. + * First convert to bits/sec and then round to the nearest mbit/secs. + * mbit means million bits. + * Moreover, if rate is non zero we choose to configure to a minimum of + * 1 mbit/sec. + */ + rate_mbps = rate ? 
max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0; + vport_num = rpriv->rep->vport; + + err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); + if (err) + NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); + + return err; +} + +static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + const struct flow_action_entry *act; + int err; + int i; + + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall called with no action"); + return -EINVAL; + } + + if (!flow_offload_has_one_action(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action"); + return -EOPNOTSUPP; + } + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_POLICE: + err = apply_police_params(priv, act->police.rate_bytes_ps, extack); + if (err) + return err; + + rpriv->prev_vf_vport_stats = priv->stats.vf_vport; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); + return -EOPNOTSUPP; + } + } return 0; } +int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct netlink_ext_ack *extack = ma->common.extack; + int prio = TC_H_MAJ(ma->common.prio) >> 16; + + if (prio != 1) { + NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); + return -EINVAL; + } + + return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack); +} + +int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct netlink_ext_ack *extack = ma->common.extack; + + return apply_police_params(priv, 0, extack); +} + +void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct rtnl_link_stats64 cur_stats; + u64 dbytes; + u64 dpkts; + + cur_stats = priv->stats.vf_vport; + dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; + dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; + rpriv->prev_vf_vport_stats = cur_stats; + flow_stats_update(&ma->stats, dpkts, dbytes, jiffies); +} + static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) { struct mlx5_core_dev *peer_mdev = peer_priv->mdev; - struct mlx5e_hairpin_entry *hpe; + struct mlx5e_hairpin_entry *hpe, *tmp; + LIST_HEAD(init_wait_list); u16 peer_vhca_id; int bkt; @@ -3526,9 +4010,18 @@ static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); - hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) { - if (hpe->peer_vhca_id == peer_vhca_id) + mutex_lock(&priv->fs.tc.hairpin_tbl_lock); + hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) + if (refcount_inc_not_zero(&hpe->refcnt)) + list_add(&hpe->dead_peer_wait_list, &init_wait_list); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + + list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { + wait_for_completion(&hpe->res_ready); + if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) hpe->hp->pair->peer_gone = true; + + mlx5e_hairpin_put(priv, hpe); } } @@ -3564,7 +4057,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) struct mlx5e_tc_table *tc = &priv->fs.tc; int err; - hash_init(tc->mod_hdr_tbl); + mutex_init(&tc->t_lock); + mutex_init(&tc->mod_hdr.lock); + hash_init(tc->mod_hdr.hlist); + 
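/* the hairpin table gets its own lock; entries themselves are
+	 * reference-counted and freed by the last user
+	 */
+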
mutex_init(&tc->hairpin_tbl_lock); hash_init(tc->hairpin_tbl); err = rhashtable_init(&tc->ht, &tc_ht_params); @@ -3596,12 +4092,16 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) if (tc->netdevice_nb.notifier_call) unregister_netdevice_notifier(&tc->netdevice_nb); + mutex_destroy(&tc->mod_hdr.lock); + mutex_destroy(&tc->hairpin_tbl_lock); + rhashtable_destroy(&tc->ht); if (!IS_ERR_OR_NULL(tc->t)) { mlx5_destroy_flow_table(tc->t); tc->t = NULL; } + mutex_destroy(&tc->t_lock); } int mlx5e_tc_esw_init(struct rhashtable *tc_ht) @@ -3614,7 +4114,7 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); } -int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) { struct rhashtable *tc_ht = get_tc_ht(priv, flags); @@ -3636,10 +4136,10 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work) reoffload_flows_work); struct mlx5e_tc_flow *flow, *tmp; - rtnl_lock(); + mutex_lock(&rpriv->unready_flows_lock); list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) { if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL)) - remove_unready_flow(flow); + unready_flow_del(flow); } - rtnl_unlock(); + mutex_unlock(&rpriv->unready_flows_lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 3ab39275ca7d..924c6ef86a14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -40,13 +40,15 @@ #ifdef CONFIG_MLX5_ESWITCH enum { - MLX5E_TC_INGRESS = BIT(0), - MLX5E_TC_EGRESS = BIT(1), - MLX5E_TC_NIC_OFFLOAD = BIT(2), - MLX5E_TC_ESW_OFFLOAD = BIT(3), - MLX5E_TC_LAST_EXPORTED_BIT = 3, + MLX5E_TC_FLAG_INGRESS_BIT, + MLX5E_TC_FLAG_EGRESS_BIT, + MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, + MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLAG_LAST_EXPORTED_BIT = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, }; +#define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT) + int mlx5e_tc_nic_init(struct mlx5e_priv *priv); void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); @@ -54,23 +56,37 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht); void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags); + struct flow_cls_offload *f, unsigned long flags); int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags); + struct flow_cls_offload *f, unsigned long flags); int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags); + struct flow_cls_offload *f, unsigned long flags); + +int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *f); +int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *f); +void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma); struct mlx5e_encap_entry; void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e); + struct mlx5e_encap_entry *e, + struct list_head *flow_list); void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e); + struct mlx5e_encap_entry *e, + struct list_head *flow_list); +bool mlx5e_encap_take(struct mlx5e_encap_entry *e); +void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); + +void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry 
*e, struct list_head *flow_list); +void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list); struct mlx5e_neigh_hash_entry; void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); -int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags); +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); @@ -80,7 +96,11 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} -static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) { return 0; } +static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv, + unsigned long flags) +{ + return 0; +} #endif #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 600e92cb629a..d3a67a9b4eba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -210,7 +210,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; int fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, @@ -292,8 +292,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; stats->packets += skb_shinfo(skb)->gso_segs; } else { - u8 mode = mlx5e_transport_inline_tx_wqe(wqe) ? - MLX5_INLINE_MODE_TCP_UDP : sq->min_inline_mode; + u8 mode = mlx5e_tx_wqe_inline_mode(sq, &wqe->ctrl, skb); opcode = MLX5_OPCODE_SEND; mss = 0; @@ -608,9 +607,11 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; stats->packets += skb_shinfo(skb)->gso_segs; } else { + u8 mode = mlx5e_tx_wqe_inline_mode(sq, NULL, skb); + opcode = MLX5_OPCODE_SEND; mss = 0; - ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + ihs = mlx5e_calc_min_inline(mode, skb); num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); stats->packets++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 49b06b256c92..257a7c9f7a14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -33,6 +33,7 @@ #include <linux/irq.h> #include "en.h" #include "en/xdp.h" +#include "en/xsk/rx.h" #include "en/xsk/tx.h" static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) @@ -81,6 +82,29 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq) mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl); } +static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq) +{ + bool busy_xsk = false, xsk_rx_alloc_err; + + /* Handle the race between the application querying need_wakeup and the + * driver setting it: + * 1. Update need_wakeup both before and after the TX. If it goes to + * "yes", it can only happen with the first update. + * 2. If the application queried need_wakeup before we set it, the + * packets will be transmitted anyway, even w/o a wakeup. + * 3. Give a chance to clear need_wakeup after new packets were queued + * for TX. 
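+	 * The resulting order is: update_tx_wakeup(), xsk_tx(), update_tx_wakeup(),
+	 * then post RX WQEs and update the RX wakeup state.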
+ */ + mlx5e_xsk_update_tx_wakeup(xsksq); + busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); + mlx5e_xsk_update_tx_wakeup(xsksq); + + xsk_rx_alloc_err = xskrq->post_wqes(xskrq); + busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err); + + return busy_xsk; +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -122,8 +146,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) if (xsk_open) { mlx5e_poll_ico_cq(&c->xskicosq.cq); busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); - busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); - busy_xsk |= xskrq->post_wqes(xskrq); + busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq); } busy |= busy_xsk; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 41f25ea2e8d9..580c71cb9dfa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -215,11 +215,7 @@ static int mlx5_eq_async_int(struct notifier_block *nb, */ dma_rmb(); - if (likely(eqe->type < MLX5_EVENT_TYPE_MAX)) - atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); - else - mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type); - + atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); ++eq->cons_index; @@ -328,10 +324,13 @@ err_buf: /** * mlx5_eq_enable - Enable EQ for receiving EQEs - * @dev - Device which owns the eq - * @eq - EQ to enable - * @nb - notifier call block - * mlx5_eq_enable - must be called after EQ is created in device. + * @dev : Device which owns the eq + * @eq : EQ to enable + * @nb : Notifier call block + * + * Must be called after EQ is created in device. + * + * @return: 0 if no error */ int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct notifier_block *nb) @@ -348,11 +347,12 @@ int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, EXPORT_SYMBOL(mlx5_eq_enable); /** - * mlx5_eq_disable - Enable EQ for receiving EQEs - * @dev - Device which owns the eq - * @eq - EQ to disable - * @nb - notifier call block - * mlx5_eq_disable - must be called before EQ is destroyed. + * mlx5_eq_disable - Disable EQ for receiving EQEs + * @dev : Device which owns the eq + * @eq : EQ to disable + * @nb : Notifier call block + * + * Must be called before EQ is destroyed. 
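+ * Pairs with mlx5_eq_enable().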
*/ void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct notifier_block *nb) @@ -415,7 +415,7 @@ void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) int mlx5_eq_table_init(struct mlx5_core_dev *dev) { struct mlx5_eq_table *eq_table; - int i, err; + int i; eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL); if (!eq_table) @@ -423,9 +423,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) dev->priv.eq_table = eq_table; - err = mlx5_eq_debugfs_init(dev); - if (err) - goto kvfree_eq_table; + mlx5_eq_debugfs_init(dev); mutex_init(&eq_table->lock); for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) @@ -433,11 +431,6 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) eq_table->irq_table = dev->priv.irq_table; return 0; - -kvfree_eq_table: - kvfree(eq_table); - dev->priv.eq_table = NULL; - return err; } void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) @@ -945,9 +938,6 @@ int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) { struct mlx5_eq_table *eqt = dev->priv.eq_table; - if (nb->event_type >= MLX5_EVENT_TYPE_MAX) - return -EINVAL; - return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); } EXPORT_SYMBOL(mlx5_eq_notifier_register); @@ -956,9 +946,6 @@ int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) { struct mlx5_eq_table *eqt = dev->priv.eq_table; - if (nb->event_type >= MLX5_EVENT_TYPE_MAX) - return -EINVAL; - return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); } EXPORT_SYMBOL(mlx5_eq_notifier_unregister); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1f3891fde2eb..30aae76b6a1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -58,20 +58,9 @@ struct vport_addr { bool mc_promisc; }; -enum { - UC_ADDR_CHANGE = BIT(0), - MC_ADDR_CHANGE = BIT(1), - PROMISC_CHANGE = BIT(3), -}; - static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw); static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw); -/* Vport context events */ -#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ - MC_ADDR_CHANGE | \ - PROMISC_CHANGE) - struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) { @@ -108,13 +97,13 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); - if (events_mask & UC_ADDR_CHANGE) + if (events_mask & MLX5_VPORT_UC_ADDR_CHANGE) MLX5_SET(nic_vport_context, nic_vport_ctx, event_on_uc_address_change, 1); - if (events_mask & MC_ADDR_CHANGE) + if (events_mask & MLX5_VPORT_MC_ADDR_CHANGE) MLX5_SET(nic_vport_context, nic_vport_ctx, event_on_mc_address_change, 1); - if (events_mask & PROMISC_CHANGE) + if (events_mask & MLX5_VPORT_PROMISC_CHANGE) MLX5_SET(nic_vport_context, nic_vport_ctx, event_on_promisc_change, 1); @@ -463,6 +452,22 @@ static int esw_create_legacy_table(struct mlx5_eswitch *esw) return err; } +#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ + MLX5_VPORT_MC_ADDR_CHANGE | \ + MLX5_VPORT_PROMISC_CHANGE) + +static int esw_legacy_enable(struct mlx5_eswitch *esw) +{ + int ret; + + ret = esw_create_legacy_table(esw); + if (ret) + return ret; + + mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); + return 0; +} + static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) { esw_cleanup_vepa_rules(esw); @@ -470,6 +475,19 @@ static void 
esw_destroy_legacy_table(struct mlx5_eswitch *esw) esw_destroy_legacy_vepa_table(esw); } +static void esw_legacy_disable(struct mlx5_eswitch *esw) +{ + struct esw_mc_addr *mc_promisc; + + mlx5_eswitch_disable_pf_vf_vports(esw); + + mc_promisc = &esw->mc_promisc; + if (mc_promisc->uplink_rule) + mlx5_del_flow_rules(mc_promisc->uplink_rule); + + esw_destroy_legacy_table(esw); +} + /* E-Switch vport UC/MC lists management */ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, struct vport_addr *vaddr); @@ -901,21 +919,21 @@ static void esw_vport_change_handle_locked(struct mlx5_vport *vport) esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", vport->vport, mac); - if (vport->enabled_events & UC_ADDR_CHANGE) { + if (vport->enabled_events & MLX5_VPORT_UC_ADDR_CHANGE) { esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC); esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC); } - if (vport->enabled_events & MC_ADDR_CHANGE) + if (vport->enabled_events & MLX5_VPORT_MC_ADDR_CHANGE) esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC); - if (vport->enabled_events & PROMISC_CHANGE) { + if (vport->enabled_events & MLX5_VPORT_PROMISC_CHANGE) { esw_update_vport_rx_mode(esw, vport); if (!IS_ERR_OR_NULL(vport->allmulti_rule)) esw_update_vport_mc_promisc(esw, vport); } - if (vport->enabled_events & (PROMISC_CHANGE | MC_ADDR_CHANGE)) + if (vport->enabled_events & (MLX5_VPORT_PROMISC_CHANGE | MLX5_VPORT_MC_ADDR_CHANGE)) esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC); esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); @@ -1393,18 +1411,49 @@ out: return err; } +static bool element_type_supported(struct mlx5_eswitch *esw, int type) +{ + const struct mlx5_core_dev *dev = esw->dev; + + switch (type) { + case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_TASR; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT_TC; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; + } + return false; +} + /* Vport QoS management */ -static int esw_create_tsar(struct mlx5_eswitch *esw) +static void esw_create_tsar(struct mlx5_eswitch *esw) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; struct mlx5_core_dev *dev = esw->dev; + __be32 *attr; int err; if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) - return 0; + return; + + if (!element_type_supported(esw, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) + return; if (esw->qos.enabled) - return -EEXIST; + return; + + MLX5_SET(scheduling_context, tsar_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); err = mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, @@ -1412,11 +1461,10 @@ static int esw_create_tsar(struct mlx5_eswitch *esw) &esw->qos.root_tsar_id); if (err) { esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err); - return err; + return; } esw->qos.enabled = true; - return 0; } static void esw_destroy_tsar(struct mlx5_eswitch *esw) @@ -1537,6 +1585,22 @@ static int esw_vport_qos_config(struct mlx5_eswitch *esw, return 0; } +int mlx5_esw_modify_vport_rate(struct 
mlx5_eswitch *esw, u16 vport_num, + u32 rate_mbps) +{ + u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); + + return mlx5_modify_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + ctx, + vport->qos.esw_tsar_ix, + MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW); +} + static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) { ((u8 *)node_guid)[7] = mac[0]; @@ -1619,7 +1683,7 @@ static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport) } static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - int enable_events) + enum mlx5_eswitch_vport_event enabled_events) { u16 vport_num = vport->vport; @@ -1641,7 +1705,7 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num); /* Sync with current vport context */ - vport->enabled_events = enable_events; + vport->enabled_events = enabled_events; vport->enabled = true; /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well @@ -1770,11 +1834,46 @@ static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) -int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) +/* mlx5_eswitch_enable_pf_vf_vports() enables vports of PF, ECPF and VFs + * whichever are present on the eswitch. + */ +void +mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, + enum mlx5_eswitch_vport_event enabled_events) { struct mlx5_vport *vport; + int i; + + /* Enable PF vport */ + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + esw_enable_vport(esw, vport, enabled_events); + + /* Enable ECPF vports */ + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + esw_enable_vport(esw, vport, enabled_events); + } + + /* Enable VF vports */ + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + esw_enable_vport(esw, vport, enabled_events); +} + +/* mlx5_eswitch_disable_pf_vf_vports() disables vports of PF, ECPF and VFs + * whichever are previously enabled on the eswitch. + */ +void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i; + + mlx5_esw_for_all_vports_reverse(esw, i, vport) + esw_disable_vport(esw, vport); +} + +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) +{ int err; - int i, enabled_events; if (!ESW_ALLOWED(esw) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { @@ -1788,44 +1887,23 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) esw_warn(esw->dev, "engress ACL is not supported by FW\n"); + esw_create_tsar(esw); + esw->mode = mode; mlx5_lag_update(esw->dev); if (mode == MLX5_ESWITCH_LEGACY) { - err = esw_create_legacy_table(esw); - if (err) - goto abort; + err = esw_legacy_enable(esw); } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw); + err = esw_offloads_enable(esw); } if (err) goto abort; - err = esw_create_tsar(esw); - if (err) - esw_warn(esw->dev, "Failed to create eswitch TSAR"); - - enabled_events = (mode == MLX5_ESWITCH_LEGACY) ? 
SRIOV_VPORT_EVENTS : - UC_ADDR_CHANGE; - - /* Enable PF vport */ - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - esw_enable_vport(esw, vport, enabled_events); - - /* Enable ECPF vports */ - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - esw_enable_vport(esw, vport, enabled_events); - } - - /* Enable VF vports */ - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) - esw_enable_vport(esw, vport, enabled_events); - mlx5_eswitch_event_handlers_register(esw); esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n", @@ -1847,10 +1925,7 @@ abort: void mlx5_eswitch_disable(struct mlx5_eswitch *esw) { - struct esw_mc_addr *mc_promisc; - struct mlx5_vport *vport; int old_mode; - int i; if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE) return; @@ -1859,21 +1934,14 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw) esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", esw->esw_funcs.num_vfs, esw->enabled_vports); - mc_promisc = &esw->mc_promisc; mlx5_eswitch_event_handlers_unregister(esw); - mlx5_esw_for_all_vports(esw, i, vport) - esw_disable_vport(esw, vport); - - if (mc_promisc && mc_promisc->uplink_rule) - mlx5_del_flow_rules(mc_promisc->uplink_rule); - - esw_destroy_tsar(esw); - if (esw->mode == MLX5_ESWITCH_LEGACY) - esw_destroy_legacy_table(esw); + esw_legacy_disable(esw); else if (esw->mode == MLX5_ESWITCH_OFFLOADS) - esw_offloads_cleanup(esw); + esw_offloads_disable(esw); + + esw_destroy_tsar(esw); old_mode = esw->mode; esw->mode = MLX5_ESWITCH_NONE; @@ -1931,8 +1999,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) if (err) goto abort; + mutex_init(&esw->offloads.encap_tbl_lock); hash_init(esw->offloads.encap_tbl); - hash_init(esw->offloads.mod_hdr_tbl); + mutex_init(&esw->offloads.mod_hdr.lock); + hash_init(esw->offloads.mod_hdr.hlist); + atomic64_set(&esw->offloads.num_flows, 0); mutex_init(&esw->state_lock); mlx5_esw_for_all_vports(esw, i, vport) { @@ -1968,6 +2039,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); esw_offloads_cleanup_reps(esw); + mutex_destroy(&esw->offloads.mod_hdr.lock); + mutex_destroy(&esw->offloads.encap_tbl_lock); kfree(esw->vports); kfree(esw); } @@ -2085,23 +2158,19 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, if (vlan > 4095 || qos > 7) return -EINVAL; - mutex_lock(&esw->state_lock); - err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); if (err) - goto unlock; + return err; evport->info.vlan = vlan; evport->info.qos = qos; if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) - goto unlock; + return err; err = esw_vport_egress_config(esw, evport); } -unlock: - mutex_unlock(&esw->state_lock); return err; } @@ -2109,11 +2178,16 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos) { u8 set_flags = 0; + int err; if (vlan || qos) set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; - return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); + mutex_lock(&esw->state_lock); + err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); + mutex_unlock(&esw->state_lock); + + return err; } int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 04685dbb280c..6bd6f5895244 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -35,6 +35,7 @@ #include <linux/if_ether.h> #include <linux/if_link.h> +#include <linux/atomic.h> #include <net/devlink.h> #include <linux/mlx5/device.h> #include <linux/mlx5/eswitch.h> @@ -68,7 +69,7 @@ struct vport_ingress { struct mlx5_flow_group *allow_spoofchk_only_grp; struct mlx5_flow_group *allow_untagged_only_grp; struct mlx5_flow_group *drop_grp; - int modify_metadata_id; + struct mlx5_modify_hdr *modify_metadata; struct mlx5_flow_handle *modify_metadata_rule; struct mlx5_flow_handle *allow_rule; struct mlx5_flow_handle *drop_rule; @@ -101,6 +102,13 @@ struct mlx5_vport_info { bool trusted; }; +/* Vport context events */ +enum mlx5_eswitch_vport_event { + MLX5_VPORT_UC_ADDR_CHANGE = BIT(0), + MLX5_VPORT_MC_ADDR_CHANGE = BIT(1), + MLX5_VPORT_PROMISC_CHANGE = BIT(3), +}; + struct mlx5_vport { struct mlx5_core_dev *dev; int vport; @@ -122,7 +130,7 @@ struct mlx5_vport { } qos; bool enabled; - u16 enabled_events; + enum mlx5_eswitch_vport_event enabled_events; }; enum offloads_fdb_flags { @@ -145,6 +153,7 @@ struct mlx5_eswitch_fdb { } legacy; struct offloads_fdb { + struct mlx5_flow_namespace *ns; struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *peer_miss_grp; @@ -173,13 +182,14 @@ struct mlx5_esw_offload { struct mlx5_eswitch_rep *vport_reps; struct list_head peer_flows; struct mutex peer_mutex; + struct mutex encap_tbl_lock; /* protects encap_tbl */ DECLARE_HASHTABLE(encap_tbl, 8); - DECLARE_HASHTABLE(mod_hdr_tbl, 8); + struct mod_hdr_tbl mod_hdr; DECLARE_HASHTABLE(termtbl_tbl, 8); struct mutex termtbl_mutex; /* protects termtbl hash */ const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; - u64 num_flows; + atomic64_t num_flows; enum devlink_eswitch_encap_mode encap; }; @@ -207,8 +217,11 @@ enum { struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; + /* legacy data structures */ struct mlx5_eswitch_fdb fdb_table; struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; + struct esw_mc_addr mc_promisc; + /* end of legacy */ struct workqueue_struct *work_queue; struct mlx5_vport *vports; u32 flags; @@ -218,7 +231,6 @@ struct mlx5_eswitch { * and async SRIOV admin state changes */ struct mutex state_lock; - struct esw_mc_addr mc_promisc; struct { bool enabled; @@ -233,8 +245,8 @@ struct mlx5_eswitch { struct mlx5_esw_functions esw_funcs; }; -void esw_offloads_cleanup(struct mlx5_eswitch *esw); -int esw_offloads_init(struct mlx5_eswitch *esw); +void esw_offloads_disable(struct mlx5_eswitch *esw); +int esw_offloads_enable(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, @@ -251,6 +263,8 @@ void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, + u32 rate_mbps); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); @@ -372,11 +386,11 @@ struct mlx5_esw_flow_attr { struct { u32 flags; struct mlx5_eswitch_rep *rep; + struct mlx5_pkt_reformat *pkt_reformat; struct mlx5_core_dev *mdev; - u32 encap_id; struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; - u32 mod_hdr_id; + struct mlx5_modify_hdr *modify_hdr; u8 
inner_match_level; u8 outer_match_level; struct mlx5_fc *counter; @@ -513,6 +527,11 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); (vport) = &(esw)->vports[i], \ (i) < (esw)->total_vports; (i)++) +#define mlx5_esw_for_all_vports_reverse(esw, i, vport) \ + for ((i) = (esw)->total_vports - 1; \ + (vport) = &(esw)->vports[i], \ + (i) >= MLX5_VPORT_PF; (i)--) + #define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \ for ((i) = MLX5_VPORT_FIRST_VF; \ (vport) = &(esw)->vports[(i)], \ @@ -574,6 +593,11 @@ bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs); int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); +void +mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 0323fd078271..00d71db15f22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -190,10 +190,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, MLX5_FLOW_DEST_VPORT_VHCA_ID; if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - flow_act.reformat_id = attr->dests[j].encap_id; + flow_act.pkt_reformat = attr->dests[j].pkt_reformat; dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; - dest[i].vport.reformat_id = - attr->dests[j].encap_id; + dest[i].vport.pkt_reformat = + attr->dests[j].pkt_reformat; } i++; } @@ -213,7 +213,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - flow_act.modify_id = attr->mod_hdr_id; + flow_act.modify_hdr = attr->modify_hdr; fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split); if (IS_ERR(fdb)) { @@ -229,7 +229,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (IS_ERR(rule)) goto err_add_rule; else - esw->offloads.num_flows++; + atomic64_inc(&esw->offloads.num_flows); return rule; @@ -276,7 +276,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) { dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; - dest[i].vport.reformat_id = attr->dests[i].encap_id; + dest[i].vport.pkt_reformat = attr->dests[i].pkt_reformat; } } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; @@ -294,7 +294,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, if (IS_ERR(rule)) goto add_err; - esw->offloads.num_flows++; + atomic64_inc(&esw->offloads.num_flows); return rule; add_err: @@ -322,7 +322,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); } - esw->offloads.num_flows--; + atomic64_dec(&esw->offloads.num_flows); if (fwd_rule) { esw_put_prio_table(esw, attr->chain, attr->prio, 1); @@ -438,9 +438,11 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !attr->dest_chain); + mutex_lock(&esw->state_lock); + err = 
esw_add_vlan_action_check(attr, push, pop, fwd); if (err) - return err; + goto unlock; attr->vlan_handled = false; @@ -453,11 +455,11 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, attr->vlan_handled = true; } - return 0; + goto unlock; } if (!push && !pop) - return 0; + goto unlock; if (!(offloads->vlan_push_pop_refcount)) { /* it's the 1st vlan rule, apply global vlan pop policy */ @@ -482,6 +484,8 @@ skip_set_push: out: if (!err) attr->vlan_handled = true; +unlock: + mutex_unlock(&esw->state_lock); return err; } @@ -504,6 +508,8 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + mutex_lock(&esw->state_lock); + vport = esw_vlan_action_get_vport(attr, push, pop); if (!push && !pop && fwd) { @@ -511,7 +517,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) vport->vlan_refcount--; - return 0; + goto out; } if (push) { @@ -529,12 +535,13 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, skip_unset_push: offloads->vlan_push_pop_refcount--; if (offloads->vlan_push_pop_refcount) - return 0; + goto out; /* no more vlan rules, stop global vlan pop policy */ err = esw_set_global_vlan_pop(esw, 0); out: + mutex_unlock(&esw->state_lock); return err; } @@ -583,38 +590,15 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } -static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw) +static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) { u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; u8 fdb_to_vport_reg_c_id; int err; - err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, - out, sizeof(out)); - if (err) - return err; - - fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.fdb_to_vport_reg_c_id); - - fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; - MLX5_SET(modify_esw_vport_context_in, in, - esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); - - MLX5_SET(modify_esw_vport_context_in, in, - field_select.fdb_to_vport_reg_c_id, 1); - - return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, - in, sizeof(in)); -} - -static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) -{ - u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; - u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; - u8 fdb_to_vport_reg_c_id; - int err; + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return 0; err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, out, sizeof(out)); @@ -624,7 +608,10 @@ static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, esw_vport_context.fdb_to_vport_reg_c_id); - fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + if (enable) + fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + else + fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); @@ -1081,6 +1068,13 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) err = -EOPNOTSUPP; goto ns_err; } + esw->fdb_table.offloads.ns = root_ns; + err = mlx5_flow_namespace_set_mode(root_ns, + 
esw->dev->priv.steering->mode); + if (err) { + esw_warn(dev, "Failed to set FDB namespace steering mode\n"); + goto ns_err; + } max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | MLX5_CAP_GEN(dev, max_flow_counter_15_0); @@ -1220,6 +1214,8 @@ send_vport_err: esw_destroy_offloads_fast_fdb_tables(esw); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); slow_fdb_err: + /* Holds true only as long as DMFS is the default */ + mlx5_flow_namespace_set_mode(root_ns, MLX5_FLOW_STEERING_MODE_DMFS); ns_err: kvfree(flow_group_in); return err; @@ -1239,6 +1235,9 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); esw_destroy_offloads_fast_fdb_tables(esw); + /* Holds true only as long as DMFS is the default */ + mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, + MLX5_FLOW_STEERING_MODE_DMFS); } static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) @@ -1402,10 +1401,9 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) int esw_offloads_init_reps(struct mlx5_eswitch *esw) { int total_vports = esw->total_vports; - struct mlx5_core_dev *dev = esw->dev; struct mlx5_eswitch_rep *rep; - u8 hw_id[ETH_ALEN], rep_type; int vport_index; + u8 rep_type; esw->offloads.vport_reps = kcalloc(total_vports, sizeof(struct mlx5_eswitch_rep), @@ -1413,12 +1411,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) if (!esw->offloads.vport_reps) return -ENOMEM; - mlx5_query_mac_address(dev, hw_id); - mlx5_esw_for_all_reps(esw, vport_index, rep) { rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index); rep->vport_index = vport_index; - ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) atomic_set(&rep->rep_data[rep_type].state, @@ -1640,13 +1635,42 @@ static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) esw_del_fdb_peer_miss_rules(esw); } +static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw, + bool pair) +{ + struct mlx5_flow_root_namespace *peer_ns; + struct mlx5_flow_root_namespace *ns; + int err; + + peer_ns = peer_esw->dev->priv.steering->fdb_root_ns; + ns = esw->dev->priv.steering->fdb_root_ns; + + if (pair) { + err = mlx5_flow_namespace_set_peer(ns, peer_ns); + if (err) + return err; + + err = mlx5_flow_namespace_set_peer(peer_ns, ns); + if (err) { + mlx5_flow_namespace_set_peer(ns, NULL); + return err; + } + } else { + mlx5_flow_namespace_set_peer(ns, NULL); + mlx5_flow_namespace_set_peer(peer_ns, NULL); + } + + return 0; +} + static int mlx5_esw_offloads_devcom_event(int event, void *my_data, void *event_data) { struct mlx5_eswitch *esw = my_data; - struct mlx5_eswitch *peer_esw = event_data; struct mlx5_devcom *devcom = esw->dev->priv.devcom; + struct mlx5_eswitch *peer_esw = event_data; int err; switch (event) { @@ -1655,9 +1679,12 @@ static int mlx5_esw_offloads_devcom_event(int event, mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) break; - err = mlx5_esw_offloads_pair(esw, peer_esw); + err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true); if (err) goto err_out; + err = mlx5_esw_offloads_pair(esw, peer_esw); + if (err) + goto err_peer; err = mlx5_esw_offloads_pair(peer_esw, esw); if (err) @@ -1673,6 +1700,7 @@ static int mlx5_esw_offloads_devcom_event(int event, mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); mlx5_esw_offloads_unpair(peer_esw); mlx5_esw_offloads_unpair(esw); + mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); break; } 
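mlx5_esw_offloads_set_ns_peer() above, and the devcom PAIR handler that calls it, follow the symmetric setup-with-rollback shape: link A to B, then B to A, and on a partial failure undo the first link so neither namespace is left half-paired (the err_peer label in the next hunk performs exactly that unwind). A generic sketch of the shape, with illustrative names:

struct ns {
	struct ns *peer;
};

/* Stand-in for the per-namespace set-peer command; the real one can fail. */
static int set_peer(struct ns *ns, struct ns *peer)
{
	ns->peer = peer;
	return 0;
}

static int pair_ns(struct ns *a, struct ns *b)
{
	int err;

	err = set_peer(a, b);
	if (err)
		return err;

	err = set_peer(b, a);
	if (err) {
		set_peer(a, NULL);	/* roll back the first link */
		return err;
	}
	return 0;
}

static void unpair_ns(struct ns *a, struct ns *b)
{
	set_peer(a, NULL);
	set_peer(b, NULL);
}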
@@ -1680,7 +1708,8 @@ static int mlx5_esw_offloads_devcom_event(int event, err_pair: mlx5_esw_offloads_unpair(esw); - +err_peer: + mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); err_out: mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d", event, err); @@ -1751,7 +1780,7 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, if (vport->ingress.modify_metadata_rule) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - flow_act.modify_id = vport->ingress.modify_metadata_id; + flow_act.modify_hdr = vport->ingress.modify_metadata; } vport->ingress.allow_rule = @@ -1787,9 +1816,11 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, MLX5_SET(set_action_in, action, data, mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); - err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, - 1, action, &vport->ingress.modify_metadata_id); - if (err) { + vport->ingress.modify_metadata = + mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + 1, action); + if (IS_ERR(vport->ingress.modify_metadata)) { + err = PTR_ERR(vport->ingress.modify_metadata); esw_warn(esw->dev, "failed to alloc modify header for vport %d ingress acl (%d)\n", vport->vport, err); @@ -1797,7 +1828,7 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; - flow_act.modify_id = vport->ingress.modify_metadata_id; + flow_act.modify_hdr = vport->ingress.modify_metadata; vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, &spec, &flow_act, NULL, 0); if (IS_ERR(vport->ingress.modify_metadata_rule)) { @@ -1811,7 +1842,7 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, out: if (err) - mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata); return err; } @@ -1820,7 +1851,7 @@ void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, { if (vport->ingress.modify_metadata_rule) { mlx5_del_flow_rules(vport->ingress.modify_metadata_rule); - mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata); vport->ingress.modify_metadata_rule = NULL; } @@ -2120,7 +2151,7 @@ int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type return NOTIFY_OK; } -int esw_offloads_init(struct mlx5_eswitch *esw) +int esw_offloads_enable(struct mlx5_eswitch *esw) { int err; @@ -2130,15 +2161,16 @@ int esw_offloads_init(struct mlx5_eswitch *esw) else esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; + mlx5_rdma_enable_roce(esw->dev); err = esw_offloads_steering_init(esw); if (err) - return err; + goto err_steering_init; - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { - err = mlx5_eswitch_enable_passing_vport_metadata(esw); - if (err) - goto err_vport_metadata; - } + err = esw_set_passing_vport_metadata(esw, true); + if (err) + goto err_vport_metadata; + + mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); err = esw_offloads_load_all_reps(esw); if (err) @@ -2147,15 +2179,15 @@ int esw_offloads_init(struct mlx5_eswitch *esw) esw_offloads_devcom_init(esw); mutex_init(&esw->offloads.termtbl_mutex); - mlx5_rdma_enable_roce(esw->dev); - return 0; err_reps: - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) - 
mlx5_eswitch_disable_passing_vport_metadata(esw); + mlx5_eswitch_disable_pf_vf_vports(esw); + esw_set_passing_vport_metadata(esw, false); err_vport_metadata: esw_offloads_steering_cleanup(esw); +err_steering_init: + mlx5_rdma_disable_roce(esw->dev); return err; } @@ -2178,14 +2210,14 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, return err; } -void esw_offloads_cleanup(struct mlx5_eswitch *esw) +void esw_offloads_disable(struct mlx5_eswitch *esw) { - mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); esw_offloads_unload_all_reps(esw); - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) - mlx5_eswitch_disable_passing_vport_metadata(esw); + mlx5_eswitch_disable_pf_vf_vports(esw); + esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); + mlx5_rdma_disable_roce(esw->dev); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; } @@ -2345,7 +2377,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, break; } - if (esw->offloads.num_flows > 0) { + if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set inline mode when flows are configured"); return -EOPNOTSUPP; @@ -2455,7 +2487,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, if (esw->offloads.encap == encap) return 0; - if (esw->offloads.num_flows > 0) { + if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set encapsulation when flows are configured"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 7ac1249eadc3..579c306caa7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -107,6 +107,50 @@ static int mlx5_cmd_stub_delete_fte(struct mlx5_flow_root_namespace *ns, return 0; } +static int mlx5_cmd_stub_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) +{ + return 0; +} + +static void mlx5_cmd_stub_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) +{ +} + +static int mlx5_cmd_stub_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) +{ + return 0; +} + +static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) +{ +} + +static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns) +{ + return 0; +} + +static int mlx5_cmd_stub_create_ns(struct mlx5_flow_root_namespace *ns) +{ + return 0; +} + +static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns) +{ + return 0; +} + static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 underlay_qpn, bool disconnect) @@ -182,7 +226,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, } else { MLX5_SET(create_flow_table_in, in, flow_table_context.table_miss_action, - ns->def_miss_action); + ft->def_miss_action); } break; @@ -262,7 +306,7 @@ static int mlx5_cmd_modify_flow_table(struct mlx5_flow_root_namespace *ns, } else { MLX5_SET(modify_flow_table_in, in, flow_table_context.table_miss_action, - ns->def_miss_action); + ft->def_miss_action); } } @@ -412,11 +456,13 @@ static int 
mlx5_cmd_set_fte(struct mlx5_core_dev *dev, } else { MLX5_SET(flow_context, in_flow_context, action, fte->action.action); - MLX5_SET(flow_context, in_flow_context, packet_reformat_id, - fte->action.reformat_id); + if (fte->action.pkt_reformat) + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.pkt_reformat->id); } - MLX5_SET(flow_context, in_flow_context, modify_header_id, - fte->action.modify_id); + if (fte->action.modify_hdr) + MLX5_SET(flow_context, in_flow_context, modify_header_id, + fte->action.modify_hdr->id); vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan); @@ -468,7 +514,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); MLX5_SET(extended_dest_format, in_dests, packet_reformat_id, - dst->dest_attr.vport.reformat_id); + dst->dest_attr.vport.pkt_reformat->id); } break; default: @@ -566,7 +612,9 @@ static int mlx5_cmd_delete_fte(struct mlx5_flow_root_namespace *ns, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) +int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask, + u32 *id) { u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0}; u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0}; @@ -574,6 +622,7 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) MLX5_SET(alloc_flow_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_FLOW_COUNTER); + MLX5_SET(alloc_flow_counter_in, in, flow_counter_bulk, alloc_bitmask); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) @@ -581,6 +630,11 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) return err; } +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) +{ + return mlx5_cmd_fc_bulk_alloc(dev, 0, id); +} + int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id) { u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0}; @@ -615,77 +669,35 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, return 0; } -struct mlx5_cmd_fc_bulk { - u32 id; - int num; - int outlen; - u32 out[0]; -}; - -struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num) -{ - struct mlx5_cmd_fc_bulk *b; - int outlen = - MLX5_ST_SZ_BYTES(query_flow_counter_out) + - MLX5_ST_SZ_BYTES(traffic_counter) * num; - - b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL); - if (!b) - return NULL; - - b->id = id; - b->num = num; - b->outlen = outlen; - - return b; -} - -void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b) +int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len) { - kfree(b); + return MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter) * bulk_len; } -int -mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) +int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len, + u32 *out) { + int outlen = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len); u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0}; MLX5_SET(query_flow_counter_in, in, opcode, MLX5_CMD_OP_QUERY_FLOW_COUNTER); MLX5_SET(query_flow_counter_in, in, op_mod, 0); - MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id); - MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num); - return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, base_id); + MLX5_SET(query_flow_counter_in, in, num_of_counters, bulk_len); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } -void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev 
*dev, - struct mlx5_cmd_fc_bulk *b, u32 id, - u64 *packets, u64 *bytes) -{ - int index = id - b->id; - void *stats; - - if (index < 0 || index >= b->num) { - mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n", - id, b->id, b->id + b->num - 1); - return; - } - - stats = MLX5_ADDR_OF(query_flow_counter_out, b->out, - flow_statistics[index]); - *packets = MLX5_GET64(traffic_counter, stats, packets); - *bytes = MLX5_GET64(traffic_counter, stats, octets); -} - -int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, - int reformat_type, - size_t size, - void *reformat_data, - enum mlx5_flow_namespace_type namespace, - u32 *packet_reformat_id) +static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) { u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)]; + struct mlx5_core_dev *dev = ns->dev; void *packet_reformat_context_in; int max_encap_size; void *reformat; @@ -728,35 +740,36 @@ int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, memset(out, 0, sizeof(out)); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - *packet_reformat_id = MLX5_GET(alloc_packet_reformat_context_out, - out, packet_reformat_id); + pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out, + out, packet_reformat_id); kfree(in); return err; } -EXPORT_SYMBOL(mlx5_packet_reformat_alloc); -void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, - u32 packet_reformat_id) +static void mlx5_cmd_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) { u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)]; u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)]; + struct mlx5_core_dev *dev = ns->dev; memset(in, 0, sizeof(in)); MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, - packet_reformat_id); + pkt_reformat->id); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -EXPORT_SYMBOL(mlx5_packet_reformat_dealloc); -int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, - u8 namespace, u8 num_actions, - void *modify_actions, u32 *modify_header_id) +static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) { u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)]; int max_actions, actions_size, inlen, err; + struct mlx5_core_dev *dev = ns->dev; void *actions_in; u8 table_type; u32 *in; @@ -807,26 +820,26 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, memset(out, 0, sizeof(out)); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); + modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); kfree(in); return err; } -EXPORT_SYMBOL(mlx5_modify_header_alloc); -void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) +static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) { u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)]; u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)]; + struct mlx5_core_dev *dev = ns->dev; memset(in, 0, sizeof(in)); MLX5_SET(dealloc_modify_header_context_in, in, 
opcode, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, - modify_header_id); + modify_hdr->id); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -EXPORT_SYMBOL(mlx5_modify_header_dealloc); static const struct mlx5_flow_cmds mlx5_flow_cmds = { .create_flow_table = mlx5_cmd_create_flow_table, @@ -838,6 +851,13 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = { .update_fte = mlx5_cmd_update_fte, .delete_fte = mlx5_cmd_delete_fte, .update_root_ft = mlx5_cmd_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_modify_header_dealloc, + .set_peer = mlx5_cmd_stub_set_peer, + .create_ns = mlx5_cmd_stub_create_ns, + .destroy_ns = mlx5_cmd_stub_destroy_ns, }; static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { @@ -850,9 +870,16 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { .update_fte = mlx5_cmd_stub_update_fte, .delete_fte = mlx5_cmd_stub_delete_fte, .update_root_ft = mlx5_cmd_stub_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_stub_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_stub_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc, + .set_peer = mlx5_cmd_stub_set_peer, + .create_ns = mlx5_cmd_stub_create_ns, + .destroy_ns = mlx5_cmd_stub_destroy_ns, }; -static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void) +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void) { return &mlx5_flow_cmds; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index e340f9af2f5a..d62de642eca9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -75,24 +75,45 @@ struct mlx5_flow_cmds { struct mlx5_flow_table *ft, u32 underlay_qpn, bool disconnect); + + int (*packet_reformat_alloc)(struct mlx5_flow_root_namespace *ns, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat); + + void (*packet_reformat_dealloc)(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat); + + int (*modify_header_alloc)(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr); + + void (*modify_header_dealloc)(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr); + + int (*set_peer)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns); + + int (*create_ns)(struct mlx5_flow_root_namespace *ns); + int (*destroy_ns)(struct mlx5_flow_root_namespace *ns); }; int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); +int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask, + u32 *id); int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, u64 *packets, u64 *bytes); -struct mlx5_cmd_fc_bulk; - -struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num); -void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b); -int -mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b); -void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, - struct 
mlx5_cmd_fc_bulk *b, u32 id, - u64 *packets, u64 *bytes); +int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len); +int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len, + u32 *out); const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type); +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 3e99799bdb40..3bbb49354829 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -60,7 +60,8 @@ ADD_PRIO(num_prios_val, 0, num_levels_val, {},\ __VA_ARGS__)\ -#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\ +#define ADD_NS(def_miss_act, ...) {.type = FS_TYPE_NAMESPACE, \ + .def_miss_action = def_miss_act,\ .children = (struct init_tree_node[]) {__VA_ARGS__},\ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ } @@ -131,33 +132,41 @@ static struct init_tree_node { int num_leaf_prios; int prio; int num_levels; + enum mlx5_flow_table_miss_action def_miss_action; } root_fs = { .type = FS_TYPE_NAMESPACE, .ar_size = 7, - .children = (struct init_tree_node[]) { - ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, - FS_CHAINING_CAPS, - ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, - BY_PASS_PRIO_NUM_LEVELS))), - ADD_PRIO(0, LAG_MIN_LEVEL, 0, - FS_CHAINING_CAPS, - ADD_NS(ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS, - LAG_PRIO_NUM_LEVELS))), - ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))), - ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, - FS_CHAINING_CAPS, - ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS, - ETHTOOL_PRIO_NUM_LEVELS))), - ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, KERNEL_NIC_TC_NUM_LEVELS), - ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, - KERNEL_NIC_PRIO_NUM_LEVELS))), - ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, - FS_CHAINING_CAPS, - ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))), - ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))), + .children = (struct init_tree_node[]){ + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, LAG_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS, + LAG_PRIO_NUM_LEVELS))), + ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, + OFFLOADS_MAX_FT))), + ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS, + ETHTOOL_PRIO_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, + KERNEL_NIC_TC_NUM_LEVELS), + ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, + KERNEL_NIC_PRIO_NUM_LEVELS))), + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, + LEFTOVERS_NUM_LEVELS))), + ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, + ANCHOR_NUM_LEVELS))), } }; @@ -167,8 +176,29 @@ static struct init_tree_node egress_root_fs = { .children = (struct init_tree_node[]) { ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0, FS_CHAINING_CAPS_EGRESS, - 
ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + } +}; + +#define RDMA_RX_BYPASS_PRIO 0 +#define RDMA_RX_KERNEL_PRIO 1 +static struct init_tree_node rdma_rx_root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 2, + .children = (struct init_tree_node[]) { + [RDMA_RX_BYPASS_PRIO] = + ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), + [RDMA_RX_KERNEL_PRIO] = + ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS + 1, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN, + ADD_MULTIPLE_PRIO(1, 1))), } }; @@ -1014,6 +1044,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); + ft->def_miss_action = ns->def_miss_action; err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft); if (err) goto free_ft; @@ -1384,7 +1415,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ? (d1->vport.vhca_id == d2->vport.vhca_id) : true) && ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ? - (d1->vport.reformat_id == d2->vport.reformat_id) : true)) || + (d1->vport.pkt_reformat->id == + d2->vport.pkt_reformat->id) : true)) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && d1->ft == d2->ft) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && @@ -2056,16 +2088,18 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, if (steering->sniffer_tx_root_ns) return &steering->sniffer_tx_root_ns->ns; return NULL; - case MLX5_FLOW_NAMESPACE_RDMA_RX: - if (steering->rdma_rx_root_ns) - return &steering->rdma_rx_root_ns->ns; - return NULL; default: break; } if (type == MLX5_FLOW_NAMESPACE_EGRESS) { root_ns = steering->egress_root_ns; + } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) { + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_BYPASS_PRIO; + } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL) { + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_KERNEL_PRIO; } else { /* Must be NIC RX */ root_ns = steering->root_ns; prio = type; @@ -2155,7 +2189,8 @@ static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace return ns; } -static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) +static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio, + int def_miss_act) { struct mlx5_flow_namespace *ns; @@ -2164,6 +2199,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ERR_PTR(-ENOMEM); fs_init_namespace(ns); + ns->def_miss_action = def_miss_act; tree_init_node(&ns->node, NULL, del_sw_ns); tree_add_node(&ns->node, &prio->node); list_add_tail(&ns->node.list, &prio->node.children); @@ -2230,7 +2266,7 @@ static int init_root_tree_recursive(struct mlx5_flow_steering *steering, base = &fs_prio->node; } else if (init_node->type == FS_TYPE_NAMESPACE) { fs_get_obj(fs_prio, fs_parent_node); - fs_ns = fs_create_namespace(fs_prio); + fs_ns = fs_create_namespace(fs_prio, init_node->def_miss_action); if (IS_ERR(fs_ns)) return PTR_ERR(fs_ns); base = &fs_ns->node; @@ -2494,18 +2530,25 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) static int 
init_rdma_rx_root_ns(struct mlx5_flow_steering *steering) { - struct fs_prio *prio; + int err; steering->rdma_rx_root_ns = create_root_ns(steering, FS_FT_RDMA_RX); if (!steering->rdma_rx_root_ns) return -ENOMEM; - steering->rdma_rx_root_ns->def_miss_action = - MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN; + err = init_root_tree(steering, &rdma_rx_root_fs, + &steering->rdma_rx_root_ns->ns.node); + if (err) + goto out_err; - /* Create single prio */ - prio = fs_create_prio(&steering->rdma_rx_root_ns->ns, 0, 1); - return PTR_ERR_OR_ZERO(prio); + set_prio_attrs(steering->rdma_rx_root_ns); + + return 0; + +out_err: + cleanup_root_ns(steering->rdma_rx_root_ns); + steering->rdma_rx_root_ns = NULL; + return err; } static int init_fdb_root_ns(struct mlx5_flow_steering *steering) { @@ -2543,7 +2586,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) } for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) { - ns = fs_create_namespace(maj_prio); + ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF); if (IS_ERR(ns)) { err = PTR_ERR(ns); goto out_err; @@ -2846,3 +2889,160 @@ out: return err; } EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn); + +static struct mlx5_flow_root_namespace +*get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5_flow_namespace *ns; + + if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS || + ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS) + ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0); + else + ns = mlx5_get_flow_namespace(dev, ns_type); + if (!ns) + return NULL; + + return find_root(&ns->node); +} + +struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 ns_type, u8 num_actions, + void *modify_actions) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_modify_hdr *modify_hdr; + int err; + + root = get_root_namespace(dev, ns_type); + if (!root) + return ERR_PTR(-EOPNOTSUPP); + + modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL); + if (!modify_hdr) + return ERR_PTR(-ENOMEM); + + modify_hdr->ns_type = ns_type; + err = root->cmds->modify_header_alloc(root, ns_type, num_actions, + modify_actions, modify_hdr); + if (err) { + kfree(modify_hdr); + return ERR_PTR(err); + } + + return modify_hdr; +} +EXPORT_SYMBOL(mlx5_modify_header_alloc); + +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, + struct mlx5_modify_hdr *modify_hdr) +{ + struct mlx5_flow_root_namespace *root; + + root = get_root_namespace(dev, modify_hdr->ns_type); + if (WARN_ON(!root)) + return; + root->cmds->modify_header_dealloc(root, modify_hdr); + kfree(modify_hdr); +} +EXPORT_SYMBOL(mlx5_modify_header_dealloc); + +struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5_pkt_reformat *pkt_reformat; + struct mlx5_flow_root_namespace *root; + int err; + + root = get_root_namespace(dev, ns_type); + if (!root) + return ERR_PTR(-EOPNOTSUPP); + + pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL); + if (!pkt_reformat) + return ERR_PTR(-ENOMEM); + + pkt_reformat->ns_type = ns_type; + pkt_reformat->reformat_type = reformat_type; + err = root->cmds->packet_reformat_alloc(root, reformat_type, size, + reformat_data, ns_type, + pkt_reformat); + if (err) { + kfree(pkt_reformat); + return ERR_PTR(err); + } + + return pkt_reformat; +} +EXPORT_SYMBOL(mlx5_packet_reformat_alloc); + +void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, + struct mlx5_pkt_reformat 
*pkt_reformat) +{ + struct mlx5_flow_root_namespace *root; + + root = get_root_namespace(dev, pkt_reformat->ns_type); + if (WARN_ON(!root)) + return; + root->cmds->packet_reformat_dealloc(root, pkt_reformat); + kfree(pkt_reformat); +} +EXPORT_SYMBOL(mlx5_packet_reformat_dealloc); + +int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns) +{ + if (peer_ns && ns->mode != peer_ns->mode) { + mlx5_core_err(ns->dev, + "Can't peer namespace of different steering mode\n"); + return -EINVAL; + } + + return ns->cmds->set_peer(ns, peer_ns); +} + +/* This function should be called only at init stage of the namespace. + * It is not safe to call this function while steering operations + * are executed in the namespace. + */ +int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, + enum mlx5_flow_steering_mode mode) +{ + struct mlx5_flow_root_namespace *root; + const struct mlx5_flow_cmds *cmds; + int err; + + root = find_root(&ns->node); + if (&root->ns != ns) + /* Can't set cmds to non root namespace */ + return -EINVAL; + + if (root->table_type != FS_FT_FDB) + return -EOPNOTSUPP; + + if (root->mode == mode) + return 0; + + if (mode == MLX5_FLOW_STEERING_MODE_SMFS) + cmds = mlx5_fs_cmd_get_dr_cmds(); + else + cmds = mlx5_fs_cmd_get_fw_cmds(); + if (!cmds) + return -EOPNOTSUPP; + + err = cmds->create_ns(root); + if (err) { + mlx5_core_err(root->dev, "Failed to create flow namespace (%d)\n", + err); + return err; + } + + root->cmds->destroy_ns(root); + root->cmds = cmds; + root->mode = mode; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index c1252d6be0ef..00717eba2256 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -37,6 +37,24 @@ #include <linux/mlx5/fs.h> #include <linux/rhashtable.h> #include <linux/llist.h> +#include <steering/fs_dr.h> + +struct mlx5_modify_hdr { + enum mlx5_flow_namespace_type ns_type; + union { + struct mlx5_fs_dr_action action; + u32 id; + }; +}; + +struct mlx5_pkt_reformat { + enum mlx5_flow_namespace_type ns_type; + int reformat_type; /* from mlx5_ifc */ + union { + struct mlx5_fs_dr_action action; + u32 id; + }; +}; /* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only, * and those are in parallel to one another when going over them to connect @@ -80,9 +98,15 @@ enum fs_fte_status { FS_FTE_STATUS_EXISTING = 1UL << 0, }; +enum mlx5_flow_steering_mode { + MLX5_FLOW_STEERING_MODE_DMFS, + MLX5_FLOW_STEERING_MODE_SMFS +}; + struct mlx5_flow_steering { struct mlx5_core_dev *dev; - struct kmem_cache *fgs_cache; + enum mlx5_flow_steering_mode mode; + struct kmem_cache *fgs_cache; struct kmem_cache *ftes_cache; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; @@ -128,6 +152,7 @@ struct mlx5_flow_handle { /* Type of children is mlx5_flow_group */ struct mlx5_flow_table { struct fs_node node; + struct mlx5_fs_dr_table fs_dr_table; u32 id; u16 vport; unsigned int max_fte; @@ -145,6 +170,7 @@ struct mlx5_flow_table { struct list_head fwd_rules; u32 flags; struct rhltable fgs_hash; + enum mlx5_flow_table_miss_action def_miss_action; }; struct mlx5_ft_underlay_qp { @@ -167,6 +193,7 @@ struct mlx5_ft_underlay_qp { /* Type of children is mlx5_flow_rule */ struct fs_fte { struct fs_node node; + struct mlx5_fs_dr_rule fs_dr_rule; u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 index; @@ -191,6 +218,7 @@ struct 
fs_prio { struct mlx5_flow_namespace { /* parent == NULL => root ns */ struct fs_node node; + enum mlx5_flow_table_miss_action def_miss_action; }; struct mlx5_flow_group_mask { @@ -201,6 +229,7 @@ struct mlx5_flow_group_mask { /* Type of children is fs_fte */ struct mlx5_flow_group { struct fs_node node; + struct mlx5_fs_dr_matcher fs_dr_matcher; struct mlx5_flow_group_mask mask; u32 start_index; u32 max_ftes; @@ -212,6 +241,8 @@ struct mlx5_flow_group { struct mlx5_flow_root_namespace { struct mlx5_flow_namespace ns; + enum mlx5_flow_steering_mode mode; + struct mlx5_fs_dr_domain fs_dr_domain; enum fs_flow_table_type table_type; struct mlx5_core_dev *dev; struct mlx5_flow_table *root_ft; @@ -219,7 +250,6 @@ struct mlx5_flow_root_namespace { struct mutex chain_lock; struct list_head underlay_qpns; const struct mlx5_flow_cmds *cmds; - enum mlx5_flow_table_miss_action def_miss_action; }; int mlx5_init_fc_stats(struct mlx5_core_dev *dev); @@ -230,6 +260,14 @@ void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, unsigned long interval); +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); + +int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns); + +int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, + enum mlx5_flow_steering_mode mode); + int mlx5_init_fs(struct mlx5_core_dev *dev); void mlx5_cleanup_fs(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 1834d9f3aa1c..ab69effb056d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -40,6 +40,8 @@ #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) /* Max number of counters to query in bulk read is 32K */ #define MLX5_SW_MAX_COUNTERS_BULK BIT(15) +#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18) +#define MLX5_FC_POOL_USED_BUFF_RATIO 10 struct mlx5_fc_cache { u64 packets; @@ -58,12 +60,18 @@ struct mlx5_fc { u64 lastpackets; u64 lastbytes; + struct mlx5_fc_bulk *bulk; u32 id; bool aging; struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; }; +static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev); +static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool); +static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool); +static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc); + /* locking scheme: * * It is the responsibility of the user to prevent concurrent calls or bad @@ -75,7 +83,7 @@ struct mlx5_fc { * access to counter list: * - create (user context) * - mlx5_fc_create() only adds to an addlist to be used by - * mlx5_fc_stats_query_work(). addlist is a lockless single linked list + * mlx5_fc_stats_work(). addlist is a lockless single linked list * that doesn't require any additional synchronization when adding single * node. * - spawn thread to do the actual destroy @@ -136,81 +144,87 @@ static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev, spin_unlock(&fc_stats->counters_idr_lock); } -/* The function returns the last counter that was queried so the caller - * function can continue calling it till all counters are queried. 
- */ -static struct mlx5_fc *mlx5_fc_stats_query(struct mlx5_core_dev *dev, - struct mlx5_fc *first, - u32 last_id) +static int get_max_bulk_query_len(struct mlx5_core_dev *dev) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; - struct mlx5_fc *counter = NULL; - struct mlx5_cmd_fc_bulk *b; - bool more = false; - u32 afirst_id; - int num; - int err; + return min_t(int, MLX5_SW_MAX_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); +} - int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK, - (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); +static void update_counter_cache(int index, u32 *bulk_raw_data, + struct mlx5_fc_cache *cache) +{ + void *stats = MLX5_ADDR_OF(query_flow_counter_out, bulk_raw_data, + flow_statistics[index]); + u64 packets = MLX5_GET64(traffic_counter, stats, packets); + u64 bytes = MLX5_GET64(traffic_counter, stats, octets); - /* first id must be aligned to 4 when using bulk query */ - afirst_id = first->id & ~0x3; + if (cache->packets == packets) + return; - /* number of counters to query inc. the last counter */ - num = ALIGN(last_id - afirst_id + 1, 4); - if (num > max_bulk) { - num = max_bulk; - last_id = afirst_id + num - 1; - } + cache->packets = packets; + cache->bytes = bytes; + cache->lastuse = jiffies; +} - b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num); - if (!b) { - mlx5_core_err(dev, "Error allocating resources for bulk query\n"); - return NULL; - } +static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev, + struct mlx5_fc *first, + u32 last_id) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + bool query_more_counters = (first->id <= last_id); + int max_bulk_len = get_max_bulk_query_len(dev); + u32 *data = fc_stats->bulk_query_out; + struct mlx5_fc *counter = first; + u32 bulk_base_id; + int bulk_len; + int err; - err = mlx5_cmd_fc_bulk_query(dev, b); - if (err) { - mlx5_core_err(dev, "Error doing bulk query: %d\n", err); - goto out; - } + while (query_more_counters) { + /* first id must be aligned to 4 when using bulk query */ + bulk_base_id = counter->id & ~0x3; - counter = first; - list_for_each_entry_from(counter, &fc_stats->counters, list) { - struct mlx5_fc_cache *c = &counter->cache; - u64 packets; - u64 bytes; + /* number of counters to query inc. the last counter */ + bulk_len = min_t(int, max_bulk_len, + ALIGN(last_id - bulk_base_id + 1, 4)); - if (counter->id > last_id) { - more = true; - break; + err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len, + data); + if (err) { + mlx5_core_err(dev, "Error doing bulk query: %d\n", err); + return; } + query_more_counters = false; - mlx5_cmd_fc_bulk_get(dev, b, - counter->id, &packets, &bytes); + list_for_each_entry_from(counter, &fc_stats->counters, list) { + int counter_index = counter->id - bulk_base_id; + struct mlx5_fc_cache *cache = &counter->cache; - if (c->packets == packets) - continue; + if (counter->id >= bulk_base_id + bulk_len) { + query_more_counters = true; + break; + } - c->packets = packets; - c->bytes = bytes; - c->lastuse = jiffies; + update_counter_cache(counter_index, data, cache); + } } - -out: - mlx5_cmd_fc_bulk_free(b); - - return more ? 
counter : NULL; } -static void mlx5_free_fc(struct mlx5_core_dev *dev, - struct mlx5_fc *counter) +static void mlx5_fc_free(struct mlx5_core_dev *dev, struct mlx5_fc *counter) { mlx5_cmd_fc_free(dev, counter->id); kfree(counter); } +static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + if (counter->bulk) + mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter); + else + mlx5_fc_free(dev, counter); +} + static void mlx5_fc_stats_work(struct work_struct *work) { struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, @@ -234,7 +248,7 @@ static void mlx5_fc_stats_work(struct work_struct *work) llist_for_each_entry_safe(counter, tmp, dellist, dellist) { mlx5_fc_stats_remove(dev, counter); - mlx5_free_fc(dev, counter); + mlx5_fc_release(dev, counter); } if (time_before(now, fc_stats->next_query) || @@ -244,32 +258,62 @@ static void mlx5_fc_stats_work(struct work_struct *work) counter = list_first_entry(&fc_stats->counters, struct mlx5_fc, list); - while (counter) - counter = mlx5_fc_stats_query(dev, counter, last->id); + if (counter) + mlx5_fc_stats_query_counter_range(dev, counter, last->id); fc_stats->next_query = now + fc_stats->sampling_interval; } -struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) +static struct mlx5_fc *mlx5_fc_single_alloc(struct mlx5_core_dev *dev) { - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; struct mlx5_fc *counter; int err; counter = kzalloc(sizeof(*counter), GFP_KERNEL); if (!counter) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&counter->list); err = mlx5_cmd_fc_alloc(dev, &counter->id); - if (err) - goto err_out; + if (err) { + kfree(counter); + return ERR_PTR(err); + } + + return counter; +} + +static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc *counter; + + if (aging && MLX5_CAP_GEN(dev, flow_counter_bulk_alloc) != 0) { + counter = mlx5_fc_pool_acquire_counter(&fc_stats->fc_pool); + if (!IS_ERR(counter)) + return counter; + } + + return mlx5_fc_single_alloc(dev); +} + +struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging); + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int err; + + if (IS_ERR(counter)) + return counter; + + INIT_LIST_HEAD(&counter->list); + counter->aging = aging; if (aging) { u32 id = counter->id; counter->cache.lastuse = jiffies; - counter->aging = true; + counter->lastbytes = counter->cache.bytes; + counter->lastpackets = counter->cache.packets; idr_preload(GFP_KERNEL); spin_lock(&fc_stats->counters_idr_lock); @@ -290,10 +334,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) return counter; err_out_alloc: - mlx5_cmd_fc_free(dev, counter->id); -err_out: - kfree(counter); - + mlx5_fc_release(dev, counter); return ERR_PTR(err); } EXPORT_SYMBOL(mlx5_fc_create); @@ -317,13 +358,15 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) return; } - mlx5_free_fc(dev, counter); + mlx5_fc_release(dev, counter); } EXPORT_SYMBOL(mlx5_fc_destroy); int mlx5_init_fc_stats(struct mlx5_core_dev *dev) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int max_bulk_len; + int max_out_len; spin_lock_init(&fc_stats->counters_idr_lock); idr_init(&fc_stats->counters_idr); @@ -331,14 +374,25 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) init_llist_head(&fc_stats->addlist); 
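The rewritten query path above walks the counter list in id windows instead of returning one bulk per call: each window's base is the current counter id rounded down to a multiple of 4 (the firmware alignment rule noted in the comment), and the window length is clamped to the device bulk limit. The window arithmetic in isolation, as a standalone C sketch where max_bulk = 8 stands in for 1 << log_max_flow_counter_bulk:

#include <stdio.h>

/* Standalone model of the windowing in mlx5_fc_stats_query_counter_range();
 * ids are the sorted counter ids still on the list. */
static void show_windows(const unsigned int *ids, int n, unsigned int last_id,
                         unsigned int max_bulk)
{
        int i = 0;

        while (i < n && ids[i] <= last_id) {
                unsigned int base = ids[i] & ~3u;                  /* 4-aligned base */
                unsigned int len = (last_id - base + 1 + 3) & ~3u; /* 4-aligned span */

                if (len > max_bulk)
                        len = max_bulk;
                printf("bulk query ids [%u, %u)\n", base, base + len);
                while (i < n && ids[i] < base + len)    /* ids served by this window */
                        i++;
        }
}

int main(void)
{
        unsigned int ids[] = { 2, 3, 9, 10, 21 };

        show_windows(ids, 5, 21, 8);    /* max_bulk = 8 is an assumed cap */
        return 0;
}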
init_llist_head(&fc_stats->dellist); + max_bulk_len = get_max_bulk_query_len(dev); + max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len); + fc_stats->bulk_query_out = kzalloc(max_out_len, GFP_KERNEL); + if (!fc_stats->bulk_query_out) + return -ENOMEM; + fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); if (!fc_stats->wq) - return -ENOMEM; + goto err_wq_create; fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD; INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); + mlx5_fc_pool_init(&fc_stats->fc_pool, dev); return 0; + +err_wq_create: + kfree(fc_stats->bulk_query_out); + return -ENOMEM; } void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) @@ -352,14 +406,16 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) destroy_workqueue(dev->priv.fc_stats.wq); dev->priv.fc_stats.wq = NULL; - idr_destroy(&fc_stats->counters_idr); - tmplist = llist_del_all(&fc_stats->addlist); llist_for_each_entry_safe(counter, tmp, tmplist, addlist) - mlx5_free_fc(dev, counter); + mlx5_fc_release(dev, counter); list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list) - mlx5_free_fc(dev, counter); + mlx5_fc_release(dev, counter); + + mlx5_fc_pool_cleanup(&fc_stats->fc_pool); + idr_destroy(&fc_stats->counters_idr); + kfree(fc_stats->bulk_query_out); } int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, @@ -406,3 +462,243 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, fc_stats->sampling_interval = min_t(unsigned long, interval, fc_stats->sampling_interval); } + +/* Flow counter bulks */ + +struct mlx5_fc_bulk { + struct list_head pool_list; + u32 base_id; + int bulk_len; + unsigned long *bitmask; + struct mlx5_fc fcs[0]; +}; + +static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, + u32 id) +{ + counter->bulk = bulk; + counter->id = id; +} + +static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk) +{ + return bitmap_weight(bulk->bitmask, bulk->bulk_len); +} + +static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) +{ + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask; + struct mlx5_fc_bulk *bulk; + int err = -ENOMEM; + int bulk_len; + u32 base_id; + int i; + + alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc); + bulk_len = alloc_bitmask > 0 ?
MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1; + + bulk = kzalloc(sizeof(*bulk) + bulk_len * sizeof(struct mlx5_fc), + GFP_KERNEL); + if (!bulk) + goto err_alloc_bulk; + + bulk->bitmask = kcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long), + GFP_KERNEL); + if (!bulk->bitmask) + goto err_alloc_bitmask; + + err = mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id); + if (err) + goto err_mlx5_cmd_bulk_alloc; + + bulk->base_id = base_id; + bulk->bulk_len = bulk_len; + for (i = 0; i < bulk_len; i++) { + mlx5_fc_init(&bulk->fcs[i], bulk, base_id + i); + set_bit(i, bulk->bitmask); + } + + return bulk; + +err_mlx5_cmd_bulk_alloc: + kfree(bulk->bitmask); +err_alloc_bitmask: + kfree(bulk); +err_alloc_bulk: + return ERR_PTR(err); +} + +static int +mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk) +{ + if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) { + mlx5_core_err(dev, "Freeing bulk before all counters were released\n"); + return -EBUSY; + } + + mlx5_cmd_fc_free(dev, bulk->base_id); + kfree(bulk->bitmask); + kfree(bulk); + + return 0; +} + +static struct mlx5_fc *mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk) +{ + int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len); + + if (free_fc_index >= bulk->bulk_len) + return ERR_PTR(-ENOSPC); + + clear_bit(free_fc_index, bulk->bitmask); + return &bulk->fcs[free_fc_index]; +} + +static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc) +{ + int fc_index = fc->id - bulk->base_id; + + if (test_bit(fc_index, bulk->bitmask)) + return -EINVAL; + + set_bit(fc_index, bulk->bitmask); + return 0; +} + +/* Flow counters pool API */ + +static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev) +{ + fc_pool->dev = dev; + mutex_init(&fc_pool->pool_lock); + INIT_LIST_HEAD(&fc_pool->fully_used); + INIT_LIST_HEAD(&fc_pool->partially_used); + INIT_LIST_HEAD(&fc_pool->unused); + fc_pool->available_fcs = 0; + fc_pool->used_fcs = 0; + fc_pool->threshold = 0; +} + +static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *bulk; + struct mlx5_fc_bulk *tmp; + + list_for_each_entry_safe(bulk, tmp, &fc_pool->fully_used, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &fc_pool->partially_used, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &fc_pool->unused, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); +} + +static void mlx5_fc_pool_update_threshold(struct mlx5_fc_pool *fc_pool) +{ + fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD, + fc_pool->used_fcs / MLX5_FC_POOL_USED_BUFF_RATIO); +} + +static struct mlx5_fc_bulk * +mlx5_fc_pool_alloc_new_bulk(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *new_bulk; + + new_bulk = mlx5_fc_bulk_create(dev); + if (!IS_ERR(new_bulk)) + fc_pool->available_fcs += new_bulk->bulk_len; + mlx5_fc_pool_update_threshold(fc_pool); + return new_bulk; +} + +static void +mlx5_fc_pool_free_bulk(struct mlx5_fc_pool *fc_pool, struct mlx5_fc_bulk *bulk) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + + fc_pool->available_fcs -= bulk->bulk_len; + mlx5_fc_bulk_destroy(dev, bulk); + mlx5_fc_pool_update_threshold(fc_pool); +} + +static struct mlx5_fc * +mlx5_fc_pool_acquire_from_list(struct list_head *src_list, + struct list_head *next_list, + bool move_non_full_bulk) +{ + struct mlx5_fc_bulk *bulk; + struct mlx5_fc *fc; + + if (list_empty(src_list)) + 
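Inside a bulk, free counters are tracked by a bitmap: a set bit marks a free slot, acquire clears the first set bit, and release re-sets it, with test_bit catching a double release. The same idea as a standalone C sketch with a one-word bitmap and illustrative names:

#include <stdio.h>

/* One-word stand-in for the per-bulk bitmap: bit set = counter slot free. */
struct mini_bulk {
        unsigned long free_mask;
        int len;
};

static int mini_acquire(struct mini_bulk *b)
{
        int i;

        for (i = 0; i < b->len; i++) {
                if (b->free_mask & (1UL << i)) {
                        b->free_mask &= ~(1UL << i);    /* clear_bit() */
                        return i;
                }
        }
        return -1;      /* -ENOSPC in the driver */
}

static int mini_release(struct mini_bulk *b, int i)
{
        if (b->free_mask & (1UL << i))
                return -1;      /* slot already free: -EINVAL in the driver */
        b->free_mask |= 1UL << i;       /* set_bit() */
        return 0;
}

int main(void)
{
        struct mini_bulk b = { .free_mask = 0xfUL, .len = 4 };
        int slot = mini_acquire(&b);
        int first = mini_release(&b, slot);
        int again = mini_release(&b, slot);

        printf("slot %d: release=%d, double release=%d\n", slot, first, again);
        return 0;
}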
return ERR_PTR(-ENODATA); + + bulk = list_first_entry(src_list, struct mlx5_fc_bulk, pool_list); + fc = mlx5_fc_bulk_acquire_fc(bulk); + if (move_non_full_bulk || mlx5_fc_bulk_get_free_fcs_amount(bulk) == 0) + list_move(&bulk->pool_list, next_list); + return fc; +} + +static struct mlx5_fc * +mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_fc_bulk *new_bulk; + struct mlx5_fc *fc; + + mutex_lock(&fc_pool->pool_lock); + + fc = mlx5_fc_pool_acquire_from_list(&fc_pool->partially_used, + &fc_pool->fully_used, false); + if (IS_ERR(fc)) + fc = mlx5_fc_pool_acquire_from_list(&fc_pool->unused, + &fc_pool->partially_used, + true); + if (IS_ERR(fc)) { + new_bulk = mlx5_fc_pool_alloc_new_bulk(fc_pool); + if (IS_ERR(new_bulk)) { + fc = ERR_CAST(new_bulk); + goto out; + } + fc = mlx5_fc_bulk_acquire_fc(new_bulk); + list_add(&new_bulk->pool_list, &fc_pool->partially_used); + } + fc_pool->available_fcs--; + fc_pool->used_fcs++; + +out: + mutex_unlock(&fc_pool->pool_lock); + return fc; +} + +static void +mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *bulk = fc->bulk; + int bulk_free_fcs_amount; + + mutex_lock(&fc_pool->pool_lock); + + if (mlx5_fc_bulk_release_fc(bulk, fc)) { + mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n"); + goto unlock; + } + + fc_pool->available_fcs++; + fc_pool->used_fcs--; + + bulk_free_fcs_amount = mlx5_fc_bulk_get_free_fcs_amount(bulk); + if (bulk_free_fcs_amount == 1) + list_move_tail(&bulk->pool_list, &fc_pool->partially_used); + if (bulk_free_fcs_amount == bulk->bulk_len) { + list_del(&bulk->pool_list); + if (fc_pool->available_fcs > fc_pool->threshold) + mlx5_fc_pool_free_bulk(fc_pool, bulk); + else + list_add(&bulk->pool_list, &fc_pool->unused); + } + +unlock: + mutex_unlock(&fc_pool->pool_lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 1a2560e3bf7c..3ed8ab2d703d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -279,7 +279,7 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv) return err; } - err = mlx5i_create_tis(priv->mdev, ipriv->qp.qpn, &priv->tisn[0]); + err = mlx5i_create_tis(priv->mdev, ipriv->qp.qpn, &priv->tisn[0][0]); if (err) { mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); goto err_destroy_underlay_qp; @@ -296,7 +296,7 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) { struct mlx5i_priv *ipriv = priv->ppriv; - mlx5e_destroy_tis(priv->mdev, priv->tisn[0]); + mlx5e_destroy_tis(priv->mdev, priv->tisn[0][0]); mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index c5a491e22e55..96e64187c089 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -210,7 +210,7 @@ static int mlx5i_pkey_open(struct net_device *netdev) goto err_unint_underlay_qp; } - err = mlx5i_create_tis(mdev, ipriv->qp.qpn, &epriv->tisn[0]); + err = mlx5i_create_tis(mdev, ipriv->qp.qpn, &epriv->tisn[0][0]); if (err) { mlx5_core_warn(mdev, "create child tis failed, %d\n", err); goto err_remove_rx_uderlay_qp; @@ -228,7 +228,7 @@ static int mlx5i_pkey_open(struct net_device *netdev) return 0; err_clear_state_opened_flag: - 
mlx5e_destroy_tis(mdev, epriv->tisn[0]); + mlx5e_destroy_tis(mdev, epriv->tisn[0][0]); err_remove_rx_uderlay_qp: mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); err_unint_underlay_qp: @@ -257,7 +257,7 @@ static int mlx5i_pkey_close(struct net_device *netdev) mlx5i_uninit_underlay_qp(priv); mlx5e_deactivate_priv_channels(priv); mlx5e_close_channels(&priv->channels); - mlx5e_destroy_tis(mdev, priv->tisn[0]); + mlx5e_destroy_tis(mdev, priv->tisn[0][0]); unlock: mutex_unlock(&priv->state_lock); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index e69766393990..5d20d615663e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -248,6 +248,9 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, struct net_device *fib_dev; struct fib_info *fi; + if (!net_eq(info->net, &init_net)) + return NOTIFY_DONE; + if (info->family != AF_INET) return NOTIFY_DONE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c index ea1d4d26ece0..3fc575d1c3ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -2,6 +2,7 @@ // Copyright (c) 2019 Mellanox Technologies. #include "mlx5_core.h" +#include "lib/mlx5.h" int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, void *key, u32 sz_bytes, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c new file mode 100644 index 000000000000..e065c2f68f5a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> + +#include "mlx5_core.h" +#include "lib/mlx5.h" + +struct mlx5_dm { + /* protect access to icm bitmask */ + spinlock_t lock; + unsigned long *steering_sw_icm_alloc_blocks; + unsigned long *header_modify_sw_icm_alloc_blocks; +}; + +struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) +{ + u64 header_modify_icm_blocks = 0; + u64 steering_icm_blocks = 0; + struct mlx5_dm *dm; + + if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)) + return 0; + + dm = kzalloc(sizeof(*dm), GFP_KERNEL); + if (!dm) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&dm->lock); + + if (MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address)) { + steering_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->steering_sw_icm_alloc_blocks = + kcalloc(BITS_TO_LONGS(steering_icm_blocks), + sizeof(unsigned long), GFP_KERNEL); + if (!dm->steering_sw_icm_alloc_blocks) + goto err_steering; + } + + if (MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address)) { + header_modify_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->header_modify_sw_icm_alloc_blocks = + kcalloc(BITS_TO_LONGS(header_modify_icm_blocks), + sizeof(unsigned long), GFP_KERNEL); + if (!dm->header_modify_sw_icm_alloc_blocks) + goto err_modify_hdr; + } + + return dm; + +err_modify_hdr: + kfree(dm->steering_sw_icm_alloc_blocks); + +err_steering: + kfree(dm); + + return ERR_PTR(-ENOMEM); +} + +void mlx5_dm_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_dm *dm = dev->dm; + + if (!dev->dm) + return; + + if 
(dm->steering_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->steering_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + kfree(dm->steering_sw_icm_alloc_blocks); + } + + if (dm->header_modify_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->header_modify_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, + log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + kfree(dm->header_modify_sw_icm_alloc_blocks); + } + + kfree(dm); +} + +int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, + u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id) +{ + u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {}; + struct mlx5_dm *dm = dev->dm; + unsigned long *block_map; + u64 icm_start_addr; + u32 log_icm_size; + u32 max_blocks; + u64 block_idx; + void *sw_icm; + int ret; + + if (!dev->dm) + return -EOPNOTSUPP; + + if (!length || (length & (length - 1)) || + length & (MLX5_SW_ICM_BLOCK_SIZE(dev) - 1)) + return -EINVAL; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + switch (type) { + case MLX5_SW_ICM_TYPE_STEERING: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_header_modify_sw_icm_size); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + if (!block_map) + return -EOPNOTSUPP; + + max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + spin_lock(&dm->lock); + block_idx = bitmap_find_next_zero_area(block_map, + max_blocks, + 0, + num_blocks, 0); + + if (block_idx < max_blocks) + bitmap_set(block_map, + block_idx, num_blocks); + + spin_unlock(&dm->lock); + + if (block_idx >= max_blocks) + return -ENOMEM; + + sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm); + icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr, + icm_start_addr); + MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) { + spin_lock(&dm->lock); + bitmap_clear(block_map, + block_idx, num_blocks); + spin_unlock(&dm->lock); + + return ret; + } + + *addr = icm_start_addr; + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_alloc); + +int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, + u64 length, u16 uid, phys_addr_t addr, u32 obj_id) +{ + u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + struct mlx5_dm *dm = dev->dm; + unsigned long *block_map; + u64 icm_start_addr; + u64 start_idx; + int err; + + if (!dev->dm) + return -EOPNOTSUPP; + + switch (type) { + case MLX5_SW_ICM_TYPE_STEERING: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case 
MLX5_SW_ICM_TYPE_HEADER_MODIFY: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + start_idx = (addr - icm_start_addr) >> MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + spin_lock(&dm->lock); + bitmap_clear(block_map, + start_idx, num_blocks); + spin_unlock(&dm->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_dealloc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 3dfab91ae5f2..4be4d2d36218 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -87,7 +87,7 @@ void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); /* This function should only be called after mlx5_cmd_force_teardown_hca */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c new file mode 100644 index 000000000000..583dc7e2aca8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include <linux/hyperv.h> +#include "mlx5_core.h" +#include "lib/hv.h" + +static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len, + int offset, bool read) +{ + int rc = -EOPNOTSUPP; + int bytes_returned; + int block_id; + + if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len != HV_CONFIG_BLOCK_SIZE_MAX) + return -EINVAL; + + block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX; + + rc = read ? + hyperv_read_cfg_blk(dev->pdev, buf, + HV_CONFIG_BLOCK_SIZE_MAX, block_id, + &bytes_returned) : + hyperv_write_cfg_blk(dev->pdev, buf, + HV_CONFIG_BLOCK_SIZE_MAX, block_id); + + /* Make sure len bytes were read successfully */ + if (read && !rc && len != bytes_returned) + rc = -EIO; + + if (rc) { + mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n", + read ? 
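The SW ICM allocator above reserves runs of fixed-size blocks in a bitmap and converts the first block index into a device address; dealloc inverts the math before clearing the run. Just that index/address conversion as a standalone C sketch, assuming 4 KB blocks where the driver reads the real size from MLX5_LOG_SW_ICM_BLOCK_SIZE(dev):

#include <stdint.h>
#include <stdio.h>

#define LOG_BLOCK_SZ 12 /* assumed 4 KB blocks for the example */

/* alloc direction: first reserved block index -> device address */
static uint64_t block_to_addr(uint64_t icm_start, uint64_t block_idx)
{
        return icm_start + (block_idx << LOG_BLOCK_SZ);
}

/* dealloc direction: device address -> first block index to clear */
static uint64_t addr_to_block(uint64_t icm_start, uint64_t addr)
{
        return (addr - icm_start) >> LOG_BLOCK_SZ;
}

int main(void)
{
        uint64_t start = 0x100000000ULL;        /* illustrative ICM base */
        uint64_t addr = block_to_addr(start, 5);

        printf("block 5 -> %#llx -> block %llu\n",
               (unsigned long long)addr,
               (unsigned long long)addr_to_block(start, addr));
        return 0;
}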
"read" : "write", rc, len, + offset); + return rc; + } + + return 0; +} + +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset) +{ + return mlx5_hv_config_common(dev, buf, len, offset, true); +} + +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset) +{ + return mlx5_hv_config_common(dev, buf, len, offset, false); +} + +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)) +{ + return hyperv_reg_block_invalidate(dev->pdev, context, + block_invalidate); +} + +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev) +{ + hyperv_reg_block_invalidate(dev->pdev, NULL, NULL); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h new file mode 100644 index 000000000000..f9a45573f459 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __LIB_HV_H__ +#define __LIB_HV_H__ + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +#include <linux/hyperv.h> +#include <linux/mlx5/driver.h> + +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset); +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset); +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)); +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev); +#endif + +#endif /* __LIB_HV_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c new file mode 100644 index 000000000000..4047629a876b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include <linux/hyperv.h> +#include "mlx5_core.h" +#include "lib/hv.h" +#include "lib/hv_vhca.h" + +struct mlx5_hv_vhca { + struct mlx5_core_dev *dev; + struct workqueue_struct *work_queue; + struct mlx5_hv_vhca_agent *agents[MLX5_HV_VHCA_AGENT_MAX]; + struct mutex agents_lock; /* Protect agents array */ +}; + +struct mlx5_hv_vhca_work { + struct work_struct invalidate_work; + struct mlx5_hv_vhca *hv_vhca; + u64 block_mask; +}; + +struct mlx5_hv_vhca_data_block { + u16 sequence; + u16 offset; + u8 reserved[4]; + u64 data[15]; +}; + +struct mlx5_hv_vhca_agent { + enum mlx5_hv_vhca_agent_type type; + struct mlx5_hv_vhca *hv_vhca; + void *priv; + u16 seq; + void (*control)(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_control_block *block); + void (*invalidate)(struct mlx5_hv_vhca_agent *agent, + u64 block_mask); + void (*cleanup)(struct mlx5_hv_vhca_agent *agent); +}; + +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev) +{ + struct mlx5_hv_vhca *hv_vhca = NULL; + + hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL); + if (!hv_vhca) + return ERR_PTR(-ENOMEM); + + hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca"); + if (!hv_vhca->work_queue) { + kfree(hv_vhca); + return ERR_PTR(-ENOMEM); + } + + hv_vhca->dev = dev; + mutex_init(&hv_vhca->agents_lock); + + return hv_vhca; +} + +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca) +{ + if (IS_ERR_OR_NULL(hv_vhca)) + return; + + destroy_workqueue(hv_vhca->work_queue); + kfree(hv_vhca); +} + +static void 
mlx5_hv_vhca_invalidate_work(struct work_struct *work) +{ + struct mlx5_hv_vhca_work *hwork; + struct mlx5_hv_vhca *hv_vhca; + int i; + + hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work); + hv_vhca = hwork->hv_vhca; + + mutex_lock(&hv_vhca->agents_lock); + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (!agent || !agent->invalidate) + continue; + + if (!(BIT(agent->type) & hwork->block_mask)) + continue; + + agent->invalidate(agent, hwork->block_mask); + } + mutex_unlock(&hv_vhca->agents_lock); + + kfree(hwork); +} + +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask) +{ + struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context; + struct mlx5_hv_vhca_work *work; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work); + work->hv_vhca = hv_vhca; + work->block_mask = block_mask; + + queue_work(hv_vhca->work_queue, &work->invalidate_work); +} + +#define AGENT_MASK(type) (type ? BIT(type - 1) : 0 /* control */) + +static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca, + struct mlx5_hv_vhca_control_block *block) +{ + int i; + + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (!agent || !agent->control) + continue; + + if (!(AGENT_MASK(agent->type) & block->control)) + continue; + + agent->control(agent, block); + } +} + +static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca, + u32 *capabilities) +{ + int i; + + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (agent) + *capabilities |= AGENT_MASK(agent->type); + } +} + +static void +mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent, + u64 block_mask) +{ + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca; + struct mlx5_core_dev *dev = hv_vhca->dev; + struct mlx5_hv_vhca_control_block *block; + u32 capabilities = 0; + int err; + + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (!block) + return; + + err = mlx5_hv_read_config(dev, block, sizeof(*block), 0); + if (err) + goto free_block; + + mlx5_hv_vhca_capabilities(hv_vhca, &capabilities); + + /* In case no capabilities, send empty block in return */ + if (!capabilities) { + memset(block, 0, sizeof(*block)); + goto write; + } + + if (block->capabilities != capabilities) + block->capabilities = capabilities; + + if (block->control & ~capabilities) + goto free_block; + + mlx5_hv_vhca_agents_control(hv_vhca, block); + block->command_ack = block->command; + +write: + mlx5_hv_write_config(dev, block, sizeof(*block), 0); + +free_block: + kfree(block); +} + +static struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca) +{ + return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL, + NULL, + mlx5_hv_vhca_control_agent_invalidate, + NULL, NULL); +} + +static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ + mlx5_hv_vhca_agent_destroy(agent); +} + +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca) +{ + struct mlx5_hv_vhca_agent *agent; + int err; + + if (IS_ERR_OR_NULL(hv_vhca)) + return IS_ERR_OR_NULL(hv_vhca); + + err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca, + mlx5_hv_vhca_invalidate); + if (err) + return err; + + agent = mlx5_hv_vhca_control_agent_create(hv_vhca); + if (IS_ERR_OR_NULL(agent)) { + mlx5_hv_unregister_invalidate(hv_vhca->dev); + return 
IS_ERR_OR_NULL(agent); + } + + hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent; + + return 0; +} + +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca) +{ + struct mlx5_hv_vhca_agent *agent; + int i; + + if (IS_ERR_OR_NULL(hv_vhca)) + return; + + agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL]; + if (agent) + mlx5_hv_vhca_control_agent_destroy(agent); + + mutex_lock(&hv_vhca->agents_lock); + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) + WARN_ON(hv_vhca->agents[i]); + + mutex_unlock(&hv_vhca->agents_lock); + + mlx5_hv_unregister_invalidate(hv_vhca->dev); +} + +static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca) +{ + mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL)); +} + +struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleanup)(struct mlx5_hv_vhca_agent *agent), + void *priv) +{ + struct mlx5_hv_vhca_agent *agent; + + if (IS_ERR_OR_NULL(hv_vhca)) + return ERR_PTR(-ENOMEM); + + if (type >= MLX5_HV_VHCA_AGENT_MAX) + return ERR_PTR(-EINVAL); + + mutex_lock(&hv_vhca->agents_lock); + if (hv_vhca->agents[type]) { + mutex_unlock(&hv_vhca->agents_lock); + return ERR_PTR(-EINVAL); + } + mutex_unlock(&hv_vhca->agents_lock); + + agent = kzalloc(sizeof(*agent), GFP_KERNEL); + if (!agent) + return ERR_PTR(-ENOMEM); + + agent->type = type; + agent->hv_vhca = hv_vhca; + agent->priv = priv; + agent->control = control; + agent->invalidate = invalidate; + agent->cleanup = cleanup; + + mutex_lock(&hv_vhca->agents_lock); + hv_vhca->agents[type] = agent; + mutex_unlock(&hv_vhca->agents_lock); + + mlx5_hv_vhca_agents_update(hv_vhca); + + return agent; +} + +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca; + + mutex_lock(&hv_vhca->agents_lock); + + if (WARN_ON(agent != hv_vhca->agents[agent->type])) { + mutex_unlock(&hv_vhca->agents_lock); + return; + } + + hv_vhca->agents[agent->type] = NULL; + mutex_unlock(&hv_vhca->agents_lock); + + if (agent->cleanup) + agent->cleanup(agent); + + kfree(agent); + + mlx5_hv_vhca_agents_update(hv_vhca); +} + +static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_data_block *data_block, + void *src, int len, int *offset) +{ + int bytes = min_t(int, (int)sizeof(data_block->data), len); + + data_block->sequence = agent->seq; + data_block->offset = (*offset)++; + memcpy(data_block->data, src, bytes); + + return bytes; +} + +static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent) +{ + agent->seq++; +} + +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent, + void *buf, int len) +{ + int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX; + int block_offset = 0; + int total = 0; + int err; + + while (len) { + struct mlx5_hv_vhca_data_block data_block = {0}; + int bytes; + + bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block, + buf + total, + len, &block_offset); + if (!bytes) + return -ENOMEM; + + err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block, + sizeof(data_block), offset); + if (err) + return err; + + total += bytes; + len -= bytes; + } + + mlx5_hv_vhca_agent_seq_update(agent); + + return 0; +} + +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent) +{ + return agent->priv; +}
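Consumers attach to the vHCA channel by claiming one of the agent slots; registering or destroying an agent kicks the control agent so the host sees the updated capability mask. A sketch of a hypothetical consumer on the STATS slot, kernel context assumed and all names illustrative:

#include <linux/err.h>
#include "lib/hv_vhca.h"

/* Sketch only: the callback body and priv payload are illustrative. */
static void example_invalidate(struct mlx5_hv_vhca_agent *agent, u64 block_mask)
{
        /* the host rewrote our block(s); re-read the config space here */
}

static int example_register(struct mlx5_hv_vhca *hv_vhca, void *priv)
{
        struct mlx5_hv_vhca_agent *agent;

        agent = mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_STATS,
                                          NULL,                 /* no control cb */
                                          example_invalidate,
                                          NULL,                 /* no cleanup cb */
                                          priv);
        return PTR_ERR_OR_ZERO(agent);
}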
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h new file mode 100644 index 000000000000..4bad6a5fde56 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __LIB_HV_VHCA_H__ +#define __LIB_HV_VHCA_H__ + +#include "en.h" +#include "lib/hv.h" + +struct mlx5_hv_vhca_agent; +struct mlx5_hv_vhca; +struct mlx5_hv_vhca_control_block; + +enum mlx5_hv_vhca_agent_type { + MLX5_HV_VHCA_AGENT_CONTROL = 0, + MLX5_HV_VHCA_AGENT_STATS = 1, + MLX5_HV_VHCA_AGENT_MAX = 32, +}; + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +struct mlx5_hv_vhca_control_block { + u32 capabilities; + u32 control; + u16 command; + u16 command_ack; + u16 version; + u16 rings; + u32 reserved1[28]; }; + +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev); +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca); +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca); +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca); +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask); + +struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleanup)(struct mlx5_hv_vhca_agent *agent), + void *context); + +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent); +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent, + void *buf, int len); +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent); + +#else + +static inline struct mlx5_hv_vhca * +mlx5_hv_vhca_create(struct mlx5_core_dev *dev) +{ + return NULL; +} + +static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca) +{ +} + +static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca) +{ + return 0; +} + +static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca) +{ +} + +static inline void mlx5_hv_vhca_invalidate(void *context, + u64 block_mask) +{ +} + +static inline struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleanup)(struct mlx5_hv_vhca_agent *agent), + void *context) +{ + return NULL; +} + +static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ +} + +static inline int +mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent, + void *buf, int len) +{ + return 0; +} +#endif + +#endif /* __LIB_HV_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c index b9d4f4e19ff9..148b55c3db7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c @@ -32,6 +32,7 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <linux/refcount.h> #include <linux/mlx5/driver.h> #include <net/vxlan.h> #include "mlx5_core.h" @@ -48,7 +49,7 @@ struct mlx5_vxlan { struct mlx5_vxlan_port { struct hlist_node hlist; - atomic_t refcount; + refcount_t refcount; u16 udp_port; };
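The conversion above from atomic_t to refcount_t changes failure behaviour rather than API shape: refcount_t saturates instead of wrapping on overflow and warns on increment-from-zero, both of which atomic_t silently allowed. The lookup-then-get idiom the vxlan (and, further down, qp.c) call sites rely on, as a minimal kernel-style sketch with illustrative names:

#include <linux/refcount.h>

/* Sketch only: the struct and helper names are illustrative. */
struct tracked_port {
        refcount_t refcount;
};

static void port_init(struct tracked_port *p)
{
        refcount_set(&p->refcount, 1);          /* creator's reference */
}

static void port_hold(struct tracked_port *p)
{
        refcount_inc(&p->refcount);             /* WARNs if count was 0 */
}

static bool port_put(struct tracked_port *p)
{
        /* true only on the final put, as in mlx5_vxlan_del_port() */
        return refcount_dec_and_test(&p->refcount);
}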
@@ -113,7 +114,7 @@ int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) vxlanp = mlx5_vxlan_lookup_port(vxlan, port); if (vxlanp) { - atomic_inc(&vxlanp->refcount); + refcount_inc(&vxlanp->refcount); return 0; } @@ -137,7 +138,7 @@ int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) } vxlanp->udp_port = port; - atomic_set(&vxlanp->refcount, 1); + refcount_set(&vxlanp->refcount, 1); spin_lock_bh(&vxlan->lock); hash_add(vxlan->htable, &vxlanp->hlist, port); @@ -170,7 +171,7 @@ int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) goto out_unlock; } - if (atomic_dec_and_test(&vxlanp->refcount)) { + if (refcount_dec_and_test(&vxlanp->refcount)) { hash_del(&vxlanp->hlist); remove = true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b15b27a497fc..9648c2297803 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -69,6 +69,7 @@ #include "lib/pci_vsc.h" #include "diag/fw_tracer.h" #include "ecpf.h" +#include "lib/hv_vhca.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -495,6 +496,12 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev) ODP_CAP_SET_MAX(dev, xrc_odp_caps.write); ODP_CAP_SET_MAX(dev, xrc_odp_caps.read); ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic); + ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, dc_odp_caps.send); + ODP_CAP_SET_MAX(dev, dc_odp_caps.receive); + ODP_CAP_SET_MAX(dev, dc_odp_caps.write); + ODP_CAP_SET_MAX(dev, dc_odp_caps.read); + ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic); if (do_set) err = set_caps(dev, set_ctx, set_sz, @@ -826,11 +833,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_eq_cleanup; } - err = mlx5_cq_debugfs_init(dev); - if (err) { - mlx5_core_err(dev, "failed to initialize cq debugfs\n"); - goto err_events_cleanup; - } + mlx5_cq_debugfs_init(dev); mlx5_init_qp_table(dev); @@ -873,7 +876,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_eswitch_cleanup; } + dev->dm = mlx5_dm_create(dev); + if (IS_ERR(dev->dm)) + mlx5_core_warn(dev, "Failed to init device memory, err=%ld\n", PTR_ERR(dev->dm)); + dev->tracer = mlx5_fw_tracer_create(dev); + dev->hv_vhca = mlx5_hv_vhca_create(dev); return 0; @@ -891,7 +899,6 @@ err_tables_cleanup: mlx5_cleanup_mkey_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); -err_events_cleanup: mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); @@ -905,7 +912,9 @@ err_devcom: static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { + mlx5_hv_vhca_destroy(dev->hv_vhca); mlx5_fw_tracer_destroy(dev->tracer); + mlx5_dm_cleanup(dev); mlx5_fpga_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); mlx5_sriov_cleanup(dev); @@ -1072,6 +1081,8 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_fw_tracer; } + mlx5_hv_vhca_init(dev->hv_vhca); + err = mlx5_fpga_device_start(dev); if (err) { mlx5_core_err(dev, "fpga device start failed %d\n", err); @@ -1127,6 +1138,7 @@ err_tls_start: err_ipsec_start: mlx5_fpga_device_stop(dev); err_fpga_start: + mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_tracer_cleanup(dev->tracer); err_fw_tracer: mlx5_eq_table_destroy(dev); @@ -1147,6 +1159,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_accel_ipsec_cleanup(dev); mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); + mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); mlx5_irq_table_destroy(dev); @@ -1217,8 +1230,10 @@ static int
mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) { int err = 0; - if (cleanup) + if (cleanup) { + mlx5_unregister_device(dev); mlx5_drain_health_wq(dev); + } mutex_lock(&dev->intf_state_mutex); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { @@ -1369,7 +1384,6 @@ static void remove_one(struct pci_dev *pdev) mlx5_crdump_disable(dev); mlx5_devlink_unregister(devlink); - mlx5_unregister_device(dev); if (mlx5_unload_one(dev, true)) { mlx5_core_err(dev, "mlx5_unload_one failed\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 471bbc48bc1f..b100489dc85c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -146,7 +146,7 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev); void mlx5_cmd_flush(struct mlx5_core_dev *dev); -int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, @@ -198,6 +198,9 @@ int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); +struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev); +void mlx5_dm_cleanup(struct mlx5_core_dev *dev); + #define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \ MLX5_CAP_GEN((mdev), pps_modify) && \ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index b8ba74de9555..c3aea4cc2fff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -53,7 +53,7 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) common = radix_tree_lookup(&table->tree, rsn); if (common) - atomic_inc(&common->refcount); + refcount_inc(&common->refcount); spin_unlock_irqrestore(&table->lock, flags); @@ -62,7 +62,7 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common) { - if (atomic_dec_and_test(&common->refcount)) + if (refcount_dec_and_test(&common->refcount)) complete(&common->free); } @@ -162,7 +162,7 @@ static int rsc_event_notifier(struct notifier_block *nb, common = mlx5_get_rsc(table, rsn); if (!common) { - mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", rsn); + mlx5_core_dbg(dev, "Async event for unknown resource 0x%x\n", rsn); return NOTIFY_OK; } @@ -209,7 +209,7 @@ static int create_resource_common(struct mlx5_core_dev *dev, if (err) return err; - atomic_set(&qp->common.refcount, 1); + refcount_set(&qp->common.refcount, 1); init_completion(&qp->common.free); qp->pid = current->pid; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 17ce9dd56b13..0fc7de4aa572 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -14,9 +14,6 @@ static void mlx5_rdma_disable_roce_steering(struct mlx5_core_dev *dev) { struct mlx5_core_roce *roce = &dev->priv.roce; - if (!roce->ft) - return; - mlx5_del_flow_rules(roce->allow_rule); mlx5_destroy_flow_group(roce->fg); mlx5_destroy_flow_table(roce->ft); @@ -51,7 +48,7 @@ static int 
mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) return -ENOMEM; } - ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX); + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL); if (!ns) { mlx5_core_err(dev, "Failed to get RDMA RX namespace"); err = -EOPNOTSUPP; @@ -145,6 +142,11 @@ static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) { + struct mlx5_core_roce *roce = &dev->priv.roce; + + if (!roce->ft) + return; + mlx5_rdma_disable_roce_steering(dev); mlx5_rdma_del_roce_addr(dev); mlx5_nic_vport_disable_roce(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile new file mode 100644 index 000000000000..c78512eed8d7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c new file mode 100644 index 000000000000..7d81a7735de5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -0,0 +1,1588 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +enum dr_action_domain { + DR_ACTION_DOMAIN_NIC_INGRESS, + DR_ACTION_DOMAIN_NIC_EGRESS, + DR_ACTION_DOMAIN_FDB_INGRESS, + DR_ACTION_DOMAIN_FDB_EGRESS, + DR_ACTION_DOMAIN_MAX, +}; + +enum dr_action_valid_state { + DR_ACTION_STATE_ERR, + DR_ACTION_STATE_NO_ACTION, + DR_ACTION_STATE_REFORMAT, + DR_ACTION_STATE_MODIFY_HDR, + DR_ACTION_STATE_MODIFY_VLAN, + DR_ACTION_STATE_NON_TERM, + DR_ACTION_STATE_TERM, + DR_ACTION_STATE_MAX, +}; + +static const enum dr_action_valid_state +next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] = { + [DR_ACTION_DOMAIN_NIC_INGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_REFORMAT] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + }, + [DR_ACTION_STATE_MODIFY_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_MODIFY_HDR] = 
DR_ACTION_STATE_MODIFY_HDR, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_NIC_EGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_REFORMAT] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_MODIFY_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_FDB_INGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_REFORMAT] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_VPORT] = 
DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_MODIFY_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_FDB_EGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_REFORMAT] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_MODIFY_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_REFORMAT, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_MODIFY_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, +}; + +struct dr_action_modify_field_conv { + u16 hw_field; + u8 start; + u8 end; + u8 l3_type; + u8 l4_type; +}; + +static const struct dr_action_modify_field_conv dr_action_conv_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_1, .start = 16, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_1, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_2, .start = 32, 
.end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_0, .start = 16, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_0, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 0, .end = 5, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 48, .end = 56, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 0, .end = 15, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 16, .end = 31, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 8, .end = 15, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_1, .start = 8, .end = 15, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 0, .end = 15, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_0, .start = 16, .end = 31, + .l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_3, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_3, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_4, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_4, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_2, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_2, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 0, .end = 31, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L3_0, .start = 32, .end = 63, + .l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_METADATA, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_METADATA, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_0, .start = 32, .end = 63, + }, + 
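+	/* Note the register pairing among the REG_C entries here: each 64-bit
+	 * HW modify field (MLX5DR_ACTION_MDFY_HW_FLD_REG_0/1/2) holds two
+	 * 32-bit SW registers, the even-numbered REG_C in bits 32-63 and the
+	 * odd-numbered one in bits 0-31. A SET of REG_C_1, for instance, is
+	 * emitted as a write to MLX5DR_ACTION_MDFY_HW_FLD_REG_0 with
+	 * destination_left_shifter 0.
+	 */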
[MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_1, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_2, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_REG_2, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_1, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L4_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = MLX5DR_ACTION_MDFY_HW_FLD_L2_2, .start = 0, .end = 15, + }, +}; + +#define MAX_VLANS 2 +struct dr_action_vlan_info { + int count; + u32 headers[MAX_VLANS]; +}; + +struct dr_action_apply_attr { + u32 modify_index; + u16 modify_actions; + u32 decap_index; + u16 decap_actions; + u8 decap_with_vlan:1; + u64 final_icm_addr; + u32 flow_tag; + u32 ctr_id; + u16 gvmi; + u16 hit_gvmi; + u32 reformat_id; + u32 reformat_size; + struct dr_action_vlan_info vlans; +}; + +static int +dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type, + enum mlx5dr_action_type *action_type) +{ + switch (reformat_type) { + case DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2: + *action_type = DR_ACTION_TYP_TNL_L2_TO_L2; + break; + case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2: + *action_type = DR_ACTION_TYP_L2_TO_TNL_L2; + break; + case DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2: + *action_type = DR_ACTION_TYP_TNL_L3_TO_L2; + break; + case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3: + *action_type = DR_ACTION_TYP_L2_TO_TNL_L3; + break; + default: + return -EINVAL; + } + + return 0; +} + +static void dr_actions_init_next_ste(u8 **last_ste, + u32 *added_stes, + enum mlx5dr_ste_entry_type entry_type, + u16 gvmi) +{ + (*added_stes)++; + *last_ste += DR_STE_SIZE; + mlx5dr_ste_init(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE, entry_type, gvmi); +} + +static void dr_actions_apply_tx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u8 *last_ste, + struct dr_action_apply_attr *attr, + u32 *added_stes) +{ + bool encap = action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2] || + action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]; + + /* We want to make sure the modify header comes before L2 + * encapsulation. The reason for that is that we support + * modify headers for outer headers only + */ + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT); + mlx5dr_ste_set_rewrite_actions(last_ste, + attr->modify_actions, + attr->modify_index); + } + + if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (i || action_type_set[DR_ACTION_TYP_MODIFY_HDR]) + dr_actions_init_next_ste(&last_ste, + added_stes, + MLX5DR_STE_TYPE_TX, + attr->gvmi); + + mlx5dr_ste_set_tx_push_vlan(last_ste, + attr->vlans.headers[i], + encap); + } + } + + if (encap) { + /* Modify header and encapsulation require a different STEs. + * Since modify header STE format doesn't support encapsulation + * tunneling_action. 
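+		 * When a modify header or a pushed VLAN precedes the encap,
+		 * the encap is therefore emitted into a freshly initialized
+		 * TX STE below.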
+		 */
+		if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] ||
+		    action_type_set[DR_ACTION_TYP_PUSH_VLAN])
+			dr_actions_init_next_ste(&last_ste,
+						 added_stes,
+						 MLX5DR_STE_TYPE_TX,
+						 attr->gvmi);
+
+		mlx5dr_ste_set_tx_encap(last_ste,
+					attr->reformat_id,
+					attr->reformat_size,
+					action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]);
+		/* When prio_tag_required is enabled, we can be sure that the
+		 * previous table (ACL) has already pushed a VLAN onto our
+		 * packet, and due to a HW limitation we need to set this bit;
+		 * otherwise push VLAN + reformat will not work.
+		 */
+		if (MLX5_CAP_GEN(dmn->mdev, prio_tag_required))
+			mlx5dr_ste_set_go_back_bit(last_ste);
+	}
+
+	if (action_type_set[DR_ACTION_TYP_CTR])
+		mlx5dr_ste_set_counter_id(last_ste, attr->ctr_id);
+}
+
+static void dr_actions_apply_rx(u8 *action_type_set,
+				u8 *last_ste,
+				struct dr_action_apply_attr *attr,
+				u32 *added_stes)
+{
+	if (action_type_set[DR_ACTION_TYP_CTR])
+		mlx5dr_ste_set_counter_id(last_ste, attr->ctr_id);
+
+	if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
+		mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+		mlx5dr_ste_set_rx_decap_l3(last_ste, attr->decap_with_vlan);
+		mlx5dr_ste_set_rewrite_actions(last_ste,
+					       attr->decap_actions,
+					       attr->decap_index);
+	}
+
+	if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2])
+		mlx5dr_ste_set_rx_decap(last_ste);
+
+	if (action_type_set[DR_ACTION_TYP_POP_VLAN]) {
+		int i;
+
+		for (i = 0; i < attr->vlans.count; i++) {
+			if (i ||
+			    action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2] ||
+			    action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2])
+				dr_actions_init_next_ste(&last_ste,
+							 added_stes,
+							 MLX5DR_STE_TYPE_RX,
+							 attr->gvmi);
+
+			mlx5dr_ste_set_rx_pop_vlan(last_ste);
+		}
+	}
+
+	if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+		if (mlx5dr_ste_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT)
+			dr_actions_init_next_ste(&last_ste,
+						 added_stes,
+						 MLX5DR_STE_TYPE_MODIFY_PKT,
+						 attr->gvmi);
+		else
+			mlx5dr_ste_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+
+		mlx5dr_ste_set_rewrite_actions(last_ste,
+					       attr->modify_actions,
+					       attr->modify_index);
+	}
+
+	if (action_type_set[DR_ACTION_TYP_TAG]) {
+		if (mlx5dr_ste_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT)
+			dr_actions_init_next_ste(&last_ste,
+						 added_stes,
+						 MLX5DR_STE_TYPE_RX,
+						 attr->gvmi);
+
+		mlx5dr_ste_rx_set_flow_tag(last_ste, attr->flow_tag);
+	}
+}
+
+/* Apply the actions on the rule STE array starting from the last_ste.
+ * Actions might require more than one STE; new_num_stes returns the
+ * new size of the STE array, i.e. the rule together with its actions.
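+ * For example, an RX rule that does L3 decap and then a modify header
+ * consumes one extra MODIFY_PKT STE.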
+ */ +static void dr_actions_apply(struct mlx5dr_domain *dmn, + enum mlx5dr_ste_entry_type ste_type, + u8 *action_type_set, + u8 *last_ste, + struct dr_action_apply_attr *attr, + u32 *new_num_stes) +{ + u32 added_stes = 0; + + if (ste_type == MLX5DR_STE_TYPE_RX) + dr_actions_apply_rx(action_type_set, last_ste, attr, &added_stes); + else + dr_actions_apply_tx(dmn, action_type_set, last_ste, attr, &added_stes); + + last_ste += added_stes * DR_STE_SIZE; + *new_num_stes += added_stes; + + mlx5dr_ste_set_hit_gvmi(last_ste, attr->hit_gvmi); + mlx5dr_ste_set_hit_addr(last_ste, attr->final_icm_addr, 1); +} + +static enum dr_action_domain +dr_action_get_action_domain(enum mlx5dr_domain_type domain, + enum mlx5dr_ste_entry_type ste_type) +{ + switch (domain) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + return DR_ACTION_DOMAIN_NIC_INGRESS; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + return DR_ACTION_DOMAIN_NIC_EGRESS; + case MLX5DR_DOMAIN_TYPE_FDB: + if (ste_type == MLX5DR_STE_TYPE_RX) + return DR_ACTION_DOMAIN_FDB_INGRESS; + return DR_ACTION_DOMAIN_FDB_EGRESS; + default: + WARN_ON(true); + return DR_ACTION_DOMAIN_MAX; + } +} + +static +int dr_action_validate_and_get_next_state(enum dr_action_domain action_domain, + u32 action_type, + u32 *state) +{ + u32 cur_state = *state; + + /* Check action state machine is valid */ + *state = next_action_state[action_domain][cur_state][action_type]; + + if (*state == DR_ACTION_STATE_ERR) + return -EOPNOTSUPP; + + return 0; +} + +static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn, + struct mlx5dr_action *dest_action, + u64 *final_icm_addr) +{ + int ret; + + switch (dest_action->action_type) { + case DR_ACTION_TYP_FT: + /* Allow destination flow table only if table is a terminating + * table, since there is an *assumption* that in such case FW + * will recalculate the CS. + */ + if (dest_action->dest_tbl.is_fw_tbl) { + *final_icm_addr = dest_action->dest_tbl.fw_tbl.rx_icm_addr; + } else { + mlx5dr_dbg(dmn, + "Destination FT should be terminating when modify TTL is used\n"); + return -EINVAL; + } + break; + + case DR_ACTION_TYP_VPORT: + /* If destination is vport we will get the FW flow table + * that recalculates the CS and forwards to the vport. 
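+		 * That table is created on first use and cached per vport;
+		 * see mlx5dr_domain_cache_get_recalc_cs_ft_addr().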
+ */ + ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport.dmn, + dest_action->vport.num, + final_icm_addr); + if (ret) { + mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n"); + return ret; + } + break; + + default: + break; + } + + return 0; +} + +#define WITH_VLAN_NUM_HW_ACTIONS 6 + +int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_action *actions[], + u32 num_actions, + u8 *ste_arr, + u32 *new_hw_ste_arr_sz) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + bool rx_rule = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + u8 action_type_set[DR_ACTION_TYP_MAX] = {}; + struct mlx5dr_action *dest_action = NULL; + u32 state = DR_ACTION_STATE_NO_ACTION; + struct dr_action_apply_attr attr = {}; + enum dr_action_domain action_domain; + bool recalc_cs_required = false; + u8 *last_ste; + int i, ret; + + attr.gvmi = dmn->info.caps.gvmi; + attr.hit_gvmi = dmn->info.caps.gvmi; + attr.final_icm_addr = nic_dmn->default_icm_addr; + action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->ste_type); + + for (i = 0; i < num_actions; i++) { + struct mlx5dr_action *action; + int max_actions_type = 1; + u32 action_type; + + action = actions[i]; + action_type = action->action_type; + + switch (action_type) { + case DR_ACTION_TYP_DROP: + attr.final_icm_addr = nic_dmn->drop_icm_addr; + break; + case DR_ACTION_TYP_FT: + dest_action = action; + if (!action->dest_tbl.is_fw_tbl) { + if (action->dest_tbl.tbl->dmn != dmn) { + mlx5dr_dbg(dmn, + "Destination table belongs to a different domain\n"); + goto out_invalid_arg; + } + if (action->dest_tbl.tbl->level <= matcher->tbl->level) { + mlx5dr_dbg(dmn, + "Destination table level should be higher than source table\n"); + goto out_invalid_arg; + } + attr.final_icm_addr = rx_rule ? + action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr : + action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr; + } else { + struct mlx5dr_cmd_query_flow_table_details output; + int ret; + + /* get the relevant addresses */ + if (!action->dest_tbl.fw_tbl.rx_icm_addr) { + ret = mlx5dr_cmd_query_flow_table(action->dest_tbl.fw_tbl.mdev, + action->dest_tbl.fw_tbl.ft->type, + action->dest_tbl.fw_tbl.ft->id, + &output); + if (!ret) { + action->dest_tbl.fw_tbl.tx_icm_addr = + output.sw_owner_icm_root_1; + action->dest_tbl.fw_tbl.rx_icm_addr = + output.sw_owner_icm_root_0; + } else { + mlx5dr_dbg(dmn, + "Failed mlx5_cmd_query_flow_table ret: %d\n", + ret); + return ret; + } + } + attr.final_icm_addr = rx_rule ? 
+ action->dest_tbl.fw_tbl.rx_icm_addr : + action->dest_tbl.fw_tbl.tx_icm_addr; + } + break; + case DR_ACTION_TYP_QP: + mlx5dr_info(dmn, "Domain doesn't support QP\n"); + goto out_invalid_arg; + case DR_ACTION_TYP_CTR: + attr.ctr_id = action->ctr.ctr_id + + action->ctr.offeset; + break; + case DR_ACTION_TYP_TAG: + attr.flow_tag = action->flow_tag; + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + attr.decap_index = action->rewrite.index; + attr.decap_actions = action->rewrite.num_of_actions; + attr.decap_with_vlan = + attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS; + break; + case DR_ACTION_TYP_MODIFY_HDR: + attr.modify_index = action->rewrite.index; + attr.modify_actions = action->rewrite.num_of_actions; + recalc_cs_required = action->rewrite.modify_ttl; + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + attr.reformat_size = action->reformat.reformat_size; + attr.reformat_id = action->reformat.reformat_id; + break; + case DR_ACTION_TYP_VPORT: + attr.hit_gvmi = action->vport.caps->vhca_gvmi; + dest_action = action; + if (rx_rule) { + /* Loopback on WIRE vport is not supported */ + if (action->vport.num == WIRE_PORT) + goto out_invalid_arg; + + attr.final_icm_addr = action->vport.caps->icm_address_rx; + } else { + attr.final_icm_addr = action->vport.caps->icm_address_tx; + } + break; + case DR_ACTION_TYP_POP_VLAN: + max_actions_type = MAX_VLANS; + attr.vlans.count++; + break; + case DR_ACTION_TYP_PUSH_VLAN: + max_actions_type = MAX_VLANS; + if (attr.vlans.count == MAX_VLANS) + return -EINVAL; + + attr.vlans.headers[attr.vlans.count++] = action->push_vlan.vlan_hdr; + break; + default: + goto out_invalid_arg; + } + + /* Check action duplication */ + if (++action_type_set[action_type] > max_actions_type) { + mlx5dr_dbg(dmn, "Action type %d supports only max %d time(s)\n", + action_type, max_actions_type); + goto out_invalid_arg; + } + + /* Check action state machine is valid */ + if (dr_action_validate_and_get_next_state(action_domain, + action_type, + &state)) { + mlx5dr_dbg(dmn, "Invalid action sequence provided\n"); + return -EOPNOTSUPP; + } + } + + *new_hw_ste_arr_sz = nic_matcher->num_of_builders; + last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1); + + /* Due to a HW bug, modifying TTL on RX flows will cause an incorrect + * checksum calculation. In this case we will use a FW table to + * recalculate. 
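+	 * Only FDB RX rules that modify TTL and carry an explicit
+	 * destination take this path, as the condition below checks.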
+ */ + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB && + rx_rule && recalc_cs_required && dest_action) { + ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr); + if (ret) { + mlx5dr_dbg(dmn, + "Failed to handle checksum recalculation err %d\n", + ret); + return ret; + } + } + + dr_actions_apply(dmn, + nic_dmn->ste_type, + action_type_set, + last_ste, + &attr, + new_hw_ste_arr_sz); + + return 0; + +out_invalid_arg: + return -EINVAL; +} + +#define CVLAN_ETHERTYPE 0x8100 +#define SVLAN_ETHERTYPE 0x88a8 +#define HDR_LEN_L2_ONLY 14 +#define HDR_LEN_L2_VLAN 18 +#define REWRITE_HW_ACTION_NUM 6 + +static int dr_actions_l2_rewrite(struct mlx5dr_domain *dmn, + struct mlx5dr_action *action, + void *data, size_t data_sz) +{ + struct mlx5_ifc_l2_hdr_bits *l2_hdr = data; + u64 ops[REWRITE_HW_ACTION_NUM] = {}; + u32 hdr_fld_4b; + u16 hdr_fld_2b; + u16 vlan_type; + bool vlan; + int i = 0; + int ret; + + vlan = (data_sz != HDR_LEN_L2_ONLY); + + /* dmac_47_16 */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 0); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_0); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 16); + hdr_fld_4b = MLX5_GET(l2_hdr, l2_hdr, dmac_47_16); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_4b); + i++; + + /* smac_47_16 */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 0); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_1); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 16); + hdr_fld_4b = (MLX5_GET(l2_hdr, l2_hdr, smac_31_0) >> 16 | + MLX5_GET(l2_hdr, l2_hdr, smac_47_32) << 16); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_4b); + i++; + + /* dmac_15_0 */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 16); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_0); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 0); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, dmac_15_0); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_2b); + i++; + + /* ethertype + (optional) vlan */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_2); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 32); + if (!vlan) { + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype); + MLX5_SET(dr_action_hw_set, ops + i, inline_data, hdr_fld_2b); + MLX5_SET(dr_action_hw_set, ops + i, destination_length, 16); + } else { + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype); + vlan_type = hdr_fld_2b == SVLAN_ETHERTYPE ? 
DR_STE_SVLAN : DR_STE_CVLAN; + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan); + hdr_fld_4b = (vlan_type << 16) | hdr_fld_2b; + MLX5_SET(dr_action_hw_set, ops + i, inline_data, hdr_fld_4b); + MLX5_SET(dr_action_hw_set, ops + i, destination_length, 18); + } + i++; + + /* smac_15_0 */ + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 16); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_1); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 0); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, smac_31_0); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_2b); + i++; + + if (vlan) { + MLX5_SET(dr_action_hw_set, ops + i, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan_type); + MLX5_SET(dr_action_hw_set, ops + i, + inline_data, hdr_fld_2b); + MLX5_SET(dr_action_hw_set, ops + i, + destination_length, 16); + MLX5_SET(dr_action_hw_set, ops + i, + destination_field_code, MLX5DR_ACTION_MDFY_HW_FLD_L2_2); + MLX5_SET(dr_action_hw_set, ops + i, + destination_left_shifter, 0); + i++; + } + + action->rewrite.data = (void *)ops; + action->rewrite.num_of_actions = i; + action->rewrite.chunk->byte_size = i * sizeof(*ops); + + ret = mlx5dr_send_postsend_action(dmn, action); + if (ret) { + mlx5dr_dbg(dmn, "Writing encapsulation action to ICM failed\n"); + return ret; + } + + return 0; +} + +static struct mlx5dr_action * +dr_action_create_generic(enum mlx5dr_action_type action_type) +{ + struct mlx5dr_action *action; + + action = kzalloc(sizeof(*action), GFP_KERNEL); + if (!action) + return NULL; + + action->action_type = action_type; + refcount_set(&action->refcount, 1); + + return action; +} + +struct mlx5dr_action *mlx5dr_action_create_drop(void) +{ + return dr_action_create_generic(DR_ACTION_TYP_DROP); +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl) +{ + struct mlx5dr_action *action; + + refcount_inc(&tbl->refcount); + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + goto dec_ref; + + action->dest_tbl.tbl = tbl; + + return action; + +dec_ref: + refcount_dec(&tbl->refcount); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, + struct mlx5_core_dev *mdev) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + return NULL; + + action->dest_tbl.is_fw_tbl = 1; + action->dest_tbl.fw_tbl.ft = ft; + action->dest_tbl.fw_tbl.mdev = mdev; + + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_flow_counter(u32 counter_id) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_CTR); + if (!action) + return NULL; + + action->ctr.ctr_id = counter_id; + + return action; +} + +struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_TAG); + if (!action) + return NULL; + + action->flow_tag = tag_value & 0xffffff; + + return action; +} + +static int +dr_action_verify_reformat_params(enum mlx5dr_action_type reformat_type, + struct mlx5dr_domain *dmn, + size_t data_sz, + void *data) +{ + if ((!data && data_sz) || (data && !data_sz) || reformat_type > + DR_ACTION_TYP_L2_TO_TNL_L3) { + mlx5dr_dbg(dmn, "Invalid reformat parameter!\n"); + goto out_err; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB) + return 0; + + if (dmn->type == 
MLX5DR_DOMAIN_TYPE_NIC_RX) {
+		if (reformat_type != DR_ACTION_TYP_TNL_L2_TO_L2 &&
+		    reformat_type != DR_ACTION_TYP_TNL_L3_TO_L2) {
+			mlx5dr_dbg(dmn, "Action reformat type not supported on RX domain\n");
+			goto out_err;
+		}
+	} else if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) {
+		if (reformat_type != DR_ACTION_TYP_L2_TO_TNL_L2 &&
+		    reformat_type != DR_ACTION_TYP_L2_TO_TNL_L3) {
+			mlx5dr_dbg(dmn, "Action reformat type not supported on TX domain\n");
+			goto out_err;
+		}
+	}
+
+	return 0;
+
+out_err:
+	return -EINVAL;
+}
+
+#define ACTION_CACHE_LINE_SIZE 64
+
+static int
+dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
+				 size_t data_sz, void *data,
+				 struct mlx5dr_action *action)
+{
+	u32 reformat_id;
+	int ret;
+
+	switch (action->action_type) {
+	case DR_ACTION_TYP_L2_TO_TNL_L2:
+	case DR_ACTION_TYP_L2_TO_TNL_L3:
+	{
+		enum mlx5_reformat_ctx_type rt;
+
+		if (action->action_type == DR_ACTION_TYP_L2_TO_TNL_L2)
+			rt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
+		else
+			rt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+
+		ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, rt, data_sz, data,
+						     &reformat_id);
+		if (ret)
+			return ret;
+
+		action->reformat.reformat_id = reformat_id;
+		action->reformat.reformat_size = data_sz;
+		return 0;
+	}
+	case DR_ACTION_TYP_TNL_L2_TO_L2:
+	{
+		return 0;
+	}
+	case DR_ACTION_TYP_TNL_L3_TO_L2:
+	{
+		/* Only Ethernet frames are supported, with VLAN (18) or without (14) */
+		if (data_sz != HDR_LEN_L2_ONLY && data_sz != HDR_LEN_L2_VLAN)
+			return -EINVAL;
+
+		action->rewrite.chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool,
+							       DR_CHUNK_SIZE_8);
+		if (!action->rewrite.chunk)
+			return -ENOMEM;
+
+		action->rewrite.index = (action->rewrite.chunk->icm_addr -
+					 dmn->info.caps.hdr_modify_icm_addr) /
+					 ACTION_CACHE_LINE_SIZE;
+
+		ret = dr_actions_l2_rewrite(dmn, action, data, data_sz);
+		if (ret) {
+			mlx5dr_icm_free_chunk(action->rewrite.chunk);
+			return ret;
+		}
+		return 0;
+	}
+	default:
+		mlx5dr_info(dmn, "Reformat type is not supported %d\n", action->action_type);
+		return -EINVAL;
+	}
+}
+
+struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void)
+{
+	return dr_action_create_generic(DR_ACTION_TYP_POP_VLAN);
+}
+
+struct mlx5dr_action *mlx5dr_action_create_push_vlan(struct mlx5dr_domain *dmn,
+						     __be32 vlan_hdr)
+{
+	u32 vlan_hdr_h = ntohl(vlan_hdr);
+	u16 ethertype = vlan_hdr_h >> 16;
+	struct mlx5dr_action *action;
+
+	if (ethertype != SVLAN_ETHERTYPE && ethertype != CVLAN_ETHERTYPE) {
+		mlx5dr_dbg(dmn, "Invalid vlan ethertype\n");
+		return NULL;
+	}
+
+	action = dr_action_create_generic(DR_ACTION_TYP_PUSH_VLAN);
+	if (!action)
+		return NULL;
+
+	action->push_vlan.vlan_hdr = vlan_hdr_h;
+	return action;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
+				     enum mlx5dr_action_reformat_type reformat_type,
+				     size_t data_sz,
+				     void *data)
+{
+	enum mlx5dr_action_type action_type;
+	struct mlx5dr_action *action;
+	int ret;
+
+	refcount_inc(&dmn->refcount);
+
+	/* General checks */
+	ret = dr_action_reformat_to_action_type(reformat_type, &action_type);
+	if (ret) {
+		mlx5dr_dbg(dmn, "Invalid reformat_type provided\n");
+		goto dec_ref;
+	}
+
+	ret = dr_action_verify_reformat_params(action_type, dmn, data_sz, data);
+	if (ret)
+		goto dec_ref;
+
+	action = dr_action_create_generic(action_type);
+	if (!action)
+		goto dec_ref;
+
+	action->reformat.dmn = dmn;
+
+	ret = dr_action_create_reformat_action(dmn,
+					       data_sz,
+					       data,
+					       action);
+	if (ret) {
+		mlx5dr_dbg(dmn, "Failed creating reformat action %d\n", ret);
+		goto free_action;
+	}
+
+	return action;
+
+free_action:
+	kfree(action);
+dec_ref:
+	refcount_dec(&dmn->refcount);
+	return NULL;
+}
+
+static const struct dr_action_modify_field_conv *
+dr_action_modify_get_hw_info(u16 sw_field)
+{
+	const struct dr_action_modify_field_conv *hw_action_info;
+
+	if (sw_field >= ARRAY_SIZE(dr_action_conv_arr))
+		goto not_found;
+
+	hw_action_info = &dr_action_conv_arr[sw_field];
+	if (!hw_action_info->end && !hw_action_info->start)
+		goto not_found;
+
+	return hw_action_info;
+
+not_found:
+	return NULL;
+}
+
+static int
+dr_action_modify_sw_to_hw(struct mlx5dr_domain *dmn,
+			  __be64 *sw_action,
+			  __be64 *hw_action,
+			  const struct dr_action_modify_field_conv **ret_hw_info)
+{
+	const struct dr_action_modify_field_conv *hw_action_info;
+	u8 offset, length, max_length, action;
+	u16 sw_field;
+	u8 hw_opcode;
+	u32 data;
+
+	/* Get SW modify action data */
+	action = MLX5_GET(set_action_in, sw_action, action_type);
+	length = MLX5_GET(set_action_in, sw_action, length);
+	offset = MLX5_GET(set_action_in, sw_action, offset);
+	sw_field = MLX5_GET(set_action_in, sw_action, field);
+	data = MLX5_GET(set_action_in, sw_action, data);
+
+	/* Convert SW data to HW modify action format */
+	hw_action_info = dr_action_modify_get_hw_info(sw_field);
+	if (!hw_action_info) {
+		mlx5dr_dbg(dmn, "Modify action invalid field given\n");
+		return -EINVAL;
+	}
+
+	max_length = hw_action_info->end - hw_action_info->start + 1;
+
+	switch (action) {
+	case MLX5_ACTION_TYPE_SET:
+		hw_opcode = MLX5DR_ACTION_MDFY_HW_OP_SET;
+		/* The PRM defines that a length of zero specifies a length of 32 bits */
+		if (!length)
+			length = 32;
+
+		if (length + offset > max_length) {
+			mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n");
+			return -EINVAL;
+		}
+		break;
+
+	case MLX5_ACTION_TYPE_ADD:
+		hw_opcode = MLX5DR_ACTION_MDFY_HW_OP_ADD;
+		offset = 0;
+		length = max_length;
+		break;
+
+	default:
+		mlx5dr_info(dmn, "Unsupported action_type for modify action\n");
+		return -EOPNOTSUPP;
+	}
+
+	MLX5_SET(dr_action_hw_set, hw_action, opcode, hw_opcode);
+
+	MLX5_SET(dr_action_hw_set, hw_action, destination_field_code,
+		 hw_action_info->hw_field);
+
+	MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter,
+		 hw_action_info->start + offset);
+
+	MLX5_SET(dr_action_hw_set, hw_action, destination_length,
+		 length == 32 ? 0 : length);
+
+	MLX5_SET(dr_action_hw_set, hw_action, inline_data, data);
+
+	*ret_hw_info = hw_action_info;
+
+	return 0;
+}
+
+static int
+dr_action_modify_check_field_limitation(struct mlx5dr_domain *dmn,
+					const __be64 *sw_action)
+{
+	u16 sw_field;
+	u8 action;
+
+	sw_field = MLX5_GET(set_action_in, sw_action, field);
+	action = MLX5_GET(set_action_in, sw_action, action_type);
+
+	/* Check if SW field is supported in current domain (RX/TX) */
+	if (action == MLX5_ACTION_TYPE_SET) {
+		if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_A) {
+			if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) {
+				mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n",
+					   sw_field);
+				return -EINVAL;
+			}
+		}
+
+		if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) {
+			if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) {
+				mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n",
+					   sw_field);
+				return -EINVAL;
+			}
+		}
+	} else if (action == MLX5_ACTION_TYPE_ADD) {
+		if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL &&
+		    sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT &&
+		    sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM &&
+		    sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM) {
+			mlx5dr_dbg(dmn, "Unsupported field %d for add action\n", sw_field);
+			return -EINVAL;
+		}
+	} else {
+		mlx5dr_info(dmn, "Unsupported modify action %d\n", action);
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static bool
+dr_action_modify_check_is_ttl_modify(const __be64 *sw_action)
+{
+	u16 sw_field = MLX5_GET(set_action_in, sw_action, field);
+
+	return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL;
+}
+
+static int dr_actions_convert_modify_header(struct mlx5dr_domain *dmn,
+					    u32 max_hw_actions,
+					    u32 num_sw_actions,
+					    __be64 sw_actions[],
+					    __be64 hw_actions[],
+					    u32 *num_hw_actions,
+					    bool *modify_ttl)
+{
+	const struct dr_action_modify_field_conv *hw_action_info;
+	u16 hw_field = MLX5DR_ACTION_MDFY_HW_FLD_RESERVED;
+	u32 l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_NONE;
+	u32 l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_NONE;
+	int ret, i, hw_idx = 0;
+	__be64 *sw_action;
+	__be64 hw_action;
+
+	*modify_ttl = false;
+
+	for (i = 0; i < num_sw_actions; i++) {
+		sw_action = &sw_actions[i];
+
+		ret = dr_action_modify_check_field_limitation(dmn, sw_action);
+		if (ret)
+			return ret;
+
+		if (!(*modify_ttl))
+			*modify_ttl = dr_action_modify_check_is_ttl_modify(sw_action);
+
+		/* Convert SW action to HW action */
+		ret = dr_action_modify_sw_to_hw(dmn,
+						sw_action,
+						&hw_action,
+						&hw_action_info);
+		if (ret)
+			return ret;
+
+		/* Due to a HW limitation we cannot modify two different L3 types */
+		if (l3_type && hw_action_info->l3_type &&
+		    hw_action_info->l3_type != l3_type) {
+			mlx5dr_dbg(dmn, "Action list can't support two different L3 types\n");
+			return -EINVAL;
+		}
+		if (hw_action_info->l3_type)
+			l3_type = hw_action_info->l3_type;
+
+		/* Due to a HW limitation we cannot modify two different L4 types */
+		if (l4_type && hw_action_info->l4_type &&
+		    hw_action_info->l4_type != l4_type) {
+			mlx5dr_dbg(dmn, "Action list can't support two different L4 types\n");
+			return -EINVAL;
+		}
+		if (hw_action_info->l4_type)
+			l4_type = hw_action_info->l4_type;
+
+		/* HW reads and executes two actions at once; this means we
+		 * need to create a gap if two consecutive actions access the
+		 * same field
+		 */
+		if ((hw_idx % 2) && hw_field == hw_action_info->hw_field) {
+			/* Check that after gap insertion the total number of
+			 * HW modify actions doesn't exceed the limit
+			 */
+			hw_idx++;
+			if ((num_sw_actions + hw_idx - i) >= max_hw_actions) {
+				mlx5dr_dbg(dmn, "Modify header 
action number exceeds HW limit\n"); + return -EINVAL; + } + } + hw_field = hw_action_info->hw_field; + + hw_actions[hw_idx] = hw_action; + hw_idx++; + } + + *num_hw_actions = hw_idx; + + return 0; +} + +static int dr_action_create_modify_action(struct mlx5dr_domain *dmn, + size_t actions_sz, + __be64 actions[], + struct mlx5dr_action *action) +{ + struct mlx5dr_icm_chunk *chunk; + u32 max_hw_actions; + u32 num_hw_actions; + u32 num_sw_actions; + __be64 *hw_actions; + bool modify_ttl; + int ret; + + num_sw_actions = actions_sz / DR_MODIFY_ACTION_SIZE; + max_hw_actions = mlx5dr_icm_pool_chunk_size_to_entries(DR_CHUNK_SIZE_16); + + if (num_sw_actions > max_hw_actions) { + mlx5dr_dbg(dmn, "Max number of actions %d exceeds limit %d\n", + num_sw_actions, max_hw_actions); + return -EINVAL; + } + + chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, DR_CHUNK_SIZE_16); + if (!chunk) + return -ENOMEM; + + hw_actions = kcalloc(1, max_hw_actions * DR_MODIFY_ACTION_SIZE, GFP_KERNEL); + if (!hw_actions) { + ret = -ENOMEM; + goto free_chunk; + } + + ret = dr_actions_convert_modify_header(dmn, + max_hw_actions, + num_sw_actions, + actions, + hw_actions, + &num_hw_actions, + &modify_ttl); + if (ret) + goto free_hw_actions; + + action->rewrite.chunk = chunk; + action->rewrite.modify_ttl = modify_ttl; + action->rewrite.data = (u8 *)hw_actions; + action->rewrite.num_of_actions = num_hw_actions; + action->rewrite.index = (chunk->icm_addr - + dmn->info.caps.hdr_modify_icm_addr) / + ACTION_CACHE_LINE_SIZE; + + ret = mlx5dr_send_postsend_action(dmn, action); + if (ret) + goto free_hw_actions; + + return 0; + +free_hw_actions: + kfree(hw_actions); +free_chunk: + mlx5dr_icm_free_chunk(chunk); + return ret; +} + +struct mlx5dr_action * +mlx5dr_action_create_modify_header(struct mlx5dr_domain *dmn, + u32 flags, + size_t actions_sz, + __be64 actions[]) +{ + struct mlx5dr_action *action; + int ret = 0; + + refcount_inc(&dmn->refcount); + + if (actions_sz % DR_MODIFY_ACTION_SIZE) { + mlx5dr_dbg(dmn, "Invalid modify actions size provided\n"); + goto dec_ref; + } + + action = dr_action_create_generic(DR_ACTION_TYP_MODIFY_HDR); + if (!action) + goto dec_ref; + + action->rewrite.dmn = dmn; + + ret = dr_action_create_modify_action(dmn, + actions_sz, + actions, + action); + if (ret) { + mlx5dr_dbg(dmn, "Failed creating modify header action %d\n", ret); + goto free_action; + } + + return action; + +free_action: + kfree(action); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn, + u32 vport, u8 vhca_id_valid, + u16 vhca_id) +{ + struct mlx5dr_cmd_vport_cap *vport_cap; + struct mlx5dr_domain *vport_dmn; + struct mlx5dr_action *action; + u8 peer_vport; + + peer_vport = vhca_id_valid && (vhca_id != dmn->info.caps.gvmi); + vport_dmn = peer_vport ? 
dmn->peer_dmn : dmn; + if (!vport_dmn) { + mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n"); + return NULL; + } + + if (vport_dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5dr_dbg(dmn, "Domain doesn't support vport actions\n"); + return NULL; + } + + vport_cap = mlx5dr_get_vport_cap(&vport_dmn->info.caps, vport); + if (!vport_cap) { + mlx5dr_dbg(dmn, "Failed to get vport %d caps\n", vport); + return NULL; + } + + action = dr_action_create_generic(DR_ACTION_TYP_VPORT); + if (!action) + return NULL; + + action->vport.dmn = vport_dmn; + action->vport.caps = vport_cap; + + return action; +} + +int mlx5dr_action_destroy(struct mlx5dr_action *action) +{ + if (refcount_read(&action->refcount) > 1) + return -EBUSY; + + switch (action->action_type) { + case DR_ACTION_TYP_FT: + if (!action->dest_tbl.is_fw_tbl) + refcount_dec(&action->dest_tbl.tbl->refcount); + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + refcount_dec(&action->reformat.dmn->refcount); + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + mlx5dr_icm_free_chunk(action->rewrite.chunk); + refcount_dec(&action->reformat.dmn->refcount); + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + mlx5dr_cmd_destroy_reformat_ctx((action->reformat.dmn)->mdev, + action->reformat.reformat_id); + refcount_dec(&action->reformat.dmn->refcount); + break; + case DR_ACTION_TYP_MODIFY_HDR: + mlx5dr_icm_free_chunk(action->rewrite.chunk); + refcount_dec(&action->rewrite.dmn->refcount); + break; + default: + break; + } + + kfree(action); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c new file mode 100644 index 000000000000..41662c4e2664 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -0,0 +1,480 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "dr_types.h" + +int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, + bool other_vport, + u16 vport_number, + u64 *icm_address_rx, + u64 *icm_address_tx) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; + int err; + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + MLX5_SET(query_esw_vport_context_in, in, other_vport, other_vport); + MLX5_SET(query_esw_vport_context_in, in, vport_number, vport_number); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *icm_address_rx = + MLX5_GET64(query_esw_vport_context_out, out, + esw_vport_context.sw_steering_vport_icm_address_rx); + *icm_address_tx = + MLX5_GET64(query_esw_vport_context_out, out, + esw_vport_context.sw_steering_vport_icm_address_tx); + return 0; +} + +int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport, + u16 vport_number, u16 *gvmi) +{ + u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; + int out_size; + void *out; + int err; + + out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); + out = kzalloc(out_size, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, other_function, other_vport); + MLX5_SET(query_hca_cap_in, in, function_id, vport_number); + MLX5_SET(query_hca_cap_in, in, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | + HCA_CAP_OPMOD_GET_CUR); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size); + if (err) { + kfree(out); + return err; + } + + *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id); + + kfree(out); + return 0; +} + +int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_esw_caps *caps) +{ + caps->drop_icm_address_rx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_fdb_action_drop_icm_address_rx); + caps->drop_icm_address_tx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_fdb_action_drop_icm_address_tx); + caps->uplink_icm_address_rx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_uplink_icm_address_rx); + caps->uplink_icm_address_tx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_uplink_icm_address_tx); + caps->sw_owner = + MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, + sw_owner); + + return 0; +} + +int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_caps *caps) +{ + caps->prio_tag_required = MLX5_CAP_GEN(mdev, prio_tag_required); + caps->eswitch_manager = MLX5_CAP_GEN(mdev, eswitch_manager); + caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); + caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); + + if (mlx5dr_matcher_supp_flex_parser_icmp_v4(caps)) { + caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); + caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1); + } + + if (mlx5dr_matcher_supp_flex_parser_icmp_v6(caps)) { + caps->flex_parser_id_icmpv6_dw0 = + MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw0); + caps->flex_parser_id_icmpv6_dw1 = + MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw1); + } + + caps->nic_rx_drop_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address); + caps->nic_tx_drop_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_drop_icm_address); + caps->nic_tx_allow_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_allow_icm_address); + + caps->rx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner); + caps->max_ft_level = 
MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_ft_level); + + caps->tx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner); + + caps->log_icm_size = MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size); + caps->hdr_modify_icm_addr = + MLX5_CAP64_DEV_MEM(mdev, header_modify_sw_icm_start_address); + + caps->roce_min_src_udp = MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port); + + return 0; +} + +int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, + u32 table_id, + struct mlx5dr_cmd_query_flow_table_details *output) +{ + u32 out[MLX5_ST_SZ_DW(query_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_flow_table_in)] = {}; + int err; + + MLX5_SET(query_flow_table_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_TABLE); + + MLX5_SET(query_flow_table_in, in, table_type, type); + MLX5_SET(query_flow_table_in, in, table_id, table_id); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + output->status = MLX5_GET(query_flow_table_out, out, status); + output->level = MLX5_GET(query_flow_table_out, out, flow_table_context.level); + + output->sw_owner_icm_root_1 = MLX5_GET64(query_flow_table_out, out, + flow_table_context.sw_owner_icm_root_1); + output->sw_owner_icm_root_0 = MLX5_GET64(query_flow_table_out, out, + flow_table_context.sw_owner_icm_root_0); + + return 0; +} + +int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev) +{ + u32 out[MLX5_ST_SZ_DW(sync_steering_out)] = {}; + u32 in[MLX5_ST_SZ_DW(sync_steering_in)] = {}; + + MLX5_SET(sync_steering_in, in, opcode, MLX5_CMD_OP_SYNC_STEERING); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id, + u32 modify_header_id, + u32 vport_id) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {}; + void *in_flow_context; + unsigned int inlen; + void *in_dests; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + + 1 * MLX5_ST_SZ_BYTES(dest_format_struct); /* One destination only */ + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, table_type, table_type); + MLX5_SET(set_fte_in, in, table_id, table_id); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + MLX5_SET(flow_context, in_flow_context, modify_header_id, modify_header_id); + MLX5_SET(flow_context, in_flow_context, destination_list_size, 1); + MLX5_SET(flow_context, in_flow_context, action, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + MLX5_SET(dest_format_struct, in_dests, destination_type, + MLX5_FLOW_DESTINATION_TYPE_VPORT); + MLX5_SET(dest_format_struct, in_dests, destination_id, vport_id); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + kvfree(in); + + return err; +} + +int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id) +{ + u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {}; + u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {}; + + MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + MLX5_SET(delete_fte_in, in, table_type, table_type); + MLX5_SET(delete_fte_in, in, table_id, table_id); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev, + u32 table_type, + u8 
num_of_actions, + u64 *actions, + u32 *modify_header_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)] = {}; + void *p_actions; + u32 inlen; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + + num_of_actions * sizeof(u64); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type); + MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_of_actions); + p_actions = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions); + memcpy(p_actions, actions, num_of_actions * sizeof(u64)); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + goto out; + + *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, + modify_header_id); +out: + kvfree(in); + return err; +} + +int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev, + u32 modify_header_id) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {}; + + MLX5_SET(dealloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, + modify_header_id); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 *group_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {}; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + u32 *in; + int err; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET(create_flow_group_in, in, table_type, table_type); + MLX5_SET(create_flow_group_in, in, table_id, table_id); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + goto out; + + *group_id = MLX5_GET(create_flow_group_out, out, group_id); + +out: + kfree(in); + return err; +} + +int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {}; + + MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET(destroy_flow_group_in, in, table_type, table_type); + MLX5_SET(destroy_flow_group_in, in, table_id, table_id); + MLX5_SET(destroy_flow_group_in, in, group_id, group_id); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, + u32 table_type, + u64 icm_addr_rx, + u64 icm_addr_tx, + u8 level, + bool sw_owner, + bool term_tbl, + u64 *fdb_rx_icm_addr, + u32 *table_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {}; + void *ft_mdev; + int err; + + MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); + MLX5_SET(create_flow_table_in, in, table_type, table_type); + + ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context); + MLX5_SET(flow_table_context, ft_mdev, termination_table, term_tbl); + MLX5_SET(flow_table_context, ft_mdev, sw_owner, sw_owner); + MLX5_SET(flow_table_context, ft_mdev, level, level); + + if (sw_owner) { + /* icm_addr_0 used for FDB RX / NIC TX / NIC_RX + * icm_addr_1 used for FDB TX + */ + if (table_type == 
MLX5_FLOW_TABLE_TYPE_NIC_RX) {
+			MLX5_SET64(flow_table_context, ft_mdev,
+				   sw_owner_icm_root_0, icm_addr_rx);
+		} else if (table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) {
+			MLX5_SET64(flow_table_context, ft_mdev,
+				   sw_owner_icm_root_0, icm_addr_tx);
+		} else if (table_type == MLX5_FLOW_TABLE_TYPE_FDB) {
+			MLX5_SET64(flow_table_context, ft_mdev,
+				   sw_owner_icm_root_0, icm_addr_rx);
+			MLX5_SET64(flow_table_context, ft_mdev,
+				   sw_owner_icm_root_1, icm_addr_tx);
+		}
+	}
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	*table_id = MLX5_GET(create_flow_table_out, out, table_id);
+	if (!sw_owner && table_type == MLX5_FLOW_TABLE_TYPE_FDB)
+		*fdb_rx_icm_addr =
+			(u64)MLX5_GET(create_flow_table_out, out, icm_address_31_0) |
+			(u64)MLX5_GET(create_flow_table_out, out, icm_address_39_32) << 32 |
+			(u64)MLX5_GET(create_flow_table_out, out, icm_address_63_40) << 40;
+
+	return 0;
+}
+
+int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
+				  u32 table_id,
+				  u32 table_type)
+{
+	u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {};
+
+	MLX5_SET(destroy_flow_table_in, in, opcode,
+		 MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+	MLX5_SET(destroy_flow_table_in, in, table_type, table_type);
+	MLX5_SET(destroy_flow_table_in, in, table_id, table_id);
+
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
+				   enum mlx5_reformat_ctx_type rt,
+				   size_t reformat_size,
+				   void *reformat_data,
+				   u32 *reformat_id)
+{
+	u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {};
+	size_t inlen, cmd_data_sz, cmd_total_sz;
+	void *prctx;
+	void *pdata;
+	void *in;
+	int err;
+
+	cmd_total_sz = MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in);
+	cmd_data_sz = MLX5_FLD_SZ_BYTES(alloc_packet_reformat_context_in,
+					packet_reformat_context.reformat_data);
+	inlen = ALIGN(cmd_total_sz + reformat_size - cmd_data_sz, 4);
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(alloc_packet_reformat_context_in, in, opcode,
+		 MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT);
+
+	prctx = MLX5_ADDR_OF(alloc_packet_reformat_context_in, in, packet_reformat_context);
+	pdata = MLX5_ADDR_OF(packet_reformat_context_in, prctx, reformat_data);
+
+	MLX5_SET(packet_reformat_context_in, prctx, reformat_type, rt);
+	MLX5_SET(packet_reformat_context_in, prctx, reformat_data_size, reformat_size);
+	memcpy(pdata, reformat_data, reformat_size);
+
+	/* Free the input buffer on both the success and the error path */
+	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+	if (!err)
+		*reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out,
+					packet_reformat_id);
+	kvfree(in);
+
+	return err;
+}
+
+void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
+				     u32 reformat_id)
+{
+	u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {};
+
+	MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
+		 MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+	MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
+		 reformat_id);
+
+	mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
+			 u16 index, struct mlx5dr_cmd_gid_attr *attr)
+{
+	u32 out[MLX5_ST_SZ_DW(query_roce_address_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_roce_address_in)] = {};
+	int err;
+
+	MLX5_SET(query_roce_address_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_ROCE_ADDRESS);
+
+	MLX5_SET(query_roce_address_in, in, roce_address_index, index);
+	MLX5_SET(query_roce_address_in, in, vhca_port_num, vhca_port_num);
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	memcpy(&attr->gid,
+	       MLX5_ADDR_OF(query_roce_address_out,
+			    out, roce_address.source_l3_address),
+	       sizeof(attr->gid));
+	memcpy(attr->mac,
+	       MLX5_ADDR_OF(query_roce_address_out, out,
+			    roce_address.source_mac_47_32),
+	       sizeof(attr->mac));
+
+	if (MLX5_GET(query_roce_address_out, out,
+		     roce_address.roce_version) == MLX5_ROCE_VERSION_2)
+		attr->roce_ver = MLX5_ROCE_VERSION_2;
+	else
+		attr->roce_ver = MLX5_ROCE_VERSION_1;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c
new file mode 100644
index 000000000000..9e2eccbb1eb8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_crc32.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+/* Copyright (c) 2011-2015 Stephan Brumme. All rights reserved.
+ * Slicing-by-16 contributed by Bulat Ziganshin
+ *
+ * This software is provided 'as-is', without any express or implied warranty.
+ * In no event will the author be held liable for any damages arising from the
+ * use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software.
+ * 2. If you use this software in a product, an acknowledgment in the product
+ *    documentation would be appreciated but is not required.
+ * 3. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ *
+ * Taken from http://create.stephan-brumme.com/crc32/ and adapted.
+ */ + +#include "dr_types.h" + +#define DR_STE_CRC_POLY 0xEDB88320L + +static u32 dr_ste_crc_tab32[8][256]; + +static void dr_crc32_calc_lookup_entry(u32 (*tbl)[256], u8 i, u8 j) +{ + tbl[i][j] = (tbl[i - 1][j] >> 8) ^ tbl[0][tbl[i - 1][j] & 0xff]; +} + +void mlx5dr_crc32_init_table(void) +{ + u32 crc, i, j; + + for (i = 0; i < 256; i++) { + crc = i; + for (j = 0; j < 8; j++) { + if (crc & 0x00000001L) + crc = (crc >> 1) ^ DR_STE_CRC_POLY; + else + crc = crc >> 1; + } + dr_ste_crc_tab32[0][i] = crc; + } + + /* Init CRC lookup tables according to crc_slice_8 algorithm */ + for (i = 0; i < 256; i++) { + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 1, i); + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 2, i); + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 3, i); + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 4, i); + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 5, i); + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 6, i); + dr_crc32_calc_lookup_entry(dr_ste_crc_tab32, 7, i); + } +} + +/* Compute CRC32 (Slicing-by-8 algorithm) */ +u32 mlx5dr_crc32_slice8_calc(const void *input_data, size_t length) +{ + const u32 *curr = (const u32 *)input_data; + const u8 *curr_char; + u32 crc = 0, one, two; + + if (!input_data) + return 0; + + /* Process eight bytes at once (Slicing-by-8) */ + while (length >= 8) { + one = *curr++ ^ crc; + two = *curr++; + + crc = dr_ste_crc_tab32[0][(two >> 24) & 0xff] + ^ dr_ste_crc_tab32[1][(two >> 16) & 0xff] + ^ dr_ste_crc_tab32[2][(two >> 8) & 0xff] + ^ dr_ste_crc_tab32[3][two & 0xff] + ^ dr_ste_crc_tab32[4][(one >> 24) & 0xff] + ^ dr_ste_crc_tab32[5][(one >> 16) & 0xff] + ^ dr_ste_crc_tab32[6][(one >> 8) & 0xff] + ^ dr_ste_crc_tab32[7][one & 0xff]; + + length -= 8; + } + + curr_char = (const u8 *)curr; + /* Remaining 1 to 7 bytes (standard algorithm) */ + while (length-- != 0) + crc = (crc >> 8) ^ dr_ste_crc_tab32[0][(crc & 0xff) + ^ *curr_char++]; + + return ((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) | + ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c new file mode 100644 index 000000000000..5b24732b18c0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -0,0 +1,393 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/mlx5/eswitch.h> +#include "dr_types.h" + +static int dr_domain_init_cache(struct mlx5dr_domain *dmn) +{ + /* Per vport cached FW FT for checksum recalculation, this + * recalculation is needed due to a HW bug. 
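+ *
+ * The cache is filled lazily: mlx5dr_domain_cache_get_recalc_cs_ft_addr()
+ * below creates the FW table on the first lookup for a vport and reuses
+ * it afterwards. A minimal sketch of a hypothetical lookup for vport 0
+ * (error handling elided):
+ *
+ *   u64 rx_icm_addr;
+ *   int err;
+ *
+ *   err = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dmn, 0, &rx_icm_addr);
+ *   if (!err)
+ *       ...use rx_icm_addr as the rule's final RX destination...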
+ */ + dmn->cache.recalc_cs_ft = kcalloc(dmn->info.caps.num_vports, + sizeof(dmn->cache.recalc_cs_ft[0]), + GFP_KERNEL); + if (!dmn->cache.recalc_cs_ft) + return -ENOMEM; + + return 0; +} + +static void dr_domain_uninit_cache(struct mlx5dr_domain *dmn) +{ + int i; + + for (i = 0; i < dmn->info.caps.num_vports; i++) { + if (!dmn->cache.recalc_cs_ft[i]) + continue; + + mlx5dr_fw_destroy_recalc_cs_ft(dmn, dmn->cache.recalc_cs_ft[i]); + } + + kfree(dmn->cache.recalc_cs_ft); +} + +int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn, + u32 vport_num, + u64 *rx_icm_addr) +{ + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; + + recalc_cs_ft = dmn->cache.recalc_cs_ft[vport_num]; + if (!recalc_cs_ft) { + /* Table not in cache, need to allocate a new one */ + recalc_cs_ft = mlx5dr_fw_create_recalc_cs_ft(dmn, vport_num); + if (!recalc_cs_ft) + return -EINVAL; + + dmn->cache.recalc_cs_ft[vport_num] = recalc_cs_ft; + } + + *rx_icm_addr = recalc_cs_ft->rx_icm_addr; + + return 0; +} + +static int dr_domain_init_resources(struct mlx5dr_domain *dmn) +{ + int ret; + + ret = mlx5_core_alloc_pd(dmn->mdev, &dmn->pdn); + if (ret) { + mlx5dr_dbg(dmn, "Couldn't allocate PD\n"); + return ret; + } + + dmn->uar = mlx5_get_uars_page(dmn->mdev); + if (!dmn->uar) { + mlx5dr_err(dmn, "Couldn't allocate UAR\n"); + ret = -ENOMEM; + goto clean_pd; + } + + dmn->ste_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_STE); + if (!dmn->ste_icm_pool) { + mlx5dr_err(dmn, "Couldn't get icm memory\n"); + ret = -ENOMEM; + goto clean_uar; + } + + dmn->action_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_MODIFY_ACTION); + if (!dmn->action_icm_pool) { + mlx5dr_err(dmn, "Couldn't get action icm memory\n"); + ret = -ENOMEM; + goto free_ste_icm_pool; + } + + ret = mlx5dr_send_ring_alloc(dmn); + if (ret) { + mlx5dr_err(dmn, "Couldn't create send-ring\n"); + goto free_action_icm_pool; + } + + return 0; + +free_action_icm_pool: + mlx5dr_icm_pool_destroy(dmn->action_icm_pool); +free_ste_icm_pool: + mlx5dr_icm_pool_destroy(dmn->ste_icm_pool); +clean_uar: + mlx5_put_uars_page(dmn->mdev, dmn->uar); +clean_pd: + mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn); + + return ret; +} + +static void dr_domain_uninit_resources(struct mlx5dr_domain *dmn) +{ + mlx5dr_send_ring_free(dmn, dmn->send_ring); + mlx5dr_icm_pool_destroy(dmn->action_icm_pool); + mlx5dr_icm_pool_destroy(dmn->ste_icm_pool); + mlx5_put_uars_page(dmn->mdev, dmn->uar); + mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn); +} + +static int dr_domain_query_vport(struct mlx5dr_domain *dmn, + bool other_vport, + u16 vport_number) +{ + struct mlx5dr_cmd_vport_cap *vport_caps; + int ret; + + vport_caps = &dmn->info.caps.vports_caps[vport_number]; + + ret = mlx5dr_cmd_query_esw_vport_context(dmn->mdev, + other_vport, + vport_number, + &vport_caps->icm_address_rx, + &vport_caps->icm_address_tx); + if (ret) + return ret; + + ret = mlx5dr_cmd_query_gvmi(dmn->mdev, + other_vport, + vport_number, + &vport_caps->vport_gvmi); + if (ret) + return ret; + + vport_caps->num = vport_number; + vport_caps->vhca_gvmi = dmn->info.caps.gvmi; + + return 0; +} + +static int dr_domain_query_vports(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_esw_caps *esw_caps = &dmn->info.caps.esw_caps; + struct mlx5dr_cmd_vport_cap *wire_vport; + int vport; + int ret; + + /* Query vports (except wire vport) */ + for (vport = 0; vport < dmn->info.caps.num_esw_ports - 1; vport++) { + ret = dr_domain_query_vport(dmn, !!vport, vport); + if (ret) + return ret; + } + + /* Last vport is the wire port */ + wire_vport = 
&dmn->info.caps.vports_caps[vport]; + wire_vport->num = WIRE_PORT; + wire_vport->icm_address_rx = esw_caps->uplink_icm_address_rx; + wire_vport->icm_address_tx = esw_caps->uplink_icm_address_tx; + wire_vport->vport_gvmi = 0; + wire_vport->vhca_gvmi = dmn->info.caps.gvmi; + + return 0; +} + +static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_domain *dmn) +{ + int ret; + + if (!dmn->info.caps.eswitch_manager) + return -EOPNOTSUPP; + + ret = mlx5dr_cmd_query_esw_caps(mdev, &dmn->info.caps.esw_caps); + if (ret) + return ret; + + dmn->info.caps.fdb_sw_owner = dmn->info.caps.esw_caps.sw_owner; + dmn->info.caps.esw_rx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_rx; + dmn->info.caps.esw_tx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_tx; + + dmn->info.caps.vports_caps = kcalloc(dmn->info.caps.num_esw_ports, + sizeof(dmn->info.caps.vports_caps[0]), + GFP_KERNEL); + if (!dmn->info.caps.vports_caps) + return -ENOMEM; + + ret = dr_domain_query_vports(dmn); + if (ret) { + mlx5dr_dbg(dmn, "Failed to query vports caps\n"); + goto free_vports_caps; + } + + dmn->info.caps.num_vports = dmn->info.caps.num_esw_ports - 1; + + return 0; + +free_vports_caps: + kfree(dmn->info.caps.vports_caps); + dmn->info.caps.vports_caps = NULL; + return ret; +} + +static int dr_domain_caps_init(struct mlx5_core_dev *mdev, + struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_vport_cap *vport_cap; + int ret; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) { + mlx5dr_dbg(dmn, "Failed to allocate domain, bad link type\n"); + return -EOPNOTSUPP; + } + + dmn->info.caps.num_esw_ports = mlx5_eswitch_get_total_vports(mdev); + + ret = mlx5dr_cmd_query_device(mdev, &dmn->info.caps); + if (ret) + return ret; + + ret = dr_domain_query_fdb_caps(mdev, dmn); + if (ret) + return ret; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + if (!dmn->info.caps.rx_sw_owner) + return -ENOTSUPP; + + dmn->info.supp_sw_steering = true; + dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX; + dmn->info.rx.default_icm_addr = dmn->info.caps.nic_rx_drop_address; + dmn->info.rx.drop_icm_addr = dmn->info.caps.nic_rx_drop_address; + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + if (!dmn->info.caps.tx_sw_owner) + return -ENOTSUPP; + + dmn->info.supp_sw_steering = true; + dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX; + dmn->info.tx.default_icm_addr = dmn->info.caps.nic_tx_allow_address; + dmn->info.tx.drop_icm_addr = dmn->info.caps.nic_tx_drop_address; + break; + case MLX5DR_DOMAIN_TYPE_FDB: + if (!dmn->info.caps.eswitch_manager) + return -ENOTSUPP; + + if (!dmn->info.caps.fdb_sw_owner) + return -ENOTSUPP; + + dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX; + dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX; + vport_cap = mlx5dr_get_vport_cap(&dmn->info.caps, 0); + if (!vport_cap) { + mlx5dr_dbg(dmn, "Failed to get esw manager vport\n"); + return -ENOENT; + } + + dmn->info.supp_sw_steering = true; + dmn->info.tx.default_icm_addr = vport_cap->icm_address_tx; + dmn->info.rx.default_icm_addr = vport_cap->icm_address_rx; + dmn->info.rx.drop_icm_addr = dmn->info.caps.esw_rx_drop_address; + dmn->info.tx.drop_icm_addr = dmn->info.caps.esw_tx_drop_address; + break; + default: + mlx5dr_dbg(dmn, "Invalid domain\n"); + ret = -EINVAL; + break; + } + + return ret; +} + +static void dr_domain_caps_uninit(struct mlx5dr_domain *dmn) +{ + kfree(dmn->info.caps.vports_caps); +} + +struct mlx5dr_domain * +mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) +{ + struct mlx5dr_domain *dmn; + int ret; + + 
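+	/* Creation order below: validate the domain type, query device and
+	 * eswitch caps, then set up resources (PD, UAR, ICM pools, send
+	 * ring) and the per-vport recalc-CS cache. A minimal usage sketch
+	 * from a hypothetical caller (cleanup on intermediate errors
+	 * elided):
+	 *
+	 *   struct mlx5dr_domain *dmn;
+	 *
+	 *   dmn = mlx5dr_domain_create(mdev, MLX5DR_DOMAIN_TYPE_FDB);
+	 *   if (!dmn)
+	 *       return -EINVAL;
+	 *   ...
+	 *   mlx5dr_domain_destroy(dmn);
+	 */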
if (type > MLX5DR_DOMAIN_TYPE_FDB)
+		return NULL;
+
+	dmn = kzalloc(sizeof(*dmn), GFP_KERNEL);
+	if (!dmn)
+		return NULL;
+
+	dmn->mdev = mdev;
+	dmn->type = type;
+	refcount_set(&dmn->refcount, 1);
+	mutex_init(&dmn->mutex);
+
+	if (dr_domain_caps_init(mdev, dmn)) {
+		mlx5dr_dbg(dmn, "Failed init domain, no caps\n");
+		goto free_domain;
+	}
+
+	dmn->info.max_log_action_icm_sz = DR_CHUNK_SIZE_4K;
+	dmn->info.max_log_sw_icm_sz = min_t(u32, DR_CHUNK_SIZE_1024K,
+					    dmn->info.caps.log_icm_size);
+
+	if (!dmn->info.supp_sw_steering) {
+		mlx5dr_err(dmn, "SW steering is not supported\n");
+		goto uninit_caps;
+	}
+
+	/* Allocate resources */
+	ret = dr_domain_init_resources(dmn);
+	if (ret) {
+		mlx5dr_err(dmn, "Failed init domain resources\n");
+		goto uninit_caps;
+	}
+
+	ret = dr_domain_init_cache(dmn);
+	if (ret) {
+		mlx5dr_err(dmn, "Failed to initialize domain cache\n");
+		goto uninit_resources;
+	}
+
+	/* Init CRC table for htbl CRC calculation */
+	mlx5dr_crc32_init_table();
+
+	return dmn;
+
+uninit_resources:
+	dr_domain_uninit_resources(dmn);
+uninit_caps:
+	dr_domain_caps_uninit(dmn);
+free_domain:
+	kfree(dmn);
+	return NULL;
+}
+
+/* Ensure the device steering tables are synchronized with updates made
+ * by SW insertion.
+ */
+int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags)
+{
+	int ret = 0;
+
+	if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) {
+		mutex_lock(&dmn->mutex);
+		ret = mlx5dr_send_ring_force_drain(dmn);
+		mutex_unlock(&dmn->mutex);
+		if (ret)
+			return ret;
+	}
+
+	if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW)
+		ret = mlx5dr_cmd_sync_steering(dmn->mdev);
+
+	return ret;
+}
+
+int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
+{
+	if (refcount_read(&dmn->refcount) > 1)
+		return -EBUSY;
+
+	/* make sure resources are not used by the hardware */
+	mlx5dr_cmd_sync_steering(dmn->mdev);
+	dr_domain_uninit_cache(dmn);
+	dr_domain_uninit_resources(dmn);
+	dr_domain_caps_uninit(dmn);
+	mutex_destroy(&dmn->mutex);
+	kfree(dmn);
+	return 0;
+}
+
+void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
+			    struct mlx5dr_domain *peer_dmn)
+{
+	mutex_lock(&dmn->mutex);
+
+	if (dmn->peer_dmn)
+		refcount_dec(&dmn->peer_dmn->refcount);
+
+	dmn->peer_dmn = peer_dmn;
+
+	if (dmn->peer_dmn)
+		refcount_inc(&dmn->peer_dmn->refcount);
+
+	mutex_unlock(&dmn->mutex);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
new file mode 100644
index 000000000000..60ef6e6171e3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies.
*/ + +#include <linux/types.h> +#include "dr_types.h" + +struct mlx5dr_fw_recalc_cs_ft * +mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num) +{ + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; + u32 table_id, group_id, modify_hdr_id; + u64 rx_icm_addr, modify_ttl_action; + int ret; + + recalc_cs_ft = kzalloc(sizeof(*recalc_cs_ft), GFP_KERNEL); + if (!recalc_cs_ft) + return NULL; + + ret = mlx5dr_cmd_create_flow_table(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, + 0, 0, dmn->info.caps.max_ft_level - 1, + false, true, &rx_icm_addr, &table_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating TTL W/A FW flow table %d\n", ret); + goto free_ttl_tbl; + } + + ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, &group_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating TTL W/A FW flow group %d\n", ret); + goto destroy_flow_table; + } + + /* Modify TTL action by adding zero to trigger CS recalculation */ + modify_ttl_action = 0; + MLX5_SET(set_action_in, &modify_ttl_action, action_type, MLX5_ACTION_TYPE_ADD); + MLX5_SET(set_action_in, &modify_ttl_action, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL); + + ret = mlx5dr_cmd_alloc_modify_header(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, 1, + &modify_ttl_action, + &modify_hdr_id); + if (ret) { + mlx5dr_err(dmn, "Failed modify header TTL %d\n", ret); + goto destroy_flow_group; + } + + ret = mlx5dr_cmd_set_fte_modify_and_vport(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, group_id, modify_hdr_id, + vport_num); + if (ret) { + mlx5dr_err(dmn, "Failed setting TTL W/A flow table entry %d\n", ret); + goto dealloc_modify_header; + } + + recalc_cs_ft->modify_hdr_id = modify_hdr_id; + recalc_cs_ft->rx_icm_addr = rx_icm_addr; + recalc_cs_ft->table_id = table_id; + recalc_cs_ft->group_id = group_id; + + return recalc_cs_ft; + +dealloc_modify_header: + mlx5dr_cmd_dealloc_modify_header(dmn->mdev, modify_hdr_id); +destroy_flow_group: + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, group_id); +destroy_flow_table: + mlx5dr_cmd_destroy_flow_table(dmn->mdev, table_id, MLX5_FLOW_TABLE_TYPE_FDB); +free_ttl_tbl: + kfree(recalc_cs_ft); + return NULL; +} + +void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn, + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft) +{ + mlx5dr_cmd_del_flow_table_entry(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + recalc_cs_ft->table_id); + mlx5dr_cmd_dealloc_modify_header(dmn->mdev, recalc_cs_ft->modify_hdr_id); + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + recalc_cs_ft->table_id, + recalc_cs_ft->group_id); + mlx5dr_cmd_destroy_flow_table(dmn->mdev, + recalc_cs_ft->table_id, + MLX5_FLOW_TABLE_TYPE_FDB); + + kfree(recalc_cs_ft); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c new file mode 100644 index 000000000000..913f1e5aaaf2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -0,0 +1,570 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "dr_types.h" + +#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64 +#define DR_ICM_SYNC_THRESHOLD (64 * 1024 * 1024) + +struct mlx5dr_icm_pool; + +struct mlx5dr_icm_bucket { + struct mlx5dr_icm_pool *pool; + + /* Chunks that aren't visible to HW not directly and not in cache */ + struct list_head free_list; + unsigned int free_list_count; + + /* Used chunks, HW may be accessing this memory */ + struct list_head used_list; + unsigned int used_list_count; + + /* HW may be accessing this memory but at some future, + * undetermined time, it might cease to do so. Before deciding to call + * sync_ste, this list is moved to sync_list + */ + struct list_head hot_list; + unsigned int hot_list_count; + + /* Pending sync list, entries from the hot list are moved to this list. + * sync_ste is executed and then sync_list is concatenated to the free list + */ + struct list_head sync_list; + unsigned int sync_list_count; + + u32 total_chunks; + u32 num_of_entries; + u32 entry_size; + /* protect the ICM bucket */ + struct mutex mutex; +}; + +struct mlx5dr_icm_pool { + struct mlx5dr_icm_bucket *buckets; + enum mlx5dr_icm_type icm_type; + enum mlx5dr_icm_chunk_size max_log_chunk_sz; + enum mlx5dr_icm_chunk_size num_of_buckets; + struct list_head icm_mr_list; + /* protect the ICM MR list */ + struct mutex mr_mutex; + struct mlx5dr_domain *dmn; +}; + +struct mlx5dr_icm_dm { + u32 obj_id; + enum mlx5_sw_icm_type type; + phys_addr_t addr; + size_t length; +}; + +struct mlx5dr_icm_mr { + struct mlx5dr_icm_pool *pool; + struct mlx5_core_mkey mkey; + struct mlx5dr_icm_dm dm; + size_t used_length; + size_t length; + u64 icm_start_addr; + struct list_head mr_list; +}; + +static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev, + u32 pd, u64 length, u64 start_addr, int mode, + struct mlx5_core_mkey *mkey) +{ + u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, access_mode_1_0, mode); + MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + if (mode == MLX5_MKC_ACCESS_MODE_SW_ICM) { + MLX5_SET(mkc, mkc, rw, 1); + MLX5_SET(mkc, mkc, rr, 1); + } + + MLX5_SET64(mkc, mkc, len, length); + MLX5_SET(mkc, mkc, pd, pd); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, start_addr); + + return mlx5_core_create_mkey(mdev, mkey, in, inlen); +} + +static struct mlx5dr_icm_mr * +dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool, + enum mlx5_sw_icm_type type, + size_t align_base) +{ + struct mlx5_core_dev *mdev = pool->dmn->mdev; + struct mlx5dr_icm_mr *icm_mr; + size_t align_diff; + int err; + + icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL); + if (!icm_mr) + return NULL; + + icm_mr->pool = pool; + INIT_LIST_HEAD(&icm_mr->mr_list); + + icm_mr->dm.type = type; + + /* 2^log_biggest_table * entry-size * double-for-alignment */ + icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) * 2; + + err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0, + &icm_mr->dm.addr, &icm_mr->dm.obj_id); + if (err) { + mlx5dr_err(pool->dmn, "Failed to allocate SW ICM memory, err (%d)\n", err); + goto free_icm_mr; + } + + /* Register device memory */ + err = dr_icm_create_dm_mkey(mdev, pool->dmn->pdn, + icm_mr->dm.length, + icm_mr->dm.addr, + MLX5_MKC_ACCESS_MODE_SW_ICM, + &icm_mr->mkey); + if (err) { + mlx5dr_err(pool->dmn, "Failed to create SW ICM MKEY, err 
(%d)\n", err); + goto free_dm; + } + + icm_mr->icm_start_addr = icm_mr->dm.addr; + + align_diff = icm_mr->icm_start_addr % align_base; + if (align_diff) + icm_mr->used_length = align_base - align_diff; + + list_add_tail(&icm_mr->mr_list, &pool->icm_mr_list); + + return icm_mr; + +free_dm: + mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0, + icm_mr->dm.addr, icm_mr->dm.obj_id); +free_icm_mr: + kvfree(icm_mr); + return NULL; +} + +static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr) +{ + struct mlx5_core_dev *mdev = icm_mr->pool->dmn->mdev; + struct mlx5dr_icm_dm *dm = &icm_mr->dm; + + list_del(&icm_mr->mr_list); + mlx5_core_destroy_mkey(mdev, &icm_mr->mkey); + mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0, + dm->addr, dm->obj_id); + kvfree(icm_mr); +} + +static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk) +{ + struct mlx5dr_icm_bucket *bucket = chunk->bucket; + + chunk->ste_arr = kvzalloc(bucket->num_of_entries * + sizeof(chunk->ste_arr[0]), GFP_KERNEL); + if (!chunk->ste_arr) + return -ENOMEM; + + chunk->hw_ste_arr = kvzalloc(bucket->num_of_entries * + DR_STE_SIZE_REDUCED, GFP_KERNEL); + if (!chunk->hw_ste_arr) + goto out_free_ste_arr; + + chunk->miss_list = kvmalloc(bucket->num_of_entries * + sizeof(chunk->miss_list[0]), GFP_KERNEL); + if (!chunk->miss_list) + goto out_free_hw_ste_arr; + + return 0; + +out_free_hw_ste_arr: + kvfree(chunk->hw_ste_arr); +out_free_ste_arr: + kvfree(chunk->ste_arr); + return -ENOMEM; +} + +static int dr_icm_chunks_create(struct mlx5dr_icm_bucket *bucket) +{ + size_t mr_free_size, mr_req_size, mr_row_size; + struct mlx5dr_icm_pool *pool = bucket->pool; + struct mlx5dr_icm_mr *icm_mr = NULL; + struct mlx5dr_icm_chunk *chunk; + enum mlx5_sw_icm_type dm_type; + size_t align_base; + int i, err = 0; + + mr_req_size = bucket->num_of_entries * bucket->entry_size; + mr_row_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type); + + if (pool->icm_type == DR_ICM_TYPE_STE) { + dm_type = MLX5_SW_ICM_TYPE_STEERING; + /* Align base is the biggest chunk size / row size */ + align_base = mr_row_size; + } else { + dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY; + /* Align base is 64B */ + align_base = DR_ICM_MODIFY_HDR_ALIGN_BASE; + } + + mutex_lock(&pool->mr_mutex); + if (!list_empty(&pool->icm_mr_list)) { + icm_mr = list_last_entry(&pool->icm_mr_list, + struct mlx5dr_icm_mr, mr_list); + + if (icm_mr) + mr_free_size = icm_mr->dm.length - icm_mr->used_length; + } + + if (!icm_mr || mr_free_size < mr_row_size) { + icm_mr = dr_icm_pool_mr_create(pool, dm_type, align_base); + if (!icm_mr) { + err = -ENOMEM; + goto out_err; + } + } + + /* Create memory aligned chunks */ + for (i = 0; i < mr_row_size / mr_req_size; i++) { + chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL); + if (!chunk) { + err = -ENOMEM; + goto out_err; + } + + chunk->bucket = bucket; + chunk->rkey = icm_mr->mkey.key; + /* mr start addr is zero based */ + chunk->mr_addr = icm_mr->used_length; + chunk->icm_addr = (uintptr_t)icm_mr->icm_start_addr + icm_mr->used_length; + icm_mr->used_length += mr_req_size; + chunk->num_of_entries = bucket->num_of_entries; + chunk->byte_size = chunk->num_of_entries * bucket->entry_size; + + if (pool->icm_type == DR_ICM_TYPE_STE) { + err = dr_icm_chunk_ste_init(chunk); + if (err) + goto out_free_chunk; + } + + INIT_LIST_HEAD(&chunk->chunk_list); + list_add(&chunk->chunk_list, &bucket->free_list); + bucket->free_list_count++; + bucket->total_chunks++; + } + mutex_unlock(&pool->mr_mutex); + return 0; + 
+out_free_chunk: + kvfree(chunk); +out_err: + mutex_unlock(&pool->mr_mutex); + return err; +} + +static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk) +{ + kvfree(chunk->miss_list); + kvfree(chunk->hw_ste_arr); + kvfree(chunk->ste_arr); +} + +static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk) +{ + struct mlx5dr_icm_bucket *bucket = chunk->bucket; + + list_del(&chunk->chunk_list); + bucket->total_chunks--; + + if (bucket->pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_chunk_ste_cleanup(chunk); + + kvfree(chunk); +} + +static void dr_icm_bucket_init(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *bucket, + enum mlx5dr_icm_chunk_size chunk_size) +{ + if (pool->icm_type == DR_ICM_TYPE_STE) + bucket->entry_size = DR_STE_SIZE; + else + bucket->entry_size = DR_MODIFY_ACTION_SIZE; + + bucket->num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); + bucket->pool = pool; + mutex_init(&bucket->mutex); + INIT_LIST_HEAD(&bucket->free_list); + INIT_LIST_HEAD(&bucket->used_list); + INIT_LIST_HEAD(&bucket->hot_list); + INIT_LIST_HEAD(&bucket->sync_list); +} + +static void dr_icm_bucket_cleanup(struct mlx5dr_icm_bucket *bucket) +{ + struct mlx5dr_icm_chunk *chunk, *next; + + mutex_destroy(&bucket->mutex); + list_splice_tail_init(&bucket->sync_list, &bucket->free_list); + list_splice_tail_init(&bucket->hot_list, &bucket->free_list); + + list_for_each_entry_safe(chunk, next, &bucket->free_list, chunk_list) + dr_icm_chunk_destroy(chunk); + + WARN_ON(bucket->total_chunks != 0); + + /* Cleanup of unreturned chunks */ + list_for_each_entry_safe(chunk, next, &bucket->used_list, chunk_list) + dr_icm_chunk_destroy(chunk); +} + +static u64 dr_icm_hot_mem_size(struct mlx5dr_icm_pool *pool) +{ + u64 hot_size = 0; + int chunk_order; + + for (chunk_order = 0; chunk_order < pool->num_of_buckets; chunk_order++) + hot_size += pool->buckets[chunk_order].hot_list_count * + mlx5dr_icm_pool_chunk_size_to_byte(chunk_order, pool->icm_type); + + return hot_size; +} + +static bool dr_icm_reuse_hot_entries(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *bucket) +{ + u64 bytes_for_sync; + + bytes_for_sync = dr_icm_hot_mem_size(pool); + if (bytes_for_sync < DR_ICM_SYNC_THRESHOLD || !bucket->hot_list_count) + return false; + + return true; +} + +static void dr_icm_chill_bucket_start(struct mlx5dr_icm_bucket *bucket) +{ + list_splice_tail_init(&bucket->hot_list, &bucket->sync_list); + bucket->sync_list_count += bucket->hot_list_count; + bucket->hot_list_count = 0; +} + +static void dr_icm_chill_bucket_end(struct mlx5dr_icm_bucket *bucket) +{ + list_splice_tail_init(&bucket->sync_list, &bucket->free_list); + bucket->free_list_count += bucket->sync_list_count; + bucket->sync_list_count = 0; +} + +static void dr_icm_chill_bucket_abort(struct mlx5dr_icm_bucket *bucket) +{ + list_splice_tail_init(&bucket->sync_list, &bucket->hot_list); + bucket->hot_list_count += bucket->sync_list_count; + bucket->sync_list_count = 0; +} + +static void dr_icm_chill_buckets_start(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *cb, + bool buckets[DR_CHUNK_SIZE_MAX]) +{ + struct mlx5dr_icm_bucket *bucket; + int i; + + for (i = 0; i < pool->num_of_buckets; i++) { + bucket = &pool->buckets[i]; + if (bucket == cb) { + dr_icm_chill_bucket_start(bucket); + continue; + } + + /* Freeing the mutex is done at the end of that process, after + * sync_ste was executed at dr_icm_chill_buckets_end func. 
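+ *
+ * A sketch of the full chill cycle as driven by the caller
+ * (mlx5dr_icm_alloc_chunk() below):
+ *
+ *   dr_icm_chill_buckets_start(pool, bucket, buckets);
+ *   err = mlx5dr_cmd_sync_steering(pool->dmn->mdev);
+ *   if (err)
+ *       dr_icm_chill_buckets_abort(pool, bucket, buckets);
+ *   else
+ *       dr_icm_chill_buckets_end(pool, bucket, buckets);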
+ */ + if (mutex_trylock(&bucket->mutex)) { + dr_icm_chill_bucket_start(bucket); + buckets[i] = true; + } + } +} + +static void dr_icm_chill_buckets_end(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *cb, + bool buckets[DR_CHUNK_SIZE_MAX]) +{ + struct mlx5dr_icm_bucket *bucket; + int i; + + for (i = 0; i < pool->num_of_buckets; i++) { + bucket = &pool->buckets[i]; + if (bucket == cb) { + dr_icm_chill_bucket_end(bucket); + continue; + } + + if (!buckets[i]) + continue; + + dr_icm_chill_bucket_end(bucket); + mutex_unlock(&bucket->mutex); + } +} + +static void dr_icm_chill_buckets_abort(struct mlx5dr_icm_pool *pool, + struct mlx5dr_icm_bucket *cb, + bool buckets[DR_CHUNK_SIZE_MAX]) +{ + struct mlx5dr_icm_bucket *bucket; + int i; + + for (i = 0; i < pool->num_of_buckets; i++) { + bucket = &pool->buckets[i]; + if (bucket == cb) { + dr_icm_chill_bucket_abort(bucket); + continue; + } + + if (!buckets[i]) + continue; + + dr_icm_chill_bucket_abort(bucket); + mutex_unlock(&bucket->mutex); + } +} + +/* Allocate an ICM chunk, each chunk holds a piece of ICM memory and + * also memory used for HW STE management for optimizations. + */ +struct mlx5dr_icm_chunk * +mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size) +{ + struct mlx5dr_icm_chunk *chunk = NULL; /* Fix compilation warning */ + bool buckets[DR_CHUNK_SIZE_MAX] = {}; + struct mlx5dr_icm_bucket *bucket; + int err; + + if (chunk_size > pool->max_log_chunk_sz) + return NULL; + + bucket = &pool->buckets[chunk_size]; + + mutex_lock(&bucket->mutex); + + /* Take chunk from pool if available, otherwise allocate new chunks */ + if (list_empty(&bucket->free_list)) { + if (dr_icm_reuse_hot_entries(pool, bucket)) { + dr_icm_chill_buckets_start(pool, bucket, buckets); + err = mlx5dr_cmd_sync_steering(pool->dmn->mdev); + if (err) { + dr_icm_chill_buckets_abort(pool, bucket, buckets); + mlx5dr_dbg(pool->dmn, "Sync_steering failed\n"); + chunk = NULL; + goto out; + } + dr_icm_chill_buckets_end(pool, bucket, buckets); + } else { + dr_icm_chunks_create(bucket); + } + } + + if (!list_empty(&bucket->free_list)) { + chunk = list_last_entry(&bucket->free_list, + struct mlx5dr_icm_chunk, + chunk_list); + if (chunk) { + list_del_init(&chunk->chunk_list); + list_add_tail(&chunk->chunk_list, &bucket->used_list); + bucket->free_list_count--; + bucket->used_list_count++; + } + } +out: + mutex_unlock(&bucket->mutex); + return chunk; +} + +void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk) +{ + struct mlx5dr_icm_bucket *bucket = chunk->bucket; + + if (bucket->pool->icm_type == DR_ICM_TYPE_STE) { + memset(chunk->ste_arr, 0, + bucket->num_of_entries * sizeof(chunk->ste_arr[0])); + memset(chunk->hw_ste_arr, 0, + bucket->num_of_entries * DR_STE_SIZE_REDUCED); + } + + mutex_lock(&bucket->mutex); + list_del_init(&chunk->chunk_list); + list_add_tail(&chunk->chunk_list, &bucket->hot_list); + bucket->hot_list_count++; + bucket->used_list_count--; + mutex_unlock(&bucket->mutex); +} + +struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, + enum mlx5dr_icm_type icm_type) +{ + enum mlx5dr_icm_chunk_size max_log_chunk_sz; + struct mlx5dr_icm_pool *pool; + int i; + + if (icm_type == DR_ICM_TYPE_STE) + max_log_chunk_sz = dmn->info.max_log_sw_icm_sz; + else + max_log_chunk_sz = dmn->info.max_log_action_icm_sz; + + pool = kvzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; + + pool->buckets = kcalloc(max_log_chunk_sz + 1, + sizeof(pool->buckets[0]), + GFP_KERNEL); + if (!pool->buckets) + goto 
free_pool; + + pool->dmn = dmn; + pool->icm_type = icm_type; + pool->max_log_chunk_sz = max_log_chunk_sz; + pool->num_of_buckets = max_log_chunk_sz + 1; + INIT_LIST_HEAD(&pool->icm_mr_list); + + for (i = 0; i < pool->num_of_buckets; i++) + dr_icm_bucket_init(pool, &pool->buckets[i], i); + + mutex_init(&pool->mr_mutex); + + return pool; + +free_pool: + kvfree(pool); + return NULL; +} + +void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool) +{ + struct mlx5dr_icm_mr *icm_mr, *next; + int i; + + mutex_destroy(&pool->mr_mutex); + + list_for_each_entry_safe(icm_mr, next, &pool->icm_mr_list, mr_list) + dr_icm_pool_mr_destroy(icm_mr); + + for (i = 0; i < pool->num_of_buckets; i++) + dr_icm_bucket_cleanup(&pool->buckets[i]); + + kfree(pool->buckets); + kvfree(pool); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c new file mode 100644 index 000000000000..01008cd66f75 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -0,0 +1,770 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +static bool dr_mask_is_smac_set(struct mlx5dr_match_spec *spec) +{ + return (spec->smac_47_16 || spec->smac_15_0); +} + +static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec) +{ + return (spec->dmac_47_16 || spec->dmac_15_0); +} + +static bool dr_mask_is_src_addr_set(struct mlx5dr_match_spec *spec) +{ + return (spec->src_ip_127_96 || spec->src_ip_95_64 || + spec->src_ip_63_32 || spec->src_ip_31_0); +} + +static bool dr_mask_is_dst_addr_set(struct mlx5dr_match_spec *spec) +{ + return (spec->dst_ip_127_96 || spec->dst_ip_95_64 || + spec->dst_ip_63_32 || spec->dst_ip_31_0); +} + +static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec) +{ + return (spec->ip_protocol || spec->frag || spec->tcp_flags || + spec->ip_ecn || spec->ip_dscp); +} + +static bool dr_mask_is_tcp_udp_base_set(struct mlx5dr_match_spec *spec) +{ + return (spec->tcp_sport || spec->tcp_dport || + spec->udp_sport || spec->udp_dport); +} + +static bool dr_mask_is_ipv4_set(struct mlx5dr_match_spec *spec) +{ + return (spec->dst_ip_31_0 || spec->src_ip_31_0); +} + +static bool dr_mask_is_ipv4_5_tuple_set(struct mlx5dr_match_spec *spec) +{ + return (dr_mask_is_l3_base_set(spec) || + dr_mask_is_tcp_udp_base_set(spec) || + dr_mask_is_ipv4_set(spec)); +} + +static bool dr_mask_is_eth_l2_tnl_set(struct mlx5dr_match_misc *misc) +{ + return misc->vxlan_vni; +} + +static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec) +{ + return spec->ttl_hoplimit; +} + +#define DR_MASK_IS_L2_DST(_spec, _misc, _inner_outer) (_spec.first_vid || \ + (_spec).first_cfi || (_spec).first_prio || (_spec).cvlan_tag || \ + (_spec).svlan_tag || (_spec).dmac_47_16 || (_spec).dmac_15_0 || \ + (_spec).ethertype || (_spec).ip_version || \ + (_misc)._inner_outer##_second_vid || \ + (_misc)._inner_outer##_second_cfi || \ + (_misc)._inner_outer##_second_prio || \ + (_misc)._inner_outer##_second_cvlan_tag || \ + (_misc)._inner_outer##_second_svlan_tag) + +#define DR_MASK_IS_ETH_L4_SET(_spec, _misc, _inner_outer) ( \ + dr_mask_is_l3_base_set(&(_spec)) || \ + dr_mask_is_tcp_udp_base_set(&(_spec)) || \ + dr_mask_is_ttl_set(&(_spec)) || \ + (_misc)._inner_outer##_ipv6_flow_label) + +#define DR_MASK_IS_ETH_L4_MISC_SET(_misc3, _inner_outer) ( \ + (_misc3)._inner_outer##_tcp_seq_num || \ + (_misc3)._inner_outer##_tcp_ack_num) + +#define DR_MASK_IS_FIRST_MPLS_SET(_misc2, 
_inner_outer) ( \ + (_misc2)._inner_outer##_first_mpls_label || \ + (_misc2)._inner_outer##_first_mpls_exp || \ + (_misc2)._inner_outer##_first_mpls_s_bos || \ + (_misc2)._inner_outer##_first_mpls_ttl) + +static bool dr_mask_is_gre_set(struct mlx5dr_match_misc *misc) +{ + return (misc->gre_key_h || misc->gre_key_l || + misc->gre_protocol || misc->gre_c_present || + misc->gre_k_present || misc->gre_s_present); +} + +#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET(_misc2, gre_udp) ( \ + (_misc2).outer_first_mpls_over_##gre_udp##_label || \ + (_misc2).outer_first_mpls_over_##gre_udp##_exp || \ + (_misc2).outer_first_mpls_over_##gre_udp##_s_bos || \ + (_misc2).outer_first_mpls_over_##gre_udp##_ttl) + +#define DR_MASK_IS_FLEX_PARSER_0_SET(_misc2) ( \ + DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), gre) || \ + DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), udp)) + +static bool dr_mask_is_flex_parser_tnl_set(struct mlx5dr_match_misc3 *misc3) +{ + return (misc3->outer_vxlan_gpe_vni || + misc3->outer_vxlan_gpe_next_protocol || + misc3->outer_vxlan_gpe_flags); +} + +static bool dr_mask_is_flex_parser_icmpv6_set(struct mlx5dr_match_misc3 *misc3) +{ + return (misc3->icmpv6_type || misc3->icmpv6_code || + misc3->icmpv6_header_data); +} + +static bool dr_mask_is_wqe_metadata_set(struct mlx5dr_match_misc2 *misc2) +{ + return misc2->metadata_reg_a; +} + +static bool dr_mask_is_reg_c_0_3_set(struct mlx5dr_match_misc2 *misc2) +{ + return (misc2->metadata_reg_c_0 || misc2->metadata_reg_c_1 || + misc2->metadata_reg_c_2 || misc2->metadata_reg_c_3); +} + +static bool dr_mask_is_reg_c_4_7_set(struct mlx5dr_match_misc2 *misc2) +{ + return (misc2->metadata_reg_c_4 || misc2->metadata_reg_c_5 || + misc2->metadata_reg_c_6 || misc2->metadata_reg_c_7); +} + +static bool dr_mask_is_gvmi_or_qpn_set(struct mlx5dr_match_misc *misc) +{ + return (misc->source_sqn || misc->source_port); +} + +static bool +dr_matcher_supp_flex_parser_vxlan_gpe(struct mlx5dr_domain *dmn) +{ + return dmn->info.caps.flex_protocols & + MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED; +} + +int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + bool ipv6) +{ + if (ipv6) { + nic_matcher->ste_builder = nic_matcher->ste_builder6; + nic_matcher->num_of_builders = nic_matcher->num_of_builders6; + } else { + nic_matcher->ste_builder = nic_matcher->ste_builder4; + nic_matcher->num_of_builders = nic_matcher->num_of_builders4; + } + + if (!nic_matcher->num_of_builders) { + mlx5dr_dbg(matcher->tbl->dmn, + "Rule not supported on this matcher due to IP related fields\n"); + return -EINVAL; + } + + return 0; +} + +static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + bool ipv6) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_match_param mask = {}; + struct mlx5dr_match_misc3 *misc3; + struct mlx5dr_ste_build *sb; + u8 *num_of_builders; + bool inner, rx; + int idx = 0; + int ret, i; + + if (ipv6) { + sb = nic_matcher->ste_builder6; + num_of_builders = &nic_matcher->num_of_builders6; + } else { + sb = nic_matcher->ste_builder4; + num_of_builders = &nic_matcher->num_of_builders4; + } + + rx = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX; + + /* Create a temporary mask to track and clear used mask fields */ + if (matcher->match_criteria & DR_MATCHER_CRITERIA_OUTER) + mask.outer = matcher->mask.outer; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC) + mask.misc 
= matcher->mask.misc; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_INNER) + mask.inner = matcher->mask.inner; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC2) + mask.misc2 = matcher->mask.misc2; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC3) + mask.misc3 = matcher->mask.misc3; + + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, + &matcher->mask, NULL); + if (ret) + return ret; + + /* Outer */ + if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER | + DR_MATCHER_CRITERIA_MISC | + DR_MATCHER_CRITERIA_MISC2 | + DR_MATCHER_CRITERIA_MISC3)) { + inner = false; + + if (dr_mask_is_wqe_metadata_set(&mask.misc2)) + mlx5dr_ste_build_general_purpose(&sb[idx++], &mask, inner, rx); + + if (dr_mask_is_reg_c_0_3_set(&mask.misc2)) + mlx5dr_ste_build_register_0(&sb[idx++], &mask, inner, rx); + + if (dr_mask_is_reg_c_4_7_set(&mask.misc2)) + mlx5dr_ste_build_register_1(&sb[idx++], &mask, inner, rx); + + if (dr_mask_is_gvmi_or_qpn_set(&mask.misc) && + (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) { + ret = mlx5dr_ste_build_src_gvmi_qpn(&sb[idx++], &mask, + &dmn->info.caps, + inner, rx); + if (ret) + return ret; + } + + if (dr_mask_is_smac_set(&mask.outer) && + dr_mask_is_dmac_set(&mask.outer)) { + ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask, + inner, rx); + if (ret) + return ret; + } + + if (dr_mask_is_smac_set(&mask.outer)) + mlx5dr_ste_build_eth_l2_src(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_L2_DST(mask.outer, mask.misc, outer)) + mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx); + + if (ipv6) { + if (dr_mask_is_dst_addr_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask, + inner, rx); + + if (dr_mask_is_src_addr_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv6_src(&sb[idx++], &mask, + inner, rx); + + if (DR_MASK_IS_ETH_L4_SET(mask.outer, mask.misc, outer)) + mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask, + inner, rx); + } else { + if (dr_mask_is_ipv4_5_tuple_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask, + inner, rx); + + if (dr_mask_is_ttl_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv4_misc(&sb[idx++], &mask, + inner, rx); + } + + if (dr_mask_is_flex_parser_tnl_set(&mask.misc3) && + dr_matcher_supp_flex_parser_vxlan_gpe(dmn)) + mlx5dr_ste_build_flex_parser_tnl(&sb[idx++], &mask, + inner, rx); + + if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer)) + mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, outer)) + mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2)) + mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, + inner, rx); + + misc3 = &mask.misc3; + if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc3) && + mlx5dr_matcher_supp_flex_parser_icmp_v4(&dmn->info.caps)) || + (dr_mask_is_flex_parser_icmpv6_set(&mask.misc3) && + mlx5dr_matcher_supp_flex_parser_icmp_v6(&dmn->info.caps))) { + ret = mlx5dr_ste_build_flex_parser_1(&sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + if (ret) + return ret; + } + if (dr_mask_is_gre_set(&mask.misc)) + mlx5dr_ste_build_gre(&sb[idx++], &mask, inner, rx); + } + + /* Inner */ + if (matcher->match_criteria & (DR_MATCHER_CRITERIA_INNER | + DR_MATCHER_CRITERIA_MISC | + DR_MATCHER_CRITERIA_MISC2 | + DR_MATCHER_CRITERIA_MISC3)) { + inner = true; + + if (dr_mask_is_eth_l2_tnl_set(&mask.misc)) + mlx5dr_ste_build_eth_l2_tnl(&sb[idx++], &mask, inner, rx); + + if (dr_mask_is_smac_set(&mask.inner) && + 
dr_mask_is_dmac_set(&mask.inner)) { + ret = mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], + &mask, inner, rx); + if (ret) + return ret; + } + + if (dr_mask_is_smac_set(&mask.inner)) + mlx5dr_ste_build_eth_l2_src(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_L2_DST(mask.inner, mask.misc, inner)) + mlx5dr_ste_build_eth_l2_dst(&sb[idx++], &mask, inner, rx); + + if (ipv6) { + if (dr_mask_is_dst_addr_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv6_dst(&sb[idx++], &mask, + inner, rx); + + if (dr_mask_is_src_addr_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv6_src(&sb[idx++], &mask, + inner, rx); + + if (DR_MASK_IS_ETH_L4_SET(mask.inner, mask.misc, inner)) + mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask, + inner, rx); + } else { + if (dr_mask_is_ipv4_5_tuple_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask, + inner, rx); + + if (dr_mask_is_ttl_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv4_misc(&sb[idx++], &mask, + inner, rx); + } + + if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, inner)) + mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, inner)) + mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx); + + if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2)) + mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, inner, rx); + } + /* Empty matcher, takes all */ + if (matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY) + mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx); + + if (idx == 0) { + mlx5dr_dbg(dmn, "Cannot generate any valid rules from mask\n"); + return -EINVAL; + } + + /* Check that all mask fields were consumed */ + for (i = 0; i < sizeof(struct mlx5dr_match_param); i++) { + if (((u8 *)&mask)[i] != 0) { + mlx5dr_info(dmn, "Mask contains unsupported parameters\n"); + return -EOPNOTSUPP; + } + } + + *num_of_builders = idx; + + return 0; +} + +static int dr_matcher_connect(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *curr_nic_matcher, + struct mlx5dr_matcher_rx_tx *next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +{ + struct mlx5dr_table_rx_tx *nic_tbl = curr_nic_matcher->nic_tbl; + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *prev_htbl; + int ret; + + /* Connect end anchor hash table to next_htbl or to the default address */ + if (next_nic_matcher) { + info.type = CONNECT_HIT; + info.hit_next_htbl = next_nic_matcher->s_htbl; + } else { + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + } + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + curr_nic_matcher->e_anchor, + &info, info.type == CONNECT_HIT); + if (ret) + return ret; + + /* Connect start hash table to end anchor */ + info.type = CONNECT_MISS; + info.miss_icm_addr = curr_nic_matcher->e_anchor->chunk->icm_addr; + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + curr_nic_matcher->s_htbl, + &info, false); + if (ret) + return ret; + + /* Connect previous hash table to matcher start hash table */ + if (prev_nic_matcher) + prev_htbl = prev_nic_matcher->e_anchor; + else + prev_htbl = nic_tbl->s_anchor; + + info.type = CONNECT_HIT; + info.hit_next_htbl = curr_nic_matcher->s_htbl; + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_htbl, + &info, true); + if (ret) + return ret; + + /* Update the pointing ste and next hash table */ + curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->ste_arr; + prev_htbl->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl; + + if (next_nic_matcher) { + 
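+		/* Hit case: this matcher's end anchor points at the next
+		 * matcher's start table, so for N matchers the chain is,
+		 * as a sketch:
+		 *
+		 *   tbl->s_anchor -> m1->s_htbl .. m1->e_anchor
+		 *                 -> m2->s_htbl .. m2->e_anchor -> ..
+		 *                 -> table default ICM address (miss)
+		 *
+		 * The else branch below terminates the chain with that
+		 * default/miss address instead.
+		 */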
next_nic_matcher->s_htbl->pointing_ste = curr_nic_matcher->e_anchor->ste_arr; + curr_nic_matcher->e_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; + } + + return 0; +} + +static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_matcher *next_matcher, *prev_matcher, *tmp_matcher; + struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_domain *dmn = tbl->dmn; + bool first = true; + int ret; + + next_matcher = NULL; + if (!list_empty(&tbl->matcher_list)) + list_for_each_entry(tmp_matcher, &tbl->matcher_list, matcher_list) { + if (tmp_matcher->prio >= matcher->prio) { + next_matcher = tmp_matcher; + break; + } + first = false; + } + + prev_matcher = NULL; + if (next_matcher && !first) + prev_matcher = list_entry(next_matcher->matcher_list.prev, + struct mlx5dr_matcher, + matcher_list); + else if (!first) + prev_matcher = list_entry(tbl->matcher_list.prev, + struct mlx5dr_matcher, + matcher_list); + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { + ret = dr_matcher_connect(dmn, &matcher->rx, + next_matcher ? &next_matcher->rx : NULL, + prev_matcher ? &prev_matcher->rx : NULL); + if (ret) + return ret; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { + ret = dr_matcher_connect(dmn, &matcher->tx, + next_matcher ? &next_matcher->tx : NULL, + prev_matcher ? &prev_matcher->tx : NULL); + if (ret) + return ret; + } + + if (prev_matcher) + list_add(&matcher->matcher_list, &prev_matcher->matcher_list); + else if (next_matcher) + list_add_tail(&matcher->matcher_list, + &next_matcher->matcher_list); + else + list_add(&matcher->matcher_list, &tbl->matcher_list); + + return 0; +} + +static void dr_matcher_uninit_nic(struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + mlx5dr_htbl_put(nic_matcher->s_htbl); + mlx5dr_htbl_put(nic_matcher->e_anchor); +} + +static void dr_matcher_uninit_fdb(struct mlx5dr_matcher *matcher) +{ + dr_matcher_uninit_nic(&matcher->rx); + dr_matcher_uninit_nic(&matcher->tx); +} + +static void dr_matcher_uninit(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_matcher_uninit_nic(&matcher->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_matcher_uninit_nic(&matcher->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_matcher_uninit_fdb(matcher); + break; + default: + WARN_ON(true); + break; + } +} + +static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + int ret, ret_v4, ret_v6; + + ret_v4 = dr_matcher_set_ste_builders(matcher, nic_matcher, false); + ret_v6 = dr_matcher_set_ste_builders(matcher, nic_matcher, true); + + if (ret_v4 && ret_v6) { + mlx5dr_dbg(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n"); + return -EINVAL; + } + + if (!ret_v4) + nic_matcher->ste_builder = nic_matcher->ste_builder4; + else + nic_matcher->ste_builder = nic_matcher->ste_builder6; + + nic_matcher->e_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!nic_matcher->e_anchor) + return -ENOMEM; + + nic_matcher->s_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + nic_matcher->ste_builder[0].lu_type, + nic_matcher->ste_builder[0].byte_mask); + if (!nic_matcher->s_htbl) { + ret = -ENOMEM; + goto free_e_htbl; + } + + /* make sure the tables exist while empty */ + mlx5dr_htbl_get(nic_matcher->s_htbl); + 
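+	/* One reference per anchor is taken here and dropped in
+	 * dr_matcher_uninit_nic(), keeping both tables alive while no
+	 * rule points at them yet.
+	 */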
mlx5dr_htbl_get(nic_matcher->e_anchor); + + return 0; + +free_e_htbl: + mlx5dr_ste_htbl_free(nic_matcher->e_anchor); + return ret; +} + +static int dr_matcher_init_fdb(struct mlx5dr_matcher *matcher) +{ + int ret; + + ret = dr_matcher_init_nic(matcher, &matcher->rx); + if (ret) + return ret; + + ret = dr_matcher_init_nic(matcher, &matcher->tx); + if (ret) + goto uninit_nic_rx; + + return 0; + +uninit_nic_rx: + dr_matcher_uninit_nic(&matcher->rx); + return ret; +} + +static int dr_matcher_init(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_domain *dmn = tbl->dmn; + int ret; + + if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) { + mlx5dr_info(dmn, "Invalid match criteria attribute\n"); + return -EINVAL; + } + + if (mask) { + if (mask->match_sz > sizeof(struct mlx5dr_match_param)) { + mlx5dr_info(dmn, "Invalid match size attribute\n"); + return -EINVAL; + } + mlx5dr_ste_copy_param(matcher->match_criteria, + &matcher->mask, mask); + } + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + matcher->rx.nic_tbl = &tbl->rx; + ret = dr_matcher_init_nic(matcher, &matcher->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + matcher->tx.nic_tbl = &tbl->tx; + ret = dr_matcher_init_nic(matcher, &matcher->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + matcher->rx.nic_tbl = &tbl->rx; + matcher->tx.nic_tbl = &tbl->tx; + ret = dr_matcher_init_fdb(matcher); + break; + default: + WARN_ON(true); + return -EINVAL; + } + + return ret; +} + +struct mlx5dr_matcher * +mlx5dr_matcher_create(struct mlx5dr_table *tbl, + u16 priority, + u8 match_criteria_enable, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_matcher *matcher; + int ret; + + refcount_inc(&tbl->refcount); + + matcher = kzalloc(sizeof(*matcher), GFP_KERNEL); + if (!matcher) + goto dec_ref; + + matcher->tbl = tbl; + matcher->prio = priority; + matcher->match_criteria = match_criteria_enable; + refcount_set(&matcher->refcount, 1); + INIT_LIST_HEAD(&matcher->matcher_list); + + mutex_lock(&tbl->dmn->mutex); + + ret = dr_matcher_init(matcher, mask); + if (ret) + goto free_matcher; + + ret = dr_matcher_add_to_tbl(matcher); + if (ret) + goto matcher_uninit; + + mutex_unlock(&tbl->dmn->mutex); + + return matcher; + +matcher_uninit: + dr_matcher_uninit(matcher); +free_matcher: + mutex_unlock(&tbl->dmn->mutex); + kfree(matcher); +dec_ref: + refcount_dec(&tbl->refcount); + return NULL; +} + +static int dr_matcher_disconnect(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl, + struct mlx5dr_matcher_rx_tx *next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *prev_anchor; + + if (prev_nic_matcher) + prev_anchor = prev_nic_matcher->e_anchor; + else + prev_anchor = nic_tbl->s_anchor; + + /* Connect previous anchor hash table to next matcher or to the default address */ + if (next_nic_matcher) { + info.type = CONNECT_HIT; + info.hit_next_htbl = next_nic_matcher->s_htbl; + next_nic_matcher->s_htbl->pointing_ste = prev_anchor->ste_arr; + prev_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; + } else { + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + prev_anchor->ste_arr[0].next_htbl = NULL; + } + + return mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_anchor, + &info, true); +} + +static int dr_matcher_remove_from_tbl(struct mlx5dr_matcher *matcher) +{ + struct 
mlx5dr_matcher *prev_matcher, *next_matcher; + struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_domain *dmn = tbl->dmn; + int ret = 0; + + if (list_is_last(&matcher->matcher_list, &tbl->matcher_list)) + next_matcher = NULL; + else + next_matcher = list_next_entry(matcher, matcher_list); + + if (matcher->matcher_list.prev == &tbl->matcher_list) + prev_matcher = NULL; + else + prev_matcher = list_prev_entry(matcher, matcher_list); + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { + ret = dr_matcher_disconnect(dmn, &tbl->rx, + next_matcher ? &next_matcher->rx : NULL, + prev_matcher ? &prev_matcher->rx : NULL); + if (ret) + return ret; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { + ret = dr_matcher_disconnect(dmn, &tbl->tx, + next_matcher ? &next_matcher->tx : NULL, + prev_matcher ? &prev_matcher->tx : NULL); + if (ret) + return ret; + } + + list_del(&matcher->matcher_list); + + return 0; +} + +int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_table *tbl = matcher->tbl; + + if (refcount_read(&matcher->refcount) > 1) + return -EBUSY; + + mutex_lock(&tbl->dmn->mutex); + + dr_matcher_remove_from_tbl(matcher); + dr_matcher_uninit(matcher); + refcount_dec(&matcher->tbl->refcount); + + mutex_unlock(&tbl->dmn->mutex); + kfree(matcher); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c new file mode 100644 index 000000000000..3bc3f66b8fa8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -0,0 +1,1243 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +#define DR_RULE_MAX_STE_CHAIN (DR_RULE_MAX_STES + DR_ACTION_MAX_STES) + +struct mlx5dr_rule_action_member { + struct mlx5dr_action *action; + struct list_head list; +}; + +static int dr_rule_append_to_miss_list(struct mlx5dr_ste *new_last_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_ste_send_info *ste_info_last; + struct mlx5dr_ste *last_ste; + + /* The new entry will be inserted after the last */ + last_ste = list_entry(miss_list->prev, struct mlx5dr_ste, miss_list_node); + WARN_ON(!last_ste); + + ste_info_last = kzalloc(sizeof(*ste_info_last), GFP_KERNEL); + if (!ste_info_last) + return -ENOMEM; + + mlx5dr_ste_set_miss_addr(last_ste->hw_ste, + mlx5dr_ste_get_icm_addr(new_last_ste)); + list_add_tail(&new_last_ste->miss_list_node, miss_list); + + mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_REDUCED, + 0, last_ste->hw_ste, + ste_info_last, send_list, true); + + return 0; +} + +static struct mlx5dr_ste * +dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 *hw_ste) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_htbl *new_htbl; + struct mlx5dr_ste *ste; + + /* Create new table for miss entry */ + new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!new_htbl) { + mlx5dr_dbg(dmn, "Failed allocating collision table\n"); + return NULL; + } + + /* One and only entry, never grows */ + ste = new_htbl->ste_arr; + mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr); + mlx5dr_htbl_get(new_htbl); + + return ste; +} + +static struct mlx5dr_ste * +dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher, + 
struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 *hw_ste, + struct mlx5dr_ste *orig_ste) +{ + struct mlx5dr_ste *ste; + + ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste); + if (!ste) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed creating collision entry\n"); + return NULL; + } + + ste->ste_chain_location = orig_ste->ste_chain_location; + + /* In collision entry, all members share the same miss_list_head */ + ste->htbl->miss_list = mlx5dr_ste_get_miss_list(orig_ste); + + /* Next table */ + if (mlx5dr_ste_create_next_htbl(matcher, nic_matcher, ste, hw_ste, + DR_CHUNK_SIZE_1)) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n"); + goto free_tbl; + } + + return ste; + +free_tbl: + mlx5dr_ste_free(ste, matcher, nic_matcher); + return NULL; +} + +static int +dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info, + struct mlx5dr_domain *dmn) +{ + int ret; + + list_del(&ste_info->send_list); + ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data, + ste_info->size, ste_info->offset); + if (ret) + goto out; + /* Copy data to ste, only reduced size, the last 16B (mask) + * is already written to the hw. + */ + memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_REDUCED); + +out: + kfree(ste_info); + return ret; +} + +static int dr_rule_send_update_list(struct list_head *send_ste_list, + struct mlx5dr_domain *dmn, + bool is_reverse) +{ + struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info; + int ret; + + if (is_reverse) { + list_for_each_entry_safe_reverse(ste_info, tmp_ste_info, + send_ste_list, send_list) { + ret = dr_rule_handle_one_ste_in_update_list(ste_info, + dmn); + if (ret) + return ret; + } + } else { + list_for_each_entry_safe(ste_info, tmp_ste_info, + send_ste_list, send_list) { + ret = dr_rule_handle_one_ste_in_update_list(ste_info, + dmn); + if (ret) + return ret; + } + } + + return 0; +} + +static struct mlx5dr_ste * +dr_rule_find_ste_in_miss_list(struct list_head *miss_list, u8 *hw_ste) +{ + struct mlx5dr_ste *ste; + + if (list_empty(miss_list)) + return NULL; + + /* Check if hw_ste is present in the list */ + list_for_each_entry(ste, miss_list, miss_list_node) { + if (mlx5dr_ste_equal_tag(ste->hw_ste, hw_ste)) + return ste; + } + + return NULL; +} + +static struct mlx5dr_ste * +dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct list_head *update_list, + struct mlx5dr_ste *col_ste, + u8 *hw_ste) +{ + struct mlx5dr_ste *new_ste; + int ret; + + new_ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste); + if (!new_ste) + return NULL; + + /* In collision entry, all members share the same miss_list_head */ + new_ste->htbl->miss_list = mlx5dr_ste_get_miss_list(col_ste); + + /* Update the previous from the list */ + ret = dr_rule_append_to_miss_list(new_ste, + mlx5dr_ste_get_miss_list(col_ste), + update_list); + if (ret) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed update dup entry\n"); + goto err_exit; + } + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); + return NULL; +} + +static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *cur_ste, + struct mlx5dr_ste *new_ste) +{ + new_ste->next_htbl = cur_ste->next_htbl; + new_ste->ste_chain_location = cur_ste->ste_chain_location; + + if (!mlx5dr_ste_is_last_in_rule(nic_matcher, new_ste->ste_chain_location)) + new_ste->next_htbl->pointing_ste = new_ste; + + /* We need to copy the refcount since this 
ste + * may have been traversed several times + */ + refcount_set(&new_ste->refcount, refcount_read(&cur_ste->refcount)); + + /* Link old STEs rule_mem list to the new ste */ + mlx5dr_rule_update_rule_member(cur_ste, new_ste); + INIT_LIST_HEAD(&new_ste->rule_list); + list_splice_tail_init(&cur_ste->rule_list, &new_ste->rule_list); +} + +static struct mlx5dr_ste * +dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *cur_ste, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_ste_send_info *ste_info; + bool use_update_list = false; + u8 hw_ste[DR_STE_SIZE] = {}; + struct mlx5dr_ste *new_ste; + int new_idx; + u8 sb_idx; + + /* Copy STE mask from the matcher */ + sb_idx = cur_ste->ste_chain_location - 1; + mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask); + + /* Copy STE control and tag */ + memcpy(hw_ste, cur_ste->hw_ste, DR_STE_SIZE_REDUCED); + mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr); + + new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl); + new_ste = &new_htbl->ste_arr[new_idx]; + + if (mlx5dr_ste_not_used_ste(new_ste)) { + mlx5dr_htbl_get(new_htbl); + list_add_tail(&new_ste->miss_list_node, + mlx5dr_ste_get_miss_list(new_ste)); + } else { + new_ste = dr_rule_rehash_handle_collision(matcher, + nic_matcher, + update_list, + new_ste, + hw_ste); + if (!new_ste) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed adding collision entry, index: %d\n", + new_idx); + return NULL; + } + new_htbl->ctrl.num_of_collisions++; + use_update_list = true; + } + + memcpy(new_ste->hw_ste, hw_ste, DR_STE_SIZE_REDUCED); + + new_htbl->ctrl.num_of_valid_entries++; + + if (use_update_list) { + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + goto err_exit; + + mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0, + hw_ste, ste_info, + update_list, true); + } + + dr_rule_rehash_copy_ste_ctrl(matcher, nic_matcher, cur_ste, new_ste); + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); + return NULL; +} + +static int dr_rule_rehash_copy_miss_list(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct list_head *cur_miss_list, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_ste *tmp_ste, *cur_ste, *new_ste; + + if (list_empty(cur_miss_list)) + return 0; + + list_for_each_entry_safe(cur_ste, tmp_ste, cur_miss_list, miss_list_node) { + new_ste = dr_rule_rehash_copy_ste(matcher, + nic_matcher, + cur_ste, + new_htbl, + update_list); + if (!new_ste) + goto err_insert; + + list_del(&cur_ste->miss_list_node); + mlx5dr_htbl_put(cur_ste->htbl); + } + return 0; + +err_insert: + mlx5dr_err(matcher->tbl->dmn, "Fatal error during resize\n"); + WARN_ON(true); + return -EINVAL; +} + +static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_htbl *cur_htbl, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_ste *cur_ste; + int cur_entries; + int err = 0; + int i; + + cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk_size); + + if (cur_entries < 1) { + mlx5dr_dbg(matcher->tbl->dmn, "Invalid number of entries\n"); + return -EINVAL; + } + + for (i = 0; i < cur_entries; i++) { + cur_ste = &cur_htbl->ste_arr[i]; + if (mlx5dr_ste_not_used_ste(cur_ste)) /* Empty, nothing to copy */ + continue; + + err = 
dr_rule_rehash_copy_miss_list(matcher, + nic_matcher, + mlx5dr_ste_get_miss_list(cur_ste), + new_htbl, + update_list); + if (err) + goto clean_copy; + } + +clean_copy: + return err; +} + +static struct mlx5dr_ste_htbl * +dr_rule_rehash_htbl(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste_htbl *cur_htbl, + u8 ste_location, + struct list_head *update_list, + enum mlx5dr_icm_chunk_size new_size) +{ + struct mlx5dr_ste_send_info *del_ste_info, *tmp_ste_info; + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_ste_send_info *ste_info; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_domain_rx_tx *nic_dmn; + u8 formatted_ste[DR_STE_SIZE] = {}; + LIST_HEAD(rehash_table_send_list); + struct mlx5dr_ste *ste_to_update; + struct mlx5dr_ste_htbl *new_htbl; + int err; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + return NULL; + + new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + new_size, + cur_htbl->lu_type, + cur_htbl->byte_mask); + if (!new_htbl) { + mlx5dr_err(dmn, "Failed to allocate new hash table\n"); + goto free_ste_info; + } + + /* Write new table to HW */ + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr; + mlx5dr_ste_set_formatted_ste(dmn->info.caps.gvmi, + nic_dmn, + new_htbl, + formatted_ste, + &info); + + new_htbl->pointing_ste = cur_htbl->pointing_ste; + new_htbl->pointing_ste->next_htbl = new_htbl; + err = dr_rule_rehash_copy_htbl(matcher, + nic_matcher, + cur_htbl, + new_htbl, + &rehash_table_send_list); + if (err) + goto free_new_htbl; + + if (mlx5dr_send_postsend_htbl(dmn, new_htbl, formatted_ste, + nic_matcher->ste_builder[ste_location - 1].bit_mask)) { + mlx5dr_err(dmn, "Failed writing table to HW\n"); + goto free_new_htbl; + } + + /* Writing to the hw is done in regular order of rehash_table_send_list, + * in order to have the origin data written before the miss address of + * collision entries, if exists. 
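Note that each copied entry is re-hashed rather than copied into the same slot: the bucket index is the CRC of the masked tag truncated to the table size, so growing the table widens the index mask and may move the entry. A sketch of that dependency, assuming a hypothetical crc32_of_tag() helper:

/* A 64-entry and a 128-entry table may place the same tag in
 * different buckets, which is why rehash re-inserts everything.
 */
static u32 bucket_of(const u8 *masked_tag, u32 num_entries)
{
	/* num_entries is always a power of two here */
	return crc32_of_tag(masked_tag) & (num_entries - 1);
}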
+ */ + if (dr_rule_send_update_list(&rehash_table_send_list, dmn, false)) { + mlx5dr_err(dmn, "Failed updating table to HW\n"); + goto free_ste_list; + } + + /* Connect previous hash table to current */ + if (ste_location == 1) { + /* The previous table is an anchor, anchors size is always one STE */ + struct mlx5dr_ste_htbl *prev_htbl = cur_htbl->pointing_ste->htbl; + + /* On matcher s_anchor we keep an extra refcount */ + mlx5dr_htbl_get(new_htbl); + mlx5dr_htbl_put(cur_htbl); + + nic_matcher->s_htbl = new_htbl; + + /* It is safe to operate dr_ste_set_hit_addr on the hw_ste here + * (48B len) which works only on first 32B + */ + mlx5dr_ste_set_hit_addr(prev_htbl->ste_arr[0].hw_ste, + new_htbl->chunk->icm_addr, + new_htbl->chunk->num_of_entries); + + ste_to_update = &prev_htbl->ste_arr[0]; + } else { + mlx5dr_ste_set_hit_addr_by_next_htbl(cur_htbl->pointing_ste->hw_ste, + new_htbl); + ste_to_update = cur_htbl->pointing_ste; + } + + mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_REDUCED, + 0, ste_to_update->hw_ste, ste_info, + update_list, false); + + return new_htbl; + +free_ste_list: + /* Clean all ste_info's from the new table */ + list_for_each_entry_safe(del_ste_info, tmp_ste_info, + &rehash_table_send_list, send_list) { + list_del(&del_ste_info->send_list); + kfree(del_ste_info); + } + +free_new_htbl: + mlx5dr_ste_htbl_free(new_htbl); +free_ste_info: + kfree(ste_info); + mlx5dr_info(dmn, "Failed creating rehash table\n"); + return NULL; +} + +static struct mlx5dr_ste_htbl *dr_rule_rehash(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste_htbl *cur_htbl, + u8 ste_location, + struct list_head *update_list) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + enum mlx5dr_icm_chunk_size new_size; + + new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk_size); + new_size = min_t(u32, new_size, dmn->info.max_log_sw_icm_sz); + + if (new_size == cur_htbl->chunk_size) + return NULL; /* Skip rehash, we already at the max size */ + + return dr_rule_rehash_htbl(rule, nic_rule, cur_htbl, ste_location, + update_list, new_size); +} + +static struct mlx5dr_ste * +dr_rule_handle_collision(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *hw_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_ste_send_info *ste_info; + struct mlx5dr_ste *new_ste; + + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + return NULL; + + new_ste = dr_rule_create_collision_entry(matcher, nic_matcher, hw_ste, ste); + if (!new_ste) + goto free_send_info; + + if (dr_rule_append_to_miss_list(new_ste, miss_list, send_list)) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed to update prev miss_list\n"); + goto err_exit; + } + + mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0, hw_ste, + ste_info, send_list, false); + + ste->htbl->ctrl.num_of_collisions++; + ste->htbl->ctrl.num_of_valid_entries++; + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); +free_send_info: + kfree(ste_info); + return NULL; +} + +static void dr_rule_remove_action_members(struct mlx5dr_rule *rule) +{ + struct mlx5dr_rule_action_member *action_mem; + struct mlx5dr_rule_action_member *tmp; + + list_for_each_entry_safe(action_mem, tmp, &rule->rule_actions_list, list) { + list_del(&action_mem->list); + refcount_dec(&action_mem->action->refcount); + kvfree(action_mem); + } +} + +static int dr_rule_add_action_members(struct mlx5dr_rule *rule, + 
size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_rule_action_member *action_mem; + int i; + + for (i = 0; i < num_actions; i++) { + action_mem = kvzalloc(sizeof(*action_mem), GFP_KERNEL); + if (!action_mem) + goto free_action_members; + + action_mem->action = actions[i]; + INIT_LIST_HEAD(&action_mem->list); + list_add_tail(&action_mem->list, &rule->rule_actions_list); + refcount_inc(&action_mem->action->refcount); + } + + return 0; + +free_action_members: + dr_rule_remove_action_members(rule); + return -ENOMEM; +} + +/* While the pointer of ste is no longer valid, like while moving ste to be + * the first in the miss_list, and to be in the origin table, + * all rule-members that are attached to this ste should update their ste member + * to the new pointer + */ +void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *ste, + struct mlx5dr_ste *new_ste) +{ + struct mlx5dr_rule_member *rule_mem; + + if (!list_empty(&ste->rule_list)) + list_for_each_entry(rule_mem, &ste->rule_list, use_ste_list) + rule_mem->ste = new_ste; +} + +static void dr_rule_clean_rule_members(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule) +{ + struct mlx5dr_rule_member *rule_mem; + struct mlx5dr_rule_member *tmp_mem; + + if (list_empty(&nic_rule->rule_members_list)) + return; + list_for_each_entry_safe(rule_mem, tmp_mem, &nic_rule->rule_members_list, list) { + list_del(&rule_mem->list); + list_del(&rule_mem->use_ste_list); + mlx5dr_ste_put(rule_mem->ste, rule->matcher, nic_rule->nic_matcher); + kvfree(rule_mem); + } +} + +static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn) +{ + struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl; + + if (dmn->info.max_log_sw_icm_sz <= htbl->chunk_size) + return false; + + if (!ctrl->may_grow) + return false; + + if (ctrl->num_of_collisions >= ctrl->increase_threshold && + (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= ctrl->increase_threshold) + return true; + + return false; +} + +static int dr_rule_add_member(struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste *ste) +{ + struct mlx5dr_rule_member *rule_mem; + + rule_mem = kvzalloc(sizeof(*rule_mem), GFP_KERNEL); + if (!rule_mem) + return -ENOMEM; + + rule_mem->ste = ste; + list_add_tail(&rule_mem->list, &nic_rule->rule_members_list); + + list_add_tail(&rule_mem->use_ste_list, &ste->rule_list); + + return 0; +} + +static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct list_head *send_ste_list, + struct mlx5dr_ste *last_ste, + u8 *hw_ste_arr, + u32 new_hw_ste_arr_sz) +{ + struct mlx5dr_matcher_rx_tx *nic_matcher = nic_rule->nic_matcher; + struct mlx5dr_ste_send_info *ste_info_arr[DR_ACTION_MAX_STES]; + u8 num_of_builders = nic_matcher->num_of_builders; + struct mlx5dr_matcher *matcher = rule->matcher; + u8 *curr_hw_ste, *prev_hw_ste; + struct mlx5dr_ste *action_ste; + int i, k, ret; + + /* Two cases: + * 1. num_of_builders is equal to new_hw_ste_arr_sz, the action in the ste + * 2. num_of_builders is less then new_hw_ste_arr_sz, new ste was added + * to support the action. + */ + if (num_of_builders == new_hw_ste_arr_sz) + return 0; + + for (i = num_of_builders, k = 0; i < new_hw_ste_arr_sz; i++, k++) { + curr_hw_ste = hw_ste_arr + i * DR_STE_SIZE; + prev_hw_ste = (i == 0) ? 
curr_hw_ste : hw_ste_arr + ((i - 1) * DR_STE_SIZE); + action_ste = dr_rule_create_collision_htbl(matcher, + nic_matcher, + curr_hw_ste); + if (!action_ste) + return -ENOMEM; + + mlx5dr_ste_get(action_ste); + + /* While free ste we go over the miss list, so add this ste to the list */ + list_add_tail(&action_ste->miss_list_node, + mlx5dr_ste_get_miss_list(action_ste)); + + ste_info_arr[k] = kzalloc(sizeof(*ste_info_arr[k]), + GFP_KERNEL); + if (!ste_info_arr[k]) + goto err_exit; + + /* Point current ste to the new action */ + mlx5dr_ste_set_hit_addr_by_next_htbl(prev_hw_ste, action_ste->htbl); + ret = dr_rule_add_member(nic_rule, action_ste); + if (ret) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed adding rule member\n"); + goto free_ste_info; + } + mlx5dr_send_fill_and_append_ste_send_info(action_ste, DR_STE_SIZE, 0, + curr_hw_ste, + ste_info_arr[k], + send_ste_list, false); + } + + return 0; + +free_ste_info: + kfree(ste_info_arr[k]); +err_exit: + mlx5dr_ste_put(action_ste, matcher, nic_matcher); + return -ENOMEM; +} + +static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_htbl *cur_htbl, + struct mlx5dr_ste *ste, + u8 ste_location, + u8 *hw_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_ste_send_info *ste_info; + + /* Take ref on table, only on first time this ste is used */ + mlx5dr_htbl_get(cur_htbl); + + /* new entry -> new branch */ + list_add_tail(&ste->miss_list_node, miss_list); + + mlx5dr_ste_set_miss_addr(hw_ste, nic_matcher->e_anchor->chunk->icm_addr); + + ste->ste_chain_location = ste_location; + + ste_info = kzalloc(sizeof(*ste_info), GFP_KERNEL); + if (!ste_info) + goto clean_ste_setting; + + if (mlx5dr_ste_create_next_htbl(matcher, + nic_matcher, + ste, + hw_ste, + DR_CHUNK_SIZE_1)) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n"); + goto clean_ste_info; + } + + cur_htbl->ctrl.num_of_valid_entries++; + + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, 0, hw_ste, + ste_info, send_list, false); + + return 0; + +clean_ste_info: + kfree(ste_info); +clean_ste_setting: + list_del_init(&ste->miss_list_node); + mlx5dr_htbl_put(cur_htbl); + + return -ENOMEM; +} + +static struct mlx5dr_ste * +dr_rule_handle_ste_branch(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *cur_htbl, + u8 *hw_ste, + u8 ste_location, + struct mlx5dr_ste_htbl **put_htbl) +{ + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_domain_rx_tx *nic_dmn; + struct mlx5dr_ste_htbl *new_htbl; + struct mlx5dr_ste *matched_ste; + struct list_head *miss_list; + bool skip_rehash = false; + struct mlx5dr_ste *ste; + int index; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + +again: + index = mlx5dr_ste_calc_hash_index(hw_ste, cur_htbl); + miss_list = &cur_htbl->chunk->miss_list[index]; + ste = &cur_htbl->ste_arr[index]; + + if (mlx5dr_ste_not_used_ste(ste)) { + if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl, + ste, ste_location, + hw_ste, miss_list, + send_ste_list)) + return NULL; + } else { + /* Hash table index in use, check if this ste is in the miss list */ + matched_ste = dr_rule_find_ste_in_miss_list(miss_list, hw_ste); + if (matched_ste) { + /* If it is last STE in the chain, and has the same tag + * it means that all the previous stes are the 
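dr_rule_handle_ste_branch() here resolves one chain location to one of four outcomes; a condensed view of its control flow (error paths omitted, not a drop-in replacement for the code below):

/*
 *   index = hash(hw_ste), truncated to the table size
 *   bucket unused          -> claim it and allocate the next table
 *   tag already in bucket  -> reuse that STE, or reject a full-chain
 *                             duplicate rule
 *   bucket full, may grow  -> rehash once, then retry from "again:"
 *   bucket full otherwise  -> chain a collision entry on the miss list
 */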
same, + * if so, this rule is duplicated. + */ + if (mlx5dr_ste_is_last_in_rule(nic_matcher, + matched_ste->ste_chain_location)) { + mlx5dr_info(dmn, "Duplicate rule inserted, aborting!!\n"); + return NULL; + } + return matched_ste; + } + + if (!skip_rehash && dr_rule_need_enlarge_hash(cur_htbl, dmn, nic_dmn)) { + /* Hash table index in use, try to resize of the hash */ + skip_rehash = true; + + /* Hold the table till we update. + * Release in dr_rule_create_rule() + */ + *put_htbl = cur_htbl; + mlx5dr_htbl_get(cur_htbl); + + new_htbl = dr_rule_rehash(rule, nic_rule, cur_htbl, + ste_location, send_ste_list); + if (!new_htbl) { + mlx5dr_htbl_put(cur_htbl); + mlx5dr_info(dmn, "failed creating rehash table, htbl-log_size: %d\n", + cur_htbl->chunk_size); + } else { + cur_htbl = new_htbl; + } + goto again; + } else { + /* Hash table index in use, add another collision (miss) */ + ste = dr_rule_handle_collision(matcher, + nic_matcher, + ste, + hw_ste, + miss_list, + send_ste_list); + if (!ste) { + mlx5dr_dbg(dmn, "failed adding collision entry, index: %d\n", + index); + return NULL; + } + } + } + return ste; +} + +static bool dr_rule_cmp_value_to_mask(u8 *mask, u8 *value, + u32 s_idx, u32 e_idx) +{ + u32 i; + + for (i = s_idx; i < e_idx; i++) { + if (value[i] & ~mask[i]) { + pr_info("Rule parameters contains a value not specified by mask\n"); + return false; + } + } + return true; +} + +static bool dr_rule_verify(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + struct mlx5dr_match_param *param) +{ + u8 match_criteria = matcher->match_criteria; + size_t value_size = value->match_sz; + u8 *mask_p = (u8 *)&matcher->mask; + u8 *param_p = (u8 *)param; + u32 s_idx, e_idx; + + if (!value_size || + (value_size > sizeof(struct mlx5dr_match_param) || + (value_size % sizeof(u32)))) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule parameters length is incorrect\n"); + return false; + } + + mlx5dr_ste_copy_param(matcher->match_criteria, param, value); + + if (match_criteria & DR_MATCHER_CRITERIA_OUTER) { + s_idx = offsetof(struct mlx5dr_match_param, outer); + e_idx = min(s_idx + sizeof(param->outer), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { + s_idx = offsetof(struct mlx5dr_match_param, misc); + e_idx = min(s_idx + sizeof(param->misc), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_INNER) { + s_idx = offsetof(struct mlx5dr_match_param, inner); + e_idx = min(s_idx + sizeof(param->inner), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC2) { + s_idx = offsetof(struct mlx5dr_match_param, misc2); + e_idx = min(s_idx + sizeof(param->misc2), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC3) { + s_idx = offsetof(struct mlx5dr_match_param, misc3); + e_idx = min(s_idx + 
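dr_rule_cmp_value_to_mask() above enforces that the value is a bit-subset of the matcher mask; a one-line restatement of the check with a worked case:

/* A value bit outside the mask can never match and flags a caller bug:
 * mask 0xff00, value 0x12ff -> value & ~mask == 0x00ff -> rejected.
 */
static bool value_within_mask(u16 mask, u16 value)
{
	return !(value & ~mask);
}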
sizeof(param->misc3), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_dbg(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n"); + return false; + } + } + return true; +} + +static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule) +{ + dr_rule_clean_rule_members(rule, nic_rule); + return 0; +} + +static int dr_rule_destroy_rule_fdb(struct mlx5dr_rule *rule) +{ + dr_rule_destroy_rule_nic(rule, &rule->rx); + dr_rule_destroy_rule_nic(rule, &rule->tx); + return 0; +} + +static int dr_rule_destroy_rule(struct mlx5dr_rule *rule) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_rule_destroy_rule_nic(rule, &rule->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_rule_destroy_rule_nic(rule, &rule->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_rule_destroy_rule_fdb(rule); + break; + default: + return -EINVAL; + } + + dr_rule_remove_action_members(rule); + kfree(rule); + return 0; +} + +static bool dr_rule_is_ipv6(struct mlx5dr_match_param *param) +{ + return (param->outer.ip_version == 6 || + param->inner.ip_version == 6 || + param->outer.ethertype == ETH_P_IPV6 || + param->inner.ethertype == ETH_P_IPV6); +} + +static bool dr_rule_skip(enum mlx5dr_domain_type domain, + enum mlx5dr_ste_entry_type ste_type, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value) +{ + if (domain != MLX5DR_DOMAIN_TYPE_FDB) + return false; + + if (mask->misc.source_port) { + if (ste_type == MLX5DR_STE_TYPE_RX) + if (value->misc.source_port != WIRE_PORT) + return true; + + if (ste_type == MLX5DR_STE_TYPE_TX) + if (value->misc.source_port == WIRE_PORT) + return true; + } + + /* Metadata C can be used to describe the source vport */ + if (mask->misc2.metadata_reg_c_0) { + if (ste_type == MLX5DR_STE_TYPE_RX) + if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) != WIRE_PORT) + return true; + + if (ste_type == MLX5DR_STE_TYPE_TX) + if ((value->misc2.metadata_reg_c_0 & WIRE_PORT) == WIRE_PORT) + return true; + } + return false; +} + +static int +dr_rule_create_rule_nic(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_match_param *param, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info; + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_domain_rx_tx *nic_dmn; + struct mlx5dr_ste_htbl *htbl = NULL; + struct mlx5dr_ste_htbl *cur_htbl; + struct mlx5dr_ste *ste = NULL; + LIST_HEAD(send_ste_list); + u8 *hw_ste_arr = NULL; + u32 new_hw_ste_arr_sz; + int ret, i; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + + INIT_LIST_HEAD(&nic_rule->rule_members_list); + + if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param)) + return 0; + + ret = mlx5dr_matcher_select_builders(matcher, + nic_matcher, + dr_rule_is_ipv6(param)); + if (ret) + goto out_err; + + hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL); + if (!hw_ste_arr) { + ret = -ENOMEM; + goto out_err; + } + + /* Set the tag values inside the ste array */ + ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr); + if (ret) + goto free_hw_ste; + + /* Set the actions values/addresses inside the ste array */ + ret = mlx5dr_actions_build_ste_arr(matcher, nic_matcher, actions, + num_actions, 
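dr_rule_skip() above prunes one direction of an FDB rule when the match itself already fixes the packet's origin; with source_port in the mask:

/*
 *   value->source_port == WIRE_PORT -> packet comes from the wire,
 *                                      install on RX only (skip TX)
 *   value->source_port == vport     -> packet comes from a vport,
 *                                      install on TX only (skip RX)
 *
 * The metadata_reg_c_0 branch applies the same logic when register C0
 * carries the source vport.
 */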
hw_ste_arr, + &new_hw_ste_arr_sz); + if (ret) + goto free_hw_ste; + + cur_htbl = nic_matcher->s_htbl; + + /* Go over the array of STEs, and build dr_ste accordingly. + * The loop is over only the builders which are equal or less to the + * number of stes, in case we have actions that lives in other stes. + */ + for (i = 0; i < nic_matcher->num_of_builders; i++) { + /* Calculate CRC and keep new ste entry */ + u8 *cur_hw_ste_ent = hw_ste_arr + (i * DR_STE_SIZE); + + ste = dr_rule_handle_ste_branch(rule, + nic_rule, + &send_ste_list, + cur_htbl, + cur_hw_ste_ent, + i + 1, + &htbl); + if (!ste) { + mlx5dr_err(dmn, "Failed creating next branch\n"); + ret = -ENOENT; + goto free_rule; + } + + cur_htbl = ste->next_htbl; + + /* Keep all STEs in the rule struct */ + ret = dr_rule_add_member(nic_rule, ste); + if (ret) { + mlx5dr_dbg(dmn, "Failed adding rule member index %d\n", i); + goto free_ste; + } + + mlx5dr_ste_get(ste); + } + + /* Connect actions */ + ret = dr_rule_handle_action_stes(rule, nic_rule, &send_ste_list, + ste, hw_ste_arr, new_hw_ste_arr_sz); + if (ret) { + mlx5dr_dbg(dmn, "Failed apply actions\n"); + goto free_rule; + } + ret = dr_rule_send_update_list(&send_ste_list, dmn, true); + if (ret) { + mlx5dr_err(dmn, "Failed sending ste!\n"); + goto free_rule; + } + + if (htbl) + mlx5dr_htbl_put(htbl); + + return 0; + +free_ste: + mlx5dr_ste_put(ste, matcher, nic_matcher); +free_rule: + dr_rule_clean_rule_members(rule, nic_rule); + /* Clean all ste_info's */ + list_for_each_entry_safe(ste_info, tmp_ste_info, &send_ste_list, send_list) { + list_del(&ste_info->send_list); + kfree(ste_info); + } +free_hw_ste: + kfree(hw_ste_arr); +out_err: + return ret; +} + +static int +dr_rule_create_rule_fdb(struct mlx5dr_rule *rule, + struct mlx5dr_match_param *param, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_match_param copy_param = {}; + int ret; + + /* Copy match_param since they will be consumed during the first + * nic_rule insertion. 
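The builders consume the match parameters as they translate them: each DR_STE_SET_* helper in dr_ste.c clears the spec field it has written (which is also how unsupported leftovers are detected), so after the RX insertion the param would be empty; hence the memcpy that follows. A sketch of the consumption pattern (field and tag names chosen for illustration only):

if (spec->smac_47_16) {
	MLX5_SET(ste_eth_l2_src, tag, smac_47_16, spec->smac_47_16);
	spec->smac_47_16 = 0;	/* consumed; leftovers mean "unsupported" */
}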
+ */ + memcpy(&copy_param, param, sizeof(struct mlx5dr_match_param)); + + ret = dr_rule_create_rule_nic(rule, &rule->rx, param, + num_actions, actions); + if (ret) + return ret; + + ret = dr_rule_create_rule_nic(rule, &rule->tx, &copy_param, + num_actions, actions); + if (ret) + goto destroy_rule_nic_rx; + + return 0; + +destroy_rule_nic_rx: + dr_rule_destroy_rule_nic(rule, &rule->rx); + return ret; +} + +static struct mlx5dr_rule * +dr_rule_create_rule(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_match_param param = {}; + struct mlx5dr_rule *rule; + int ret; + + if (!dr_rule_verify(matcher, value, &param)) + return NULL; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return NULL; + + rule->matcher = matcher; + INIT_LIST_HEAD(&rule->rule_actions_list); + + ret = dr_rule_add_action_members(rule, num_actions, actions); + if (ret) + goto free_rule; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + rule->rx.nic_matcher = &matcher->rx; + ret = dr_rule_create_rule_nic(rule, &rule->rx, &param, + num_actions, actions); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + rule->tx.nic_matcher = &matcher->tx; + ret = dr_rule_create_rule_nic(rule, &rule->tx, &param, + num_actions, actions); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + rule->rx.nic_matcher = &matcher->rx; + rule->tx.nic_matcher = &matcher->tx; + ret = dr_rule_create_rule_fdb(rule, &param, + num_actions, actions); + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + goto remove_action_members; + + return rule; + +remove_action_members: + dr_rule_remove_action_members(rule); +free_rule: + kfree(rule); + mlx5dr_info(dmn, "Failed creating rule\n"); + return NULL; +} + +struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_rule *rule; + + mutex_lock(&matcher->tbl->dmn->mutex); + refcount_inc(&matcher->refcount); + + rule = dr_rule_create_rule(matcher, value, num_actions, actions); + if (!rule) + refcount_dec(&matcher->refcount); + + mutex_unlock(&matcher->tbl->dmn->mutex); + + return rule; +} + +int mlx5dr_rule_destroy(struct mlx5dr_rule *rule) +{ + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_table *tbl = rule->matcher->tbl; + int ret; + + mutex_lock(&tbl->dmn->mutex); + + ret = dr_rule_destroy_rule(rule); + + mutex_unlock(&tbl->dmn->mutex); + + if (!ret) + refcount_dec(&matcher->refcount); + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c new file mode 100644 index 000000000000..5df8436b2ae3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -0,0 +1,975 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
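A hypothetical caller sketch for the two exported entry points above (the matcher, the actions array and the packed match value are assumed to exist already):

struct mlx5dr_rule *rule;
int err;

rule = mlx5dr_rule_create(matcher, &value, num_actions, actions);
if (!rule)
	return -EINVAL;	/* insertion failed, or the rule is a duplicate */

/* ... packet steering through this rule is now active ... */

err = mlx5dr_rule_destroy(rule);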
*/ + +#include "dr_types.h" + +#define QUEUE_SIZE 128 +#define SIGNAL_PER_DIV_QUEUE 16 +#define TH_NUMS_TO_DRAIN 2 + +enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 }; + +struct dr_data_seg { + u64 addr; + u32 length; + u32 lkey; + unsigned int send_flags; +}; + +struct postsend_info { + struct dr_data_seg write; + struct dr_data_seg read; + u64 remote_addr; + u32 rkey; +}; + +struct dr_qp_rtr_attr { + struct mlx5dr_cmd_gid_attr dgid_attr; + enum ib_mtu mtu; + u32 qp_num; + u16 port_num; + u8 min_rnr_timer; + u8 sgid_index; + u16 udp_src_port; +}; + +struct dr_qp_rts_attr { + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; +}; + +struct dr_qp_init_attr { + u32 cqn; + u32 pdn; + u32 max_send_wr; + struct mlx5_uars_page *uar; +}; + +static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64) +{ + unsigned int idx; + u8 opcode; + + opcode = get_cqe_opcode(cqe64); + if (opcode == MLX5_CQE_REQ_ERR) { + idx = be16_to_cpu(cqe64->wqe_counter) & + (dr_cq->qp->sq.wqe_cnt - 1); + dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; + } else if (opcode == MLX5_CQE_RESP_ERR) { + ++dr_cq->qp->sq.cc; + } else { + idx = be16_to_cpu(cqe64->wqe_counter) & + (dr_cq->qp->sq.wqe_cnt - 1); + dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; + + return CQ_OK; + } + + return CQ_POLL_ERR; +} + +static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq) +{ + struct mlx5_cqe64 *cqe64; + int err; + + cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq); + if (!cqe64) + return CQ_EMPTY; + + mlx5_cqwq_pop(&dr_cq->wq); + err = dr_parse_cqe(dr_cq, cqe64); + mlx5_cqwq_update_db_record(&dr_cq->wq); + + return err; +} + +static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne) +{ + int npolled; + int err = 0; + + for (npolled = 0; npolled < ne; ++npolled) { + err = dr_cq_poll_one(dr_cq); + if (err != CQ_OK) + break; + } + + return err == CQ_POLL_ERR ? 
err : npolled; +} + +static void dr_qp_event(struct mlx5_core_qp *mqp, int event) +{ + pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn); +} + +static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, + struct dr_qp_init_attr *attr) +{ + u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {}; + struct mlx5_wq_param wqp; + struct mlx5dr_qp *dr_qp; + int inlen; + void *qpc; + void *in; + int err; + + dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL); + if (!dr_qp) + return NULL; + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + dr_qp->rq.pc = 0; + dr_qp->rq.cc = 0; + dr_qp->rq.wqe_cnt = 4; + dr_qp->sq.pc = 0; + dr_qp->sq.cc = 0; + dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr); + + MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); + MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); + err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq, + &dr_qp->wq_ctrl); + if (err) { + mlx5_core_info(mdev, "Can't create QP WQ\n"); + goto err_wq; + } + + dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt, + sizeof(dr_qp->sq.wqe_head[0]), + GFP_KERNEL); + + if (!dr_qp->sq.wqe_head) { + mlx5_core_warn(mdev, "Can't allocate wqe head\n"); + goto err_wqe_head; + } + + inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * + dr_qp->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_in; + } + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, attr->pdn); + MLX5_SET(qpc, qpc, uar_page, attr->uar->index); + MLX5_SET(qpc, qpc, log_page_size, + dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, fre, 1); + MLX5_SET(qpc, qpc, rlky, 1); + MLX5_SET(qpc, qpc, cqn_snd, attr->cqn); + MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn); + MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); + MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma); + if (MLX5_CAP_GEN(mdev, cqe_version) == 1) + MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); + mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, + in, pas)); + + err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen); + kfree(in); + + if (err) { + mlx5_core_warn(mdev, " Can't create QP\n"); + goto err_in; + } + dr_qp->mqp.event = dr_qp_event; + dr_qp->uar = attr->uar; + + return dr_qp; + +err_in: + kfree(dr_qp->sq.wqe_head); +err_wqe_head: + mlx5_wq_destroy(&dr_qp->wq_ctrl); +err_wq: + kfree(dr_qp); + return NULL; +} + +static void dr_destroy_qp(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp) +{ + mlx5_core_destroy_qp(mdev, &dr_qp->mqp); + kfree(dr_qp->sq.wqe_head); + mlx5_wq_destroy(&dr_qp->wq_ctrl); + kfree(dr_qp); +} + +static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl) +{ + dma_wmb(); + *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff); + + /* After wmb() the hw aware of new work */ + wmb(); + + mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET); +} + +static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, + u32 rkey, struct dr_data_seg *data_seg, + u32 opcode, int nreq) +{ + struct mlx5_wqe_raddr_seg *wq_raddr; + struct mlx5_wqe_ctrl_seg *wq_ctrl; + struct mlx5_wqe_data_seg 
*wq_dseg; + unsigned int size; + unsigned int idx; + + size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 + + sizeof(*wq_raddr) / 16; + + idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1); + + wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx); + wq_ctrl->imm = 0; + wq_ctrl->fm_ce_se = (data_seg->send_flags) ? + MLX5_WQE_CTRL_CQ_UPDATE : 0; + wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) | + opcode); + wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8); + wq_raddr = (void *)(wq_ctrl + 1); + wq_raddr->raddr = cpu_to_be64(remote_addr); + wq_raddr->rkey = cpu_to_be32(rkey); + wq_raddr->reserved = 0; + + wq_dseg = (void *)(wq_raddr + 1); + wq_dseg->byte_count = cpu_to_be32(data_seg->length); + wq_dseg->lkey = cpu_to_be32(data_seg->lkey); + wq_dseg->addr = cpu_to_be64(data_seg->addr); + + dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++; + + if (nreq) + dr_cmd_notify_hw(dr_qp, wq_ctrl); +} + +static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info) +{ + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0); + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->read, MLX5_OPCODE_RDMA_READ, 1); +} + +/** + * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent + * with send_list parameters: + * + * @ste: The data that attached to this specific ste + * @size: of data to write + * @offset: of the data from start of the hw_ste entry + * @data: data + * @ste_info: ste to be sent with send_list + * @send_list: to append into it + * @copy_data: if true indicates that the data should be kept because + * it's not backuped any where (like in re-hash). + * if false, it lets the data to be updated after + * it was added to the list. + */ +void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size, + u16 offset, u8 *data, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_list, + bool copy_data) +{ + ste_info->size = size; + ste_info->ste = ste; + ste_info->offset = offset; + + if (copy_data) { + memcpy(ste_info->data_cont, data, size); + ste_info->data = ste_info->data_cont; + } else { + ste_info->data = data; + } + + list_add_tail(&ste_info->send_list, send_list); +} + +/* The function tries to consume one wc each time, unless the queue is full, in + * that case, which means that the hw is behind the sw in a full queue len + * the function will drain the cq till it empty. 
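Completion signaling is batched: only every signal_th-th WQE requests a CQE, so one polled completion retires signal_th pending WQEs. A sketch of the arithmetic with the defaults defined at the top of this file (not driver code; signal_th is computed in mlx5dr_send_ring_alloc() below):

int signal_th = QUEUE_SIZE / SIGNAL_PER_DIV_QUEUE;	/* 128 / 16 = 8 */
int drain_th  = signal_th * TH_NUMS_TO_DRAIN;		/* 16: full-queue drain point */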
+ */ +static int dr_handle_pending_wc(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring) +{ + bool is_drain = false; + int ne; + + if (send_ring->pending_wqe < send_ring->signal_th) + return 0; + + /* Queue is full start drain it */ + if (send_ring->pending_wqe >= + dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN) + is_drain = true; + + do { + ne = dr_poll_cq(send_ring->cq, 1); + if (ne < 0) + return ne; + else if (ne == 1) + send_ring->pending_wqe -= send_ring->signal_th; + } while (is_drain && send_ring->pending_wqe); + + return 0; +} + +static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring, + struct postsend_info *send_info) +{ + send_ring->pending_wqe++; + + if (send_ring->pending_wqe % send_ring->signal_th == 0) + send_info->write.send_flags |= IB_SEND_SIGNALED; + + send_ring->pending_wqe++; + send_info->read.length = send_info->write.length; + /* Read into the same write area */ + send_info->read.addr = (uintptr_t)send_info->write.addr; + send_info->read.lkey = send_ring->mr->mkey.key; + + if (send_ring->pending_wqe % send_ring->signal_th == 0) + send_info->read.send_flags = IB_SEND_SIGNALED; + else + send_info->read.send_flags = 0; +} + +static int dr_postsend_icm_data(struct mlx5dr_domain *dmn, + struct postsend_info *send_info) +{ + struct mlx5dr_send_ring *send_ring = dmn->send_ring; + u32 buff_offset; + int ret; + + ret = dr_handle_pending_wc(dmn, send_ring); + if (ret) + return ret; + + if (send_info->write.length > dmn->info.max_inline_size) { + buff_offset = (send_ring->tx_head & + (dmn->send_ring->signal_th - 1)) * + send_ring->max_post_send_size; + /* Copy to ring mr */ + memcpy(send_ring->buf + buff_offset, + (void *)(uintptr_t)send_info->write.addr, + send_info->write.length); + send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset; + send_info->write.lkey = send_ring->mr->mkey.key; + } + + send_ring->tx_head++; + dr_fill_data_segs(send_ring, send_info); + dr_post_send(send_ring->qp, send_info); + + return 0; +} + +static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 **data, + u32 *byte_size, + int *iterations, + int *num_stes) +{ + int alloc_size; + + if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) { + *iterations = htbl->chunk->byte_size / + dmn->send_ring->max_post_send_size; + *byte_size = dmn->send_ring->max_post_send_size; + alloc_size = *byte_size; + *num_stes = *byte_size / DR_STE_SIZE; + } else { + *iterations = 1; + *num_stes = htbl->chunk->num_of_entries; + alloc_size = *num_stes * DR_STE_SIZE; + } + + *data = kzalloc(alloc_size, GFP_KERNEL); + if (!*data) + return -ENOMEM; + + return 0; +} + +/** + * mlx5dr_send_postsend_ste: write size bytes into offset from the hw cm. + * + * @dmn: Domain + * @ste: The ste struct that contains the data (at + * least part of it) + * @data: The real data to send size data + * @size: for writing. + * @offset: The offset from the icm mapped data to + * start write to this for write only part of the + * buffer. + * + * Return: 0 on success. 
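A hedged caller sketch, mirroring dr_rule_handle_one_ste_in_update_list() in dr_rule.c: post the reduced part of one STE, then keep the host shadow copy in sync (new_data is a hypothetical buffer holding the updated STE image):

ret = mlx5dr_send_postsend_ste(dmn, ste, new_data, DR_STE_SIZE_REDUCED, 0);
if (!ret)
	/* mirror what reached HW, minus the 16B mask tail */
	memcpy(ste->hw_ste, new_data, DR_STE_SIZE_REDUCED);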
+ */ +int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, + u8 *data, u16 size, u16 offset) +{ + struct postsend_info send_info = {}; + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = size; + send_info.write.lkey = 0; + send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset; + send_info.rkey = ste->htbl->chunk->rkey; + + return dr_postsend_icm_data(dmn, &send_info); +} + +int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, u8 *mask) +{ + u32 byte_size = htbl->chunk->byte_size; + int num_stes_per_iter; + int iterations; + u8 *data; + int ret; + int i; + int j; + + ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, + &iterations, &num_stes_per_iter); + if (ret) + return ret; + + /* Send the data iteration times */ + for (i = 0; i < iterations; i++) { + u32 ste_index = i * (byte_size / DR_STE_SIZE); + struct postsend_info send_info = {}; + + /* Copy all ste's on the data buffer + * need to add the bit_mask + */ + for (j = 0; j < num_stes_per_iter; j++) { + u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste; + u32 ste_off = j * DR_STE_SIZE; + + if (mlx5dr_ste_is_not_valid_entry(hw_ste)) { + memcpy(data + ste_off, + formatted_ste, DR_STE_SIZE); + } else { + /* Copy data */ + memcpy(data + ste_off, + htbl->ste_arr[ste_index + j].hw_ste, + DR_STE_SIZE_REDUCED); + /* Copy bit_mask */ + memcpy(data + ste_off + DR_STE_SIZE_REDUCED, + mask, DR_STE_SIZE_MASK); + } + } + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = byte_size; + send_info.write.lkey = 0; + send_info.remote_addr = + mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index); + send_info.rkey = htbl->chunk->rkey; + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + goto out_free; + } + +out_free: + kfree(data); + return ret; +} + +/* Initialize htble with default STEs */ +int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *ste_init_data, + bool update_hw_ste) +{ + u32 byte_size = htbl->chunk->byte_size; + int iterations; + int num_stes; + u8 *data; + int ret; + int i; + + ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, + &iterations, &num_stes); + if (ret) + return ret; + + for (i = 0; i < num_stes; i++) { + u8 *copy_dst; + + /* Copy the same ste on the data buffer */ + copy_dst = data + i * DR_STE_SIZE; + memcpy(copy_dst, ste_init_data, DR_STE_SIZE); + + if (update_hw_ste) { + /* Copy the reduced ste to hash table ste_arr */ + copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED; + memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED); + } + } + + /* Send the data iteration times */ + for (i = 0; i < iterations; i++) { + u8 ste_index = i * (byte_size / DR_STE_SIZE); + struct postsend_info send_info = {}; + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = byte_size; + send_info.write.lkey = 0; + send_info.remote_addr = + mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index); + send_info.rkey = htbl->chunk->rkey; + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + goto out_free; + } + +out_free: + kfree(data); + return ret; +} + +int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, + struct mlx5dr_action *action) +{ + struct postsend_info send_info = {}; + int ret; + + send_info.write.addr = (uintptr_t)action->rewrite.data; + send_info.write.length = action->rewrite.chunk->byte_size; + send_info.write.lkey = 0; + send_info.remote_addr = action->rewrite.chunk->mr_addr; + send_info.rkey = 
action->rewrite.chunk->rkey; + + mutex_lock(&dmn->mutex); + ret = dr_postsend_icm_data(dmn, &send_info); + mutex_unlock(&dmn->mutex); + + return ret; +} + +static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + int port) +{ + u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port); + MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED); + MLX5_SET(qpc, qpc, rre, 1); + MLX5_SET(qpc, qpc, rwe, 1); + + return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc, + &dr_qp->mqp); +} + +static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + struct dr_qp_rts_attr *attr) +{ + u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc); + + MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn); + + MLX5_SET(qpc, qpc, log_ack_req_freq, 0); + MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt); + MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry); + + return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc, + &dr_qp->mqp); +} + +static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + struct dr_qp_rtr_attr *attr) +{ + u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc); + + MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn); + + MLX5_SET(qpc, qpc, mtu, attr->mtu); + MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1); + MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), + attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac)); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip), + attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid)); + MLX5_SET(qpc, qpc, primary_address_path.src_addr_index, + attr->sgid_index); + + if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2) + MLX5_SET(qpc, qpc, primary_address_path.udp_sport, + attr->udp_src_port); + + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num); + MLX5_SET(qpc, qpc, min_rnr_nak, 1); + + return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc, + &dr_qp->mqp); +} + +static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_qp *dr_qp = dmn->send_ring->qp; + struct dr_qp_rts_attr rts_attr = {}; + struct dr_qp_rtr_attr rtr_attr = {}; + enum ib_mtu mtu = IB_MTU_1024; + u16 gid_index = 0; + int port = 1; + int ret; + + /* Init */ + ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port); + if (ret) + return ret; + + /* RTR */ + ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr); + if (ret) + return ret; + + rtr_attr.mtu = mtu; + rtr_attr.qp_num = dr_qp->mqp.qpn; + rtr_attr.min_rnr_timer = 12; + rtr_attr.port_num = port; + rtr_attr.sgid_index = gid_index; + rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp; + + ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr); + if (ret) + return ret; + + /* RTS */ + rts_attr.timeout = 14; + rts_attr.retry_cnt = 7; + rts_attr.rnr_retry = 7; + + ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr); + if (ret) + return ret; + + return 0; +} + +static void dr_cq_event(struct mlx5_core_cq *mcq, + enum mlx5_event event) +{ + pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn); +} + +static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, + struct mlx5_uars_page *uar, + size_t ncqe) +{ + u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {}; + u32 
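dr_prepare_qp_to_rts() above walks the standard RC state machine, and, easy to miss, connects the QP to itself (rtr_attr.qp_num is the QP's own qpn), giving a loopback channel used purely for RDMA access to ICM:

/*
 *   RST  -> INIT  (vhca port, RRE/RWE enabled)
 *   INIT -> RTR   (own gid/mac as the path, MTU, remote_qpn = own qpn)
 *   RTR  -> RTS   (timeout 14, retry_cnt 7, rnr_retry 7)
 */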
out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_wq_param wqp; + struct mlx5_cqe64 *cqe; + struct mlx5dr_cq *cq; + int inlen, err, eqn; + unsigned int irqn; + void *cqc, *in; + __be64 *pas; + u32 i; + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return NULL; + + ncqe = roundup_pow_of_two(ncqe); + MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe)); + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq, + &cq->wq_ctrl); + if (err) + goto out; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; + } + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + goto err_cqwq; + + err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); + if (err) { + kvfree(in); + goto err_cqwq; + } + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe)); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, uar_page, uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas); + + cq->mcq.event = dr_cq_event; + + err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out)); + kvfree(in); + + if (err) + goto err_cqwq; + + cq->mcq.cqe_sz = 64; + cq->mcq.set_ci_db = cq->wq_ctrl.db.db; + cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; + *cq->mcq.set_ci_db = 0; + *cq->mcq.arm_db = 0; + cq->mcq.vector = 0; + cq->mcq.irqn = irqn; + cq->mcq.uar = uar; + + return cq; + +err_cqwq: + mlx5_wq_destroy(&cq->wq_ctrl); +out: + kfree(cq); + return NULL; +} + +static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq) +{ + mlx5_core_destroy_cq(mdev, &cq->mcq); + mlx5_wq_destroy(&cq->wq_ctrl); + kfree(cq); +} + +static int +dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey) +{ + u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, a, 1); + MLX5_SET(mkc, mkc, rw, 1); + MLX5_SET(mkc, mkc, rr, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in)); +} + +static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev, + u32 pdn, void *buf, size_t size) +{ + struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL); + struct device *dma_device; + dma_addr_t dma_addr; + int err; + + if (!mr) + return NULL; + + dma_device = &mdev->pdev->dev; + dma_addr = dma_map_single(dma_device, buf, size, + DMA_BIDIRECTIONAL); + err = dma_mapping_error(dma_device, dma_addr); + if (err) { + mlx5_core_warn(mdev, "Can't dma buf\n"); + kfree(mr); + return NULL; + } + + err = dr_create_mkey(mdev, pdn, &mr->mkey); + if (err) { + mlx5_core_warn(mdev, "Can't create mkey\n"); + dma_unmap_single(dma_device, dma_addr, size, + DMA_BIDIRECTIONAL); + kfree(mr); + return NULL; + } + + mr->dma_addr = dma_addr; + mr->size = size; + mr->addr = buf; + + return mr; +} + +static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr) +{ + mlx5_core_destroy_mkey(mdev, 
&mr->mkey); + dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size, + DMA_BIDIRECTIONAL); + kfree(mr); +} + +int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) +{ + struct dr_qp_init_attr init_attr = {}; + int cq_size; + int size; + int ret; + + dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL); + if (!dmn->send_ring) + return -ENOMEM; + + cq_size = QUEUE_SIZE + 1; + dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size); + if (!dmn->send_ring->cq) { + ret = -ENOMEM; + goto free_send_ring; + } + + init_attr.cqn = dmn->send_ring->cq->mcq.cqn; + init_attr.pdn = dmn->pdn; + init_attr.uar = dmn->uar; + init_attr.max_send_wr = QUEUE_SIZE; + + dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr); + if (!dmn->send_ring->qp) { + ret = -ENOMEM; + goto clean_cq; + } + + dmn->send_ring->cq->qp = dmn->send_ring->qp; + + dmn->info.max_send_wr = QUEUE_SIZE; + dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data, + DR_STE_SIZE); + + dmn->send_ring->signal_th = dmn->info.max_send_wr / + SIGNAL_PER_DIV_QUEUE; + + /* Prepare qp to be used */ + ret = dr_prepare_qp_to_rts(dmn); + if (ret) + goto clean_qp; + + dmn->send_ring->max_post_send_size = + mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K, + DR_ICM_TYPE_STE); + + /* Allocating the max size as a buffer for writing */ + size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size; + dmn->send_ring->buf = kzalloc(size, GFP_KERNEL); + if (!dmn->send_ring->buf) { + ret = -ENOMEM; + goto clean_qp; + } + + dmn->send_ring->buf_size = size; + + dmn->send_ring->mr = dr_reg_mr(dmn->mdev, + dmn->pdn, dmn->send_ring->buf, size); + if (!dmn->send_ring->mr) { + ret = -ENOMEM; + goto free_mem; + } + + dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev, + dmn->pdn, dmn->send_ring->sync_buff, + MIN_READ_SYNC); + if (!dmn->send_ring->sync_mr) { + ret = -ENOMEM; + goto clean_mr; + } + + return 0; + +clean_mr: + dr_dereg_mr(dmn->mdev, dmn->send_ring->mr); +free_mem: + kfree(dmn->send_ring->buf); +clean_qp: + dr_destroy_qp(dmn->mdev, dmn->send_ring->qp); +clean_cq: + dr_destroy_cq(dmn->mdev, dmn->send_ring->cq); +free_send_ring: + kfree(dmn->send_ring); + + return ret; +} + +void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring) +{ + dr_destroy_qp(dmn->mdev, send_ring->qp); + dr_destroy_cq(dmn->mdev, send_ring->cq); + dr_dereg_mr(dmn->mdev, send_ring->sync_mr); + dr_dereg_mr(dmn->mdev, send_ring->mr); + kfree(send_ring->buf); + kfree(send_ring); +} + +int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_send_ring *send_ring = dmn->send_ring; + struct postsend_info send_info = {}; + u8 data[DR_STE_SIZE]; + int num_of_sends_req; + int ret; + int i; + + /* Sending this amount of requests makes sure we will get drain */ + num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2; + + /* Send fake requests forcing the last to be signaled */ + send_info.write.addr = (uintptr_t)data; + send_info.write.length = DR_STE_SIZE; + send_info.write.lkey = 0; + /* Using the sync_mr in order to write/read */ + send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr; + send_info.rkey = send_ring->sync_mr->mkey.key; + + for (i = 0; i < num_of_sends_req; i++) { + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + return ret; + } + + ret = dr_handle_pending_wc(dmn, send_ring); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c new file mode 100644 index 
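The ring buffer sizing in mlx5dr_send_ring_alloc() above follows from the batched signaling: up to signal_th WQEs may be in flight without a completion, so the bounce buffer holds one max_post_send_size slot per potentially unsignaled WQE, and dr_postsend_icm_data() indexes it with a power-of-two mask (variables taken from the send_ring for illustration):

u32 size        = signal_th * max_post_send_size;		/* buffer size */
u32 buff_offset = (tx_head & (signal_th - 1)) * max_post_send_size;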
000000000000..6b0af64536d8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -0,0 +1,2308 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/types.h> +#include "dr_types.h" + +#define DR_STE_CRC_POLY 0xEDB88320L +#define STE_IPV4 0x1 +#define STE_IPV6 0x2 +#define STE_TCP 0x1 +#define STE_UDP 0x2 +#define STE_SPI 0x3 +#define IP_VERSION_IPV4 0x4 +#define IP_VERSION_IPV6 0x6 +#define STE_SVLAN 0x1 +#define STE_CVLAN 0x2 + +#define DR_STE_ENABLE_FLOW_TAG BIT(31) + +/* Set to STE a specific value using DR_STE_SET */ +#define DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, value) do { \ + if ((spec)->s_fname) { \ + MLX5_SET(ste_##lookup_type, tag, t_fname, value); \ + (spec)->s_fname = 0; \ + } \ +} while (0) + +/* Set to STE spec->s_fname to tag->t_fname */ +#define DR_STE_SET_TAG(lookup_type, tag, t_fname, spec, s_fname) \ + DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, spec->s_fname) + +/* Set to STE -1 to bit_mask->bm_fname and set spec->s_fname as used */ +#define DR_STE_SET_MASK(lookup_type, bit_mask, bm_fname, spec, s_fname) \ + DR_STE_SET_VAL(lookup_type, bit_mask, bm_fname, spec, s_fname, -1) + +/* Set to STE spec->s_fname to bit_mask->bm_fname and set spec->s_fname as used */ +#define DR_STE_SET_MASK_V(lookup_type, bit_mask, bm_fname, spec, s_fname) \ + DR_STE_SET_VAL(lookup_type, bit_mask, bm_fname, spec, s_fname, (spec)->s_fname) + +#define DR_STE_SET_TCP_FLAGS(lookup_type, tag, spec) do { \ + MLX5_SET(ste_##lookup_type, tag, tcp_ns, !!((spec)->tcp_flags & (1 << 8))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_cwr, !!((spec)->tcp_flags & (1 << 7))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_ece, !!((spec)->tcp_flags & (1 << 6))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_urg, !!((spec)->tcp_flags & (1 << 5))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_ack, !!((spec)->tcp_flags & (1 << 4))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_psh, !!((spec)->tcp_flags & (1 << 3))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_rst, !!((spec)->tcp_flags & (1 << 2))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_syn, !!((spec)->tcp_flags & (1 << 1))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_fin, !!((spec)->tcp_flags & (1 << 0))); \ +} while (0) + +#define DR_STE_SET_MPLS_MASK(lookup_type, mask, in_out, bit_mask) do { \ + DR_STE_SET_MASK_V(lookup_type, mask, mpls0_label, mask, \ + in_out##_first_mpls_label);\ + DR_STE_SET_MASK_V(lookup_type, mask, mpls0_s_bos, mask, \ + in_out##_first_mpls_s_bos); \ + DR_STE_SET_MASK_V(lookup_type, mask, mpls0_exp, mask, \ + in_out##_first_mpls_exp); \ + DR_STE_SET_MASK_V(lookup_type, mask, mpls0_ttl, mask, \ + in_out##_first_mpls_ttl); \ +} while (0) + +#define DR_STE_SET_MPLS_TAG(lookup_type, mask, in_out, tag) do { \ + DR_STE_SET_TAG(lookup_type, tag, mpls0_label, mask, \ + in_out##_first_mpls_label);\ + DR_STE_SET_TAG(lookup_type, tag, mpls0_s_bos, mask, \ + in_out##_first_mpls_s_bos); \ + DR_STE_SET_TAG(lookup_type, tag, mpls0_exp, mask, \ + in_out##_first_mpls_exp); \ + DR_STE_SET_TAG(lookup_type, tag, mpls0_ttl, mask, \ + in_out##_first_mpls_ttl); \ +} while (0) + +#define DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\ + (_misc)->outer_first_mpls_over_gre_label || \ + (_misc)->outer_first_mpls_over_gre_exp || \ + (_misc)->outer_first_mpls_over_gre_s_bos || \ + (_misc)->outer_first_mpls_over_gre_ttl) +#define DR_STE_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\ + (_misc)->outer_first_mpls_over_udp_label || \ + (_misc)->outer_first_mpls_over_udp_exp || \ + 
(_misc)->outer_first_mpls_over_udp_s_bos || \ + (_misc)->outer_first_mpls_over_udp_ttl) + +#define DR_STE_CALC_LU_TYPE(lookup_type, rx, inner) \ + ((inner) ? MLX5DR_STE_LU_TYPE_##lookup_type##_I : \ + (rx) ? MLX5DR_STE_LU_TYPE_##lookup_type##_D : \ + MLX5DR_STE_LU_TYPE_##lookup_type##_O) + +enum dr_ste_tunl_action { + DR_STE_TUNL_ACTION_NONE = 0, + DR_STE_TUNL_ACTION_ENABLE = 1, + DR_STE_TUNL_ACTION_DECAP = 2, + DR_STE_TUNL_ACTION_L3_DECAP = 3, + DR_STE_TUNL_ACTION_POP_VLAN = 4, +}; + +enum dr_ste_action_type { + DR_STE_ACTION_TYPE_PUSH_VLAN = 1, + DR_STE_ACTION_TYPE_ENCAP_L3 = 3, + DR_STE_ACTION_TYPE_ENCAP = 4, +}; + +struct dr_hw_ste_format { + u8 ctrl[DR_STE_SIZE_CTRL]; + u8 tag[DR_STE_SIZE_TAG]; + u8 mask[DR_STE_SIZE_MASK]; +}; + +u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 masked[DR_STE_SIZE_TAG] = {}; + u32 crc32, index; + u16 bit; + int i; + + /* Don't calculate the CRC if the result is predictable */ + if (htbl->chunk->num_of_entries == 1 || htbl->byte_mask == 0) + return 0; + + /* Mask the tag using the byte mask, one bit per byte */ + bit = 1 << (DR_STE_SIZE_TAG - 1); + for (i = 0; i < DR_STE_SIZE_TAG; i++) { + if (htbl->byte_mask & bit) + masked[i] = hw_ste->tag[i]; + + bit = bit >> 1; + } + + crc32 = mlx5dr_crc32_slice8_calc(masked, DR_STE_SIZE_TAG); + index = crc32 & (htbl->chunk->num_of_entries - 1); + + return index; +} + +static u16 dr_ste_conv_bit_to_byte_mask(u8 *bit_mask) +{ + u16 byte_mask = 0; + int i; + + for (i = 0; i < DR_STE_SIZE_MASK; i++) { + byte_mask = byte_mask << 1; + if (bit_mask[i] == 0xff) + byte_mask |= 1; + } + return byte_mask; +} + +void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + + memcpy(hw_ste->mask, bit_mask, DR_STE_SIZE_MASK); +} + +void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, qp_list_pointer, + DR_STE_ENABLE_FLOW_TAG | flow_tag); +} + +void mlx5dr_ste_set_counter_id(u8 *hw_ste_p, u32 ctr_id) +{ + /* This can be used for both rx_steering_mult and for sx_transmit */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_15_0, ctr_id); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_23_16, ctr_id >> 16); +} + +void mlx5dr_ste_set_go_back_bit(u8 *hw_ste_p) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, go_back, 1); +} + +void mlx5dr_ste_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_hdr, + bool go_back) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_type, + DR_STE_ACTION_TYPE_PUSH_VLAN); + MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, vlan_hdr); + /* Due to HW limitation we need to set this bit, otherwise reformat + + * push vlan will not work. + */ + if (go_back) + mlx5dr_ste_set_go_back_bit(hw_ste_p); +} + +void mlx5dr_ste_set_tx_encap(void *hw_ste_p, u32 reformat_id, int size, bool encap_l3) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_type, + encap_l3 ? 
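/* A hedged usage sketch: a hypothetical caller encapsulating with a 50-byte reformat header would pass size = 50, so action_description below receives 25, the size in the 2-byte words the device expects. */ 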
DR_STE_ACTION_TYPE_ENCAP_L3 : DR_STE_ACTION_TYPE_ENCAP); + /* The hardware expects the size here in 2-byte words */ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_description, size / 2); + MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, reformat_id); +} + +void mlx5dr_ste_set_rx_decap(u8 *hw_ste_p) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_DECAP); +} + +void mlx5dr_ste_set_rx_pop_vlan(u8 *hw_ste_p) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_POP_VLAN); +} + +void mlx5dr_ste_set_rx_decap_l3(u8 *hw_ste_p, bool vlan) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_L3_DECAP); + MLX5_SET(ste_modify_packet, hw_ste_p, action_description, vlan ? 1 : 0); +} + +void mlx5dr_ste_set_entry_type(u8 *hw_ste_p, u8 entry_type) +{ + MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type); +} + +u8 mlx5dr_ste_get_entry_type(u8 *hw_ste_p) +{ + return MLX5_GET(ste_general, hw_ste_p, entry_type); +} + +void mlx5dr_ste_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions, + u32 re_write_index) +{ + MLX5_SET(ste_modify_packet, hw_ste_p, number_of_re_write_actions, + num_of_actions); + MLX5_SET(ste_modify_packet, hw_ste_p, header_re_write_actions_pointer, + re_write_index); +} + +void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi) +{ + MLX5_SET(ste_general, hw_ste_p, next_table_base_63_48, gvmi); +} + +void mlx5dr_ste_init(u8 *hw_ste_p, u8 lu_type, u8 entry_type, + u16 gvmi) +{ + MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type); + MLX5_SET(ste_general, hw_ste_p, entry_sub_type, lu_type); + MLX5_SET(ste_general, hw_ste_p, next_lu_type, MLX5DR_STE_LU_TYPE_DONT_CARE); + + /* Set the GVMI once; it is the same for RX/TX. + * Bits 63_48 of the next table base / miss address encode the next GVMI. + */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, gvmi, gvmi); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, next_table_base_63_48, gvmi); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_63_48, gvmi); +} + +static void dr_ste_set_always_hit(struct dr_hw_ste_format *hw_ste) +{ + memset(&hw_ste->tag, 0, sizeof(hw_ste->tag)); + memset(&hw_ste->mask, 0, sizeof(hw_ste->mask)); +} + +static void dr_ste_set_always_miss(struct dr_hw_ste_format *hw_ste) +{ + hw_ste->tag[0] = 0xdc; + hw_ste->mask[0] = 0; +} + +u64 mlx5dr_ste_get_miss_addr(u8 *hw_ste) +{ + u64 index = + (MLX5_GET(ste_rx_steering_mult, hw_ste, miss_address_31_6) | + MLX5_GET(ste_rx_steering_mult, hw_ste, miss_address_39_32) << 26); + + return index << 6; +} + +void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size) +{ + u64 index = (icm_addr >> 5) | ht_size; + + MLX5_SET(ste_general, hw_ste, next_table_base_39_32_size, index >> 27); + MLX5_SET(ste_general, hw_ste, next_table_base_31_5_size, index); +} + +u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste) +{ + u32 index = ste - ste->htbl->ste_arr; + + return ste->htbl->chunk->icm_addr + DR_STE_SIZE * index; +} + +u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste) +{ + u32 index = ste - ste->htbl->ste_arr; + + return ste->htbl->chunk->mr_addr + DR_STE_SIZE * index; +} + +struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste) +{ + u32 index = ste - ste->htbl->ste_arr; + + return &ste->htbl->miss_list[index]; +} + +static void dr_ste_always_hit_htbl(struct mlx5dr_ste *ste, + struct mlx5dr_ste_htbl *next_htbl) +{ + struct mlx5dr_icm_chunk *chunk = next_htbl->chunk; + u8 *hw_ste = ste->hw_ste; + + MLX5_SET(ste_general, hw_ste, byte_mask, next_htbl->byte_mask); + 
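/* Chain this STE to next_htbl: mirror the table's lu_type and
+	 * byte_mask and point the hit address at its ICM chunk. As a
+	 * worked example of mlx5dr_ste_set_hit_addr() above, with
+	 * illustrative numbers: icm_addr = 0x10000 and ht_size = 2 give
+	 * index = (0x10000 >> 5) | 2 = 0x802, which lands entirely in
+	 * next_table_base_31_5_size since index >> 27 == 0.
+	 */
+ 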
MLX5_SET(ste_general, hw_ste, next_lu_type, next_htbl->lu_type); + mlx5dr_ste_set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries); + + dr_ste_set_always_hit((struct dr_hw_ste_format *)ste->hw_ste); +} + +bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 ste_location) +{ + return ste_location == nic_matcher->num_of_builders; +} + +/* Replace relevant fields, except for: + * htbl - keep the original htbl + * miss_list + list - the src was already taken off the list. + * icm_addr/mr_addr - depend on the hosting table. + * + * Before: + * | a | -> | b | -> | c | -> + * + * After: + * | a | -> | c | -> + * while the data that was in b is copied into a. + */ +static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src) +{ + memcpy(dst->hw_ste, src->hw_ste, DR_STE_SIZE_REDUCED); + dst->next_htbl = src->next_htbl; + if (dst->next_htbl) + dst->next_htbl->pointing_ste = dst; + + refcount_set(&dst->refcount, refcount_read(&src->refcount)); + + INIT_LIST_HEAD(&dst->rule_list); + list_splice_tail_init(&src->rule_list, &dst->rule_list); +} + +/* Free an STE which is the head and the only entry in the miss_list */ +static void +dr_ste_remove_head_ste(struct mlx5dr_ste *ste, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_send_info *ste_info_head, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) +{ + u8 tmp_data_ste[DR_STE_SIZE] = {}; + struct mlx5dr_ste tmp_ste = {}; + u64 miss_addr; + + tmp_ste.hw_ste = tmp_data_ste; + + /* Use a temp ste because dr_ste_always_miss_addr + * touches the bit_mask area, which doesn't exist in ste->hw_ste. + */ + memcpy(tmp_ste.hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED); + miss_addr = nic_matcher->e_anchor->chunk->icm_addr; + mlx5dr_ste_always_miss_addr(&tmp_ste, miss_addr); + memcpy(ste->hw_ste, tmp_ste.hw_ste, DR_STE_SIZE_REDUCED); + + list_del_init(&ste->miss_list_node); + + /* Write full STE size in order to have "always_miss" */ + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, + 0, tmp_data_ste, + ste_info_head, + send_ste_list, + true /* Copy data */); + + stats_tbl->ctrl.num_of_valid_entries--; +} + +/* Free an STE which is the head but NOT the only entry in the miss_list: + * |_ste_| --> |_next_ste_| -->|__| -->|__| -->/0 + */ +static void +dr_ste_replace_head_ste(struct mlx5dr_ste *ste, struct mlx5dr_ste *next_ste, + struct mlx5dr_ste_send_info *ste_info_head, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) + +{ + struct mlx5dr_ste_htbl *next_miss_htbl; + + next_miss_htbl = next_ste->htbl; + + /* Remove next_ste from the miss_list before the copy */ + list_del_init(&next_ste->miss_list_node); + + /* All rule members that use next_ste must be told about the move */ + mlx5dr_rule_update_rule_member(next_ste, ste); + + /* Move data from next into ste */ + dr_ste_replace(ste, next_ste); + + /* Drop the htbl that contains the next_ste. + * The original htbl stays with the same number of entries. 
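+ * (A hedged note: mlx5dr_htbl_put() below only drops the reference
+ * that next_ste held; the table itself is freed once its refcount
+ * reaches zero, per the refcount helpers used elsewhere in this
+ * series.)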
+ */ + mlx5dr_htbl_put(next_miss_htbl); + + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE_REDUCED, + 0, ste->hw_ste, + ste_info_head, + send_ste_list, + true /* Copy data */); + + stats_tbl->ctrl.num_of_collisions--; + stats_tbl->ctrl.num_of_valid_entries--; +} + +/* Free ste that is located in the middle of the miss list: + * |__| -->|_prev_ste_|->|_ste_|-->|_next_ste_| + */ +static void dr_ste_remove_middle_ste(struct mlx5dr_ste *ste, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) +{ + struct mlx5dr_ste *prev_ste; + u64 miss_addr; + + prev_ste = list_entry(mlx5dr_ste_get_miss_list(ste)->prev, struct mlx5dr_ste, + miss_list_node); + if (!prev_ste) { + WARN_ON(true); + return; + } + + miss_addr = mlx5dr_ste_get_miss_addr(ste->hw_ste); + mlx5dr_ste_set_miss_addr(prev_ste->hw_ste, miss_addr); + + mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_REDUCED, 0, + prev_ste->hw_ste, ste_info, + send_ste_list, true /* Copy data*/); + + list_del_init(&ste->miss_list_node); + + stats_tbl->ctrl.num_of_valid_entries--; + stats_tbl->ctrl.num_of_collisions--; +} + +void mlx5dr_ste_free(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_ste_send_info *cur_ste_info, *tmp_ste_info; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_send_info ste_info_head; + struct mlx5dr_ste *next_ste, *first_ste; + bool put_on_origin_table = true; + struct mlx5dr_ste_htbl *stats_tbl; + LIST_HEAD(send_ste_list); + + first_ste = list_entry(mlx5dr_ste_get_miss_list(ste)->next, + struct mlx5dr_ste, miss_list_node); + stats_tbl = first_ste->htbl; + + /* Two options: + * 1. ste is head: + * a. head ste is the only ste in the miss list + * b. head ste is not the only ste in the miss-list + * 2. 
ste is not head + */ + if (first_ste == ste) { /* Ste is the head */ + struct mlx5dr_ste *last_ste; + + last_ste = list_last_entry(mlx5dr_ste_get_miss_list(ste), + struct mlx5dr_ste, miss_list_node); + if (last_ste == first_ste) + next_ste = NULL; + else + next_ste = list_entry(ste->miss_list_node.next, + struct mlx5dr_ste, miss_list_node); + + if (!next_ste) { + /* One and only entry in the list */ + dr_ste_remove_head_ste(ste, nic_matcher, + &ste_info_head, + &send_ste_list, + stats_tbl); + } else { + /* First but not only entry in the list */ + dr_ste_replace_head_ste(ste, next_ste, &ste_info_head, + &send_ste_list, stats_tbl); + put_on_origin_table = false; + } + } else { /* Ste in the middle of the list */ + dr_ste_remove_middle_ste(ste, &ste_info_head, &send_ste_list, stats_tbl); + } + + /* Update HW */ + list_for_each_entry_safe(cur_ste_info, tmp_ste_info, + &send_ste_list, send_list) { + list_del(&cur_ste_info->send_list); + mlx5dr_send_postsend_ste(dmn, cur_ste_info->ste, + cur_ste_info->data, cur_ste_info->size, + cur_ste_info->offset); + } + + if (put_on_origin_table) + mlx5dr_htbl_put(ste->htbl); +} + +bool mlx5dr_ste_equal_tag(void *src, void *dst) +{ + struct dr_hw_ste_format *s_hw_ste = (struct dr_hw_ste_format *)src; + struct dr_hw_ste_format *d_hw_ste = (struct dr_hw_ste_format *)dst; + + return !memcmp(s_hw_ste->tag, d_hw_ste->tag, DR_STE_SIZE_TAG); +} + +void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste, + struct mlx5dr_ste_htbl *next_htbl) +{ + struct mlx5dr_icm_chunk *chunk = next_htbl->chunk; + + mlx5dr_ste_set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries); +} + +void mlx5dr_ste_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) +{ + u64 index = miss_addr >> 6; + + /* The miss address for TX and RX STEs is located at the same offsets */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32, index >> 26); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6, index); +} + +void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr) +{ + u8 *hw_ste = ste->hw_ste; + + MLX5_SET(ste_rx_steering_mult, hw_ste, next_lu_type, MLX5DR_STE_LU_TYPE_DONT_CARE); + mlx5dr_ste_set_miss_addr(hw_ste, miss_addr); + dr_ste_set_always_miss((struct dr_hw_ste_format *)ste->hw_ste); +} + +/* The assumption here is that we don't update ste->hw_ste while the STE is + * unused, so it stays all zero and we can detect that by checking the next_lu_type. 
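+ * (This presumably works because MLX5DR_STE_LU_TYPE_NOP is the
+ * all-zero encoding, so a zero-initialized hw_ste naturally reads
+ * back as invalid; the enum value itself is defined elsewhere in
+ * this series.)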
+ */ +bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)p_hw_ste; + + if (MLX5_GET(ste_general, hw_ste, next_lu_type) == + MLX5DR_STE_LU_TYPE_NOP) + return true; + + return false; +} + +bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste) +{ + return !refcount_read(&ste->refcount); +} + +static u16 get_bits_per_mask(u16 byte_mask) +{ + u16 bits = 0; + + while (byte_mask) { + byte_mask = byte_mask & (byte_mask - 1); + bits++; + } + + return bits; +} + +/* Init one STE as a pattern for the STE data array */ +void mlx5dr_ste_set_formatted_ste(u16 gvmi, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, + struct mlx5dr_htbl_connect_info *connect_info) +{ + struct mlx5dr_ste ste = {}; + + mlx5dr_ste_init(formatted_ste, htbl->lu_type, nic_dmn->ste_type, gvmi); + ste.hw_ste = formatted_ste; + + if (connect_info->type == CONNECT_HIT) + dr_ste_always_hit_htbl(&ste, connect_info->hit_next_htbl); + else + mlx5dr_ste_always_miss_addr(&ste, connect_info->miss_icm_addr); +} + +int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_htbl_connect_info *connect_info, + bool update_hw_ste) +{ + u8 formatted_ste[DR_STE_SIZE] = {}; + + mlx5dr_ste_set_formatted_ste(dmn->info.caps.gvmi, + nic_dmn, + htbl, + formatted_ste, + connect_info); + + return mlx5dr_send_postsend_formatted_htbl(dmn, htbl, formatted_ste, update_hw_ste); +} + +int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *cur_hw_ste, + enum mlx5dr_icm_chunk_size log_table_size) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)cur_hw_ste; + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *next_htbl; + + if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste->ste_chain_location)) { + u32 bits_in_mask; + u8 next_lu_type; + u16 byte_mask; + + next_lu_type = MLX5_GET(ste_general, hw_ste, next_lu_type); + byte_mask = MLX5_GET(ste_general, hw_ste, byte_mask); + + /* Don't allocate a table larger than required: + * the table size is bounded by the byte_mask, so there is no need + * to allocate more than that. 
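+ * Worked example with illustrative numbers: a byte_mask with two
+ * bits set covers two tag bytes, i.e. 16 mask bits, so at most 2^16
+ * distinct hash values exist and log_table_size is capped at 16
+ * below.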
+ */ + bits_in_mask = get_bits_per_mask(byte_mask) * BITS_PER_BYTE; + log_table_size = min(log_table_size, bits_in_mask); + + next_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + log_table_size, + next_lu_type, + byte_mask); + if (!next_htbl) { + mlx5dr_dbg(dmn, "Failed allocating table\n"); + return -ENOMEM; + } + + /* Write new table to HW */ + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr; + if (mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, next_htbl, + &info, false)) { + mlx5dr_info(dmn, "Failed writing table to HW\n"); + goto free_table; + } + + mlx5dr_ste_set_hit_addr_by_next_htbl(cur_hw_ste, next_htbl); + ste->next_htbl = next_htbl; + next_htbl->pointing_ste = ste; + } + + return 0; + +free_table: + mlx5dr_ste_htbl_free(next_htbl); + return -ENOENT; +} + +static void dr_ste_set_ctrl(struct mlx5dr_ste_htbl *htbl) +{ + struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl; + int num_of_entries; + + htbl->ctrl.may_grow = true; + + if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1) + htbl->ctrl.may_grow = false; + + /* Threshold is 50%, one is added to table of size 1 */ + num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk_size); + ctrl->increase_threshold = (num_of_entries + 1) / 2; +} + +struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + u8 lu_type, u16 byte_mask) +{ + struct mlx5dr_icm_chunk *chunk; + struct mlx5dr_ste_htbl *htbl; + int i; + + htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); + if (!htbl) + return NULL; + + chunk = mlx5dr_icm_alloc_chunk(pool, chunk_size); + if (!chunk) + goto out_free_htbl; + + htbl->chunk = chunk; + htbl->lu_type = lu_type; + htbl->byte_mask = byte_mask; + htbl->ste_arr = chunk->ste_arr; + htbl->hw_ste_arr = chunk->hw_ste_arr; + htbl->miss_list = chunk->miss_list; + refcount_set(&htbl->refcount, 0); + + for (i = 0; i < chunk->num_of_entries; i++) { + struct mlx5dr_ste *ste = &htbl->ste_arr[i]; + + ste->hw_ste = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED; + ste->htbl = htbl; + refcount_set(&ste->refcount, 0); + INIT_LIST_HEAD(&ste->miss_list_node); + INIT_LIST_HEAD(&htbl->miss_list[i]); + INIT_LIST_HEAD(&ste->rule_list); + } + + htbl->chunk_size = chunk_size; + dr_ste_set_ctrl(htbl); + return htbl; + +out_free_htbl: + kfree(htbl); + return NULL; +} + +int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl) +{ + if (refcount_read(&htbl->refcount)) + return -EBUSY; + + mlx5dr_icm_free_chunk(htbl->chunk); + kfree(htbl); + return 0; +} + +int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, + u8 match_criteria, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value) +{ + if (!value && (match_criteria & DR_MATCHER_CRITERIA_MISC)) { + if (mask->misc.source_port && mask->misc.source_port != 0xffff) { + mlx5dr_dbg(dmn, "Partial mask source_port is not supported\n"); + return -EINVAL; + } + } + + return 0; +} + +int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_match_param *value, + u8 *ste_arr) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_build *sb; + int ret, i; + + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, + &matcher->mask, value); + if (ret) + return ret; + + sb = nic_matcher->ste_builder; + for (i = 0; i < nic_matcher->num_of_builders; i++) { + mlx5dr_ste_init(ste_arr, + sb->lu_type, + nic_dmn->ste_type, + dmn->info.caps.gvmi); + + 
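/* Each builder contributes one STE in the chain: init the control
+		 * fields, copy the builder's precomputed bit mask, then let
+		 * ste_build_tag_func() consume the matched value into the tag.
+		 * The DR_STE_SET_* macros zero each spec field as it is
+		 * consumed, which (on a hedged reading) is what lets callers
+		 * detect leftover, unsupported match fields.
+		 */
+ 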
mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask); + + ret = sb->ste_build_tag_func(value, sb, ste_arr); + if (ret) + return ret; + + /* Connect the STEs */ + if (i < (nic_matcher->num_of_builders - 1)) { + /* Need the next builder for these fields, + * not relevant for the last ste in the chain. + */ + sb++; + MLX5_SET(ste_general, ste_arr, next_lu_type, sb->lu_type); + MLX5_SET(ste_general, ste_arr, byte_mask, sb->byte_mask); + } + ste_arr += DR_STE_SIZE; + } + return 0; +} + +static int dr_ste_build_eth_l2_src_des_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, dmac_15_0, mask, dmac_15_0); + + if (mask->smac_47_16 || mask->smac_15_0) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_47_32, + mask->smac_47_16 >> 16); + MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_31_0, + mask->smac_47_16 << 16 | mask->smac_15_0); + mask->smac_47_16 = 0; + mask->smac_15_0 = 0; + } + + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_MASK_V(eth_l2_src_dst, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_MASK(eth_l2_src_dst, bit_mask, l3_type, mask, ip_version); + + if (mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + } else if (mask->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1); + mask->svlan_tag = 0; + } + + if (mask->cvlan_tag || mask->svlan_tag) { + pr_info("Invalid c/svlan mask configuration\n"); + return -EINVAL; + } + + return 0; +} + +static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec) +{ + spec->gre_c_present = MLX5_GET(fte_match_set_misc, mask, gre_c_present); + spec->gre_k_present = MLX5_GET(fte_match_set_misc, mask, gre_k_present); + spec->gre_s_present = MLX5_GET(fte_match_set_misc, mask, gre_s_present); + spec->source_vhca_port = MLX5_GET(fte_match_set_misc, mask, source_vhca_port); + spec->source_sqn = MLX5_GET(fte_match_set_misc, mask, source_sqn); + + spec->source_port = MLX5_GET(fte_match_set_misc, mask, source_port); + + spec->outer_second_prio = MLX5_GET(fte_match_set_misc, mask, outer_second_prio); + spec->outer_second_cfi = MLX5_GET(fte_match_set_misc, mask, outer_second_cfi); + spec->outer_second_vid = MLX5_GET(fte_match_set_misc, mask, outer_second_vid); + spec->inner_second_prio = MLX5_GET(fte_match_set_misc, mask, inner_second_prio); + spec->inner_second_cfi = MLX5_GET(fte_match_set_misc, mask, inner_second_cfi); + spec->inner_second_vid = MLX5_GET(fte_match_set_misc, mask, inner_second_vid); + + spec->outer_second_cvlan_tag = + MLX5_GET(fte_match_set_misc, mask, outer_second_cvlan_tag); + spec->inner_second_cvlan_tag = + MLX5_GET(fte_match_set_misc, mask, inner_second_cvlan_tag); + spec->outer_second_svlan_tag = + MLX5_GET(fte_match_set_misc, mask, outer_second_svlan_tag); + spec->inner_second_svlan_tag = + MLX5_GET(fte_match_set_misc, mask, inner_second_svlan_tag); + + spec->gre_protocol = MLX5_GET(fte_match_set_misc, mask, gre_protocol); + + spec->gre_key_h = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi); + spec->gre_key_l = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo); + + spec->vxlan_vni = MLX5_GET(fte_match_set_misc, mask, vxlan_vni); + + spec->geneve_vni = MLX5_GET(fte_match_set_misc, mask, 
geneve_vni); + spec->geneve_oam = MLX5_GET(fte_match_set_misc, mask, geneve_oam); + + spec->outer_ipv6_flow_label = + MLX5_GET(fte_match_set_misc, mask, outer_ipv6_flow_label); + + spec->inner_ipv6_flow_label = + MLX5_GET(fte_match_set_misc, mask, inner_ipv6_flow_label); + + spec->geneve_opt_len = MLX5_GET(fte_match_set_misc, mask, geneve_opt_len); + spec->geneve_protocol_type = + MLX5_GET(fte_match_set_misc, mask, geneve_protocol_type); + + spec->bth_dst_qp = MLX5_GET(fte_match_set_misc, mask, bth_dst_qp); +} + +static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec) +{ + u32 raw_ip[4]; + + spec->smac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16); + + spec->smac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0); + spec->ethertype = MLX5_GET(fte_match_set_lyr_2_4, mask, ethertype); + + spec->dmac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16); + + spec->dmac_15_0 = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0); + spec->first_prio = MLX5_GET(fte_match_set_lyr_2_4, mask, first_prio); + spec->first_cfi = MLX5_GET(fte_match_set_lyr_2_4, mask, first_cfi); + spec->first_vid = MLX5_GET(fte_match_set_lyr_2_4, mask, first_vid); + + spec->ip_protocol = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_protocol); + spec->ip_dscp = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_dscp); + spec->ip_ecn = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_ecn); + spec->cvlan_tag = MLX5_GET(fte_match_set_lyr_2_4, mask, cvlan_tag); + spec->svlan_tag = MLX5_GET(fte_match_set_lyr_2_4, mask, svlan_tag); + spec->frag = MLX5_GET(fte_match_set_lyr_2_4, mask, frag); + spec->ip_version = MLX5_GET(fte_match_set_lyr_2_4, mask, ip_version); + spec->tcp_flags = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_flags); + spec->tcp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_sport); + spec->tcp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, tcp_dport); + + spec->ttl_hoplimit = MLX5_GET(fte_match_set_lyr_2_4, mask, ttl_hoplimit); + + spec->udp_sport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_sport); + spec->udp_dport = MLX5_GET(fte_match_set_lyr_2_4, mask, udp_dport); + + memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(raw_ip)); + + spec->src_ip_127_96 = be32_to_cpu(raw_ip[0]); + spec->src_ip_95_64 = be32_to_cpu(raw_ip[1]); + spec->src_ip_63_32 = be32_to_cpu(raw_ip[2]); + spec->src_ip_31_0 = be32_to_cpu(raw_ip[3]); + + memcpy(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(raw_ip)); + + spec->dst_ip_127_96 = be32_to_cpu(raw_ip[0]); + spec->dst_ip_95_64 = be32_to_cpu(raw_ip[1]); + spec->dst_ip_63_32 = be32_to_cpu(raw_ip[2]); + spec->dst_ip_31_0 = be32_to_cpu(raw_ip[3]); +} + +static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec) +{ + spec->outer_first_mpls_label = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_label); + spec->outer_first_mpls_exp = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_exp); + spec->outer_first_mpls_s_bos = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_s_bos); + spec->outer_first_mpls_ttl = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls.mpls_ttl); + spec->inner_first_mpls_label = + MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_label); + spec->inner_first_mpls_exp = + MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_exp); + spec->inner_first_mpls_s_bos = + MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_s_bos); + spec->inner_first_mpls_ttl = + 
MLX5_GET(fte_match_set_misc2, mask, inner_first_mpls.mpls_ttl); + spec->outer_first_mpls_over_gre_label = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_label); + spec->outer_first_mpls_over_gre_exp = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_exp); + spec->outer_first_mpls_over_gre_s_bos = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_s_bos); + spec->outer_first_mpls_over_gre_ttl = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_ttl); + spec->outer_first_mpls_over_udp_label = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_label); + spec->outer_first_mpls_over_udp_exp = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_exp); + spec->outer_first_mpls_over_udp_s_bos = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_s_bos); + spec->outer_first_mpls_over_udp_ttl = + MLX5_GET(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_ttl); + spec->metadata_reg_c_7 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_7); + spec->metadata_reg_c_6 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_6); + spec->metadata_reg_c_5 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_5); + spec->metadata_reg_c_4 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_4); + spec->metadata_reg_c_3 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_3); + spec->metadata_reg_c_2 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_2); + spec->metadata_reg_c_1 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_1); + spec->metadata_reg_c_0 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_0); + spec->metadata_reg_a = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_a); + spec->metadata_reg_b = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_b); +} + +static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec) +{ + spec->inner_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_seq_num); + spec->outer_tcp_seq_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_seq_num); + spec->inner_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, inner_tcp_ack_num); + spec->outer_tcp_ack_num = MLX5_GET(fte_match_set_misc3, mask, outer_tcp_ack_num); + spec->outer_vxlan_gpe_vni = + MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_vni); + spec->outer_vxlan_gpe_next_protocol = + MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_next_protocol); + spec->outer_vxlan_gpe_flags = + MLX5_GET(fte_match_set_misc3, mask, outer_vxlan_gpe_flags); + spec->icmpv4_header_data = MLX5_GET(fte_match_set_misc3, mask, icmp_header_data); + spec->icmpv6_header_data = + MLX5_GET(fte_match_set_misc3, mask, icmpv6_header_data); + spec->icmpv4_type = MLX5_GET(fte_match_set_misc3, mask, icmp_type); + spec->icmpv4_code = MLX5_GET(fte_match_set_misc3, mask, icmp_code); + spec->icmpv6_type = MLX5_GET(fte_match_set_misc3, mask, icmpv6_type); + spec->icmpv6_code = MLX5_GET(fte_match_set_misc3, mask, icmpv6_code); +} + +void mlx5dr_ste_copy_param(u8 match_criteria, + struct mlx5dr_match_param *set_param, + struct mlx5dr_match_parameters *mask) +{ + u8 tail_param[MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)] = {}; + u8 *data = (u8 *)mask->match_buf; + size_t param_location; + void *buff; + + if (match_criteria & DR_MATCHER_CRITERIA_OUTER) { + if (mask->match_sz < sizeof(struct mlx5dr_match_spec)) { + memcpy(tail_param, data, mask->match_sz); + buff = tail_param; + } else { + buff = mask->match_buf; + } + dr_ste_copy_mask_spec(buff, &set_param->outer); + } + param_location = 
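/* The caller's match_buf is read as consecutive blocks: outer spec, misc, inner spec, misc2, misc3; param_location walks these offsets, and tail_param absorbs a short, partially supplied final block. */ 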
sizeof(struct mlx5dr_match_spec); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc(buff, &set_param->misc); + } + param_location += sizeof(struct mlx5dr_match_misc); + + if (match_criteria & DR_MATCHER_CRITERIA_INNER) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_spec)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_spec(buff, &set_param->inner); + } + param_location += sizeof(struct mlx5dr_match_spec); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC2) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc2)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc2(buff, &set_param->misc2); + } + + param_location += sizeof(struct mlx5dr_match_misc2); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC3) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc3)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc3(buff, &set_param->misc3); + } +} + +static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_15_0, spec, dmac_15_0); + + if (spec->smac_47_16 || spec->smac_15_0) { + MLX5_SET(ste_eth_l2_src_dst, tag, smac_47_32, + spec->smac_47_16 >> 16); + MLX5_SET(ste_eth_l2_src_dst, tag, smac_31_0, + spec->smac_47_16 << 16 | spec->smac_15_0); + spec->smac_47_16 = 0; + spec->smac_15_0 = 0; + } + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + pr_info("Unsupported ip_version value\n"); + return -EINVAL; + } + } + + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_priority, spec, first_prio); + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + return 0; +} + +int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + int ret; + + ret = dr_ste_build_eth_l2_src_des_bit_mask(mask, inner, sb->bit_mask); + if (ret) + return ret; + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC_DST, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = 
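/* A minimal usage sketch (hypothetical caller): a matcher matching the outer destination MAC on the RX side would call mlx5dr_ste_build_eth_l2_src_des(&sb, &mask, false, true) and later invoke sb.ste_build_tag_func() once per rule value. */ 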
&dr_ste_build_eth_l2_src_des_tag; + + return 0; +} + +static void dr_ste_build_eth_l3_ipv6_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_127_96, mask, dst_ip_127_96); + DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_95_64, mask, dst_ip_95_64); + DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_63_32, mask, dst_ip_63_32); + DR_STE_SET_MASK_V(eth_l3_ipv6_dst, bit_mask, dst_ip_31_0, mask, dst_ip_31_0); +} + +static int dr_ste_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0); + + return 0; +} + +void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l3_ipv6_dst_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_DST, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv6_dst_tag; +} + +static void dr_ste_build_eth_l3_ipv6_src_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_127_96, mask, src_ip_127_96); + DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_95_64, mask, src_ip_95_64); + DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_63_32, mask, src_ip_63_32); + DR_STE_SET_MASK_V(eth_l3_ipv6_src, bit_mask, src_ip_31_0, mask, src_ip_31_0); +} + +static int dr_ste_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0); + + return 0; +} + +void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l3_ipv6_src_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_SRC, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv6_src_tag; +} + +static void dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(struct mlx5dr_match_param *value, + bool inner, + u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? 
&value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + destination_address, mask, dst_ip_31_0); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + source_address, mask, src_ip_31_0); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + destination_port, mask, tcp_dport); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + destination_port, mask, udp_dport); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + source_port, mask, tcp_sport); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + source_port, mask, udp_sport); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + protocol, mask, ip_protocol); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + fragmented, mask, frag); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + dscp, mask, ip_dscp); + DR_STE_SET_MASK_V(eth_l3_ipv4_5_tuple, bit_mask, + ecn, mask, ip_ecn); + + if (mask->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, bit_mask, mask); + mask->tcp_flags = 0; + } +} + +static int dr_ste_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_address, spec, dst_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_address, spec, src_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, ecn, spec, ip_ecn); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l3_ipv4_5_tuple_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_5_TUPLE, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv4_5_tuple_tag; +} + +static void +dr_ste_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? 
&value->inner : &value->outer; + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_MASK(eth_l2_src, bit_mask, l3_type, mask, ip_version); + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } + + if (inner) { + if (misc_mask->inner_second_cvlan_tag || + misc_mask->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1); + misc_mask->inner_second_cvlan_tag = 0; + misc_mask->inner_second_svlan_tag = 0; + } + + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_vlan_id, misc_mask, inner_second_vid); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_cfi, misc_mask, inner_second_cfi); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_priority, misc_mask, inner_second_prio); + } else { + if (misc_mask->outer_second_cvlan_tag || + misc_mask->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1); + misc_mask->outer_second_cvlan_tag = 0; + misc_mask->outer_second_svlan_tag = 0; + } + + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_vlan_id, misc_mask, outer_second_vid); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_cfi, misc_mask, outer_second_cfi); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, + second_priority, misc_mask, outer_second_prio); + } +} + +static int dr_ste_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value, + bool inner, u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = inner ? 
&value->inner : &value->outer; + struct mlx5dr_match_misc *misc_spec = &value->misc; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_src, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_src, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_src, tag, l3_ethertype, spec, ethertype); + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + pr_info("Unsupported ip_version value\n"); + return -EINVAL; + } + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (inner) { + if (misc_spec->inner_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->inner_second_cvlan_tag = 0; + } else if (misc_spec->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->inner_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, inner_second_vid); + DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, inner_second_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, inner_second_prio); + } else { + if (misc_spec->outer_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->outer_second_cvlan_tag = 0; + } else if (misc_spec->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->outer_second_svlan_tag = 0; + } + DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, outer_second_vid); + DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, outer_second_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, outer_second_prio); + } + + return 0; +} + +static void dr_ste_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, smac_47_16, mask, smac_47_16); + DR_STE_SET_MASK_V(eth_l2_src, bit_mask, smac_15_0, mask, smac_15_0); + + dr_ste_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int dr_ste_build_eth_l2_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_src, tag, smac_47_16, spec, smac_47_16); + DR_STE_SET_TAG(eth_l2_src, tag, smac_15_0, spec, smac_15_0); + + return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p); +} + +void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l2_src_bit_mask(mask, inner, sb->bit_mask); + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l2_src_tag; +} + +static void dr_ste_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l2_dst, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_MASK_V(eth_l2_dst, bit_mask, dmac_15_0, mask, dmac_15_0); + + dr_ste_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int dr_ste_build_eth_l2_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_dst, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_dst, tag, dmac_15_0, spec, dmac_15_0); + + return dr_ste_build_eth_l2_src_or_dst_tag(value, sb->inner, hw_ste_p); +} + +void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l2_dst_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_DST, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l2_dst_tag; +} + +static void dr_ste_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, dmac_15_0, mask, dmac_15_0); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_MASK_V(eth_l2_tnl, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_MASK(eth_l2_tnl, bit_mask, l3_type, mask, ip_version); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl, bit_mask, + l2_tunneling_network_id, (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } +} + +static int dr_ste_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct mlx5dr_match_spec *spec = sb->inner ? 
&value->inner : &value->outer; + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc *misc = &value->misc; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_15_0, spec, dmac_15_0); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_tnl, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_tnl, tag, l3_ethertype, spec, ethertype); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl, tag, l2_tunneling_network_id, + (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + return -EINVAL; + } + } + + return 0; +} + +void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, bool inner, bool rx) +{ + dr_ste_build_eth_l2_tnl_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_ETHL2_TUNNELING_I; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l2_tnl_tag; +} + +static void dr_ste_build_eth_l3_ipv4_misc_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l3_ipv4_misc, bit_mask, time_to_live, mask, ttl_hoplimit); +} + +static int dr_ste_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit); + + return 0; +} + +void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l3_ipv4_misc_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_MISC, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l3_ipv4_misc_tag; +} + +static void dr_ste_build_ipv6_l3_l4_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? 
&value->inner : &value->outer; + + DR_STE_SET_MASK_V(eth_l4, bit_mask, dst_port, mask, tcp_dport); + DR_STE_SET_MASK_V(eth_l4, bit_mask, src_port, mask, tcp_sport); + DR_STE_SET_MASK_V(eth_l4, bit_mask, dst_port, mask, udp_dport); + DR_STE_SET_MASK_V(eth_l4, bit_mask, src_port, mask, udp_sport); + DR_STE_SET_MASK_V(eth_l4, bit_mask, protocol, mask, ip_protocol); + DR_STE_SET_MASK_V(eth_l4, bit_mask, fragmented, mask, frag); + DR_STE_SET_MASK_V(eth_l4, bit_mask, dscp, mask, ip_dscp); + DR_STE_SET_MASK_V(eth_l4, bit_mask, ecn, mask, ip_ecn); + DR_STE_SET_MASK_V(eth_l4, bit_mask, ipv6_hop_limit, mask, ttl_hoplimit); + + if (mask->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l4, bit_mask, mask); + mask->tcp_flags = 0; + } +} + +static int dr_ste_build_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l4, tag, src_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l4, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l4, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l4, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l4, tag, ecn, spec, ip_ecn); + DR_STE_SET_TAG(eth_l4, tag, ipv6_hop_limit, spec, ttl_hoplimit); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l4, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_ipv6_l3_l4_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_ipv6_l3_l4_tag; +} + +static int dr_ste_build_empty_always_hit_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + return 0; +} + +void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx) +{ + sb->rx = rx; + sb->lu_type = MLX5DR_STE_LU_TYPE_DONT_CARE; + sb->byte_mask = 0; + sb->ste_build_tag_func = &dr_ste_build_empty_always_hit_tag; +} + +static void dr_ste_build_mpls_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc2_mask = &value->misc2; + + if (inner) + DR_STE_SET_MPLS_MASK(mpls, misc2_mask, inner, bit_mask); + else + DR_STE_SET_MPLS_MASK(mpls, misc2_mask, outer, bit_mask); +} + +static int dr_ste_build_mpls_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc2_mask = &value->misc2; + u8 *tag = hw_ste->tag; + + if (sb->inner) + DR_STE_SET_MPLS_TAG(mpls, misc2_mask, inner, tag); + else + DR_STE_SET_MPLS_TAG(mpls, misc2_mask, outer, tag); + + return 0; +} + +void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_mpls_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(MPLS_FIRST, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = 
&dr_ste_build_mpls_tag; +} + +static void dr_ste_build_gre_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_MASK_V(gre, bit_mask, gre_protocol, misc_mask, gre_protocol); + DR_STE_SET_MASK_V(gre, bit_mask, gre_k_present, misc_mask, gre_k_present); + DR_STE_SET_MASK_V(gre, bit_mask, gre_key_h, misc_mask, gre_key_h); + DR_STE_SET_MASK_V(gre, bit_mask, gre_key_l, misc_mask, gre_key_l); + + DR_STE_SET_MASK_V(gre, bit_mask, gre_c_present, misc_mask, gre_c_present); + DR_STE_SET_MASK_V(gre, bit_mask, gre_s_present, misc_mask, gre_s_present); +} + +static int dr_ste_build_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc *misc = &value->misc; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(gre, tag, gre_protocol, misc, gre_protocol); + + DR_STE_SET_TAG(gre, tag, gre_k_present, misc, gre_k_present); + DR_STE_SET_TAG(gre, tag, gre_key_h, misc, gre_key_h); + DR_STE_SET_TAG(gre, tag, gre_key_l, misc, gre_key_l); + + DR_STE_SET_TAG(gre, tag, gre_c_present, misc, gre_c_present); + + DR_STE_SET_TAG(gre, tag, gre_s_present, misc, gre_s_present); + + return 0; +} + +void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, bool inner, bool rx) +{ + dr_ste_build_gre_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_GRE; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_gre_tag; +} + +static void dr_ste_build_flex_parser_0_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + + if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) { + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_label, + misc_2_mask, outer_first_mpls_over_gre_label); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_exp, + misc_2_mask, outer_first_mpls_over_gre_exp); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_s_bos, + misc_2_mask, outer_first_mpls_over_gre_s_bos); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_ttl, + misc_2_mask, outer_first_mpls_over_gre_ttl); + } else { + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_label, + misc_2_mask, outer_first_mpls_over_udp_label); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_exp, + misc_2_mask, outer_first_mpls_over_udp_exp); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_s_bos, + misc_2_mask, outer_first_mpls_over_udp_s_bos); + + DR_STE_SET_MASK_V(flex_parser_0, bit_mask, parser_3_ttl, + misc_2_mask, outer_first_mpls_over_udp_ttl); + } +} + +static int dr_ste_build_flex_parser_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + u8 *tag = hw_ste->tag; + + if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2_mask)) { + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label, + misc_2_mask, outer_first_mpls_over_gre_label); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp, + misc_2_mask, outer_first_mpls_over_gre_exp); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos, + misc_2_mask, outer_first_mpls_over_gre_s_bos); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl, + misc_2_mask, outer_first_mpls_over_gre_ttl); + } else { + 
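/* The MPLS-over-UDP fields share flex parser 3 with the GRE case
+		 * above, so a single STE can match only one of the two
+		 * encapsulations; the GRE fields take precedence whenever both
+		 * are masked (see the bit-mask builder above).
+		 */
+ 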
DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label, + misc_2_mask, outer_first_mpls_over_udp_label); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp, + misc_2_mask, outer_first_mpls_over_udp_exp); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos, + misc_2_mask, outer_first_mpls_over_udp_s_bos); + + DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl, + misc_2_mask, outer_first_mpls_over_udp_ttl); + } + return 0; +} + +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_flex_parser_0_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_flex_parser_0_tag; +} + +#define ICMP_TYPE_OFFSET_FIRST_DW 24 +#define ICMP_CODE_OFFSET_FIRST_DW 16 +#define ICMP_HEADER_DATA_OFFSET_SECOND_DW 0 + +static int dr_ste_build_flex_parser_1_bit_mask(struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + u8 *bit_mask) +{ + struct mlx5dr_match_misc3 *misc_3_mask = &mask->misc3; + bool is_ipv4_mask = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3_mask); + u32 icmp_header_data_mask; + u32 icmp_type_mask; + u32 icmp_code_mask; + int dw0_location; + int dw1_location; + + if (is_ipv4_mask) { + icmp_header_data_mask = misc_3_mask->icmpv4_header_data; + icmp_type_mask = misc_3_mask->icmpv4_type; + icmp_code_mask = misc_3_mask->icmpv4_code; + dw0_location = caps->flex_parser_id_icmp_dw0; + dw1_location = caps->flex_parser_id_icmp_dw1; + } else { + icmp_header_data_mask = misc_3_mask->icmpv6_header_data; + icmp_type_mask = misc_3_mask->icmpv6_type; + icmp_code_mask = misc_3_mask->icmpv6_code; + dw0_location = caps->flex_parser_id_icmpv6_dw0; + dw1_location = caps->flex_parser_id_icmpv6_dw1; + } + + switch (dw0_location) { + case 4: + if (icmp_type_mask) { + MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_4, + (icmp_type_mask << ICMP_TYPE_OFFSET_FIRST_DW)); + if (is_ipv4_mask) + misc_3_mask->icmpv4_type = 0; + else + misc_3_mask->icmpv6_type = 0; + } + if (icmp_code_mask) { + u32 cur_val = MLX5_GET(ste_flex_parser_1, bit_mask, + flex_parser_4); + MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_4, + cur_val | (icmp_code_mask << ICMP_CODE_OFFSET_FIRST_DW)); + if (is_ipv4_mask) + misc_3_mask->icmpv4_code = 0; + else + misc_3_mask->icmpv6_code = 0; + } + break; + default: + return -EINVAL; + } + + switch (dw1_location) { + case 5: + if (icmp_header_data_mask) { + MLX5_SET(ste_flex_parser_1, bit_mask, flex_parser_5, + (icmp_header_data_mask << ICMP_HEADER_DATA_OFFSET_SECOND_DW)); + if (is_ipv4_mask) + misc_3_mask->icmpv4_header_data = 0; + else + misc_3_mask->icmpv6_header_data = 0; + } + break; + default: + return -EINVAL; + } + + return 0; +} + +static int dr_ste_build_flex_parser_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc3 *misc_3 = &value->misc3; + u8 *tag = hw_ste->tag; + u32 icmp_header_data; + int dw0_location; + int dw1_location; + u32 icmp_type; + u32 icmp_code; + bool is_ipv4; + + is_ipv4 = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3); + if (is_ipv4) { + icmp_header_data = misc_3->icmpv4_header_data; + icmp_type = misc_3->icmpv4_type; + icmp_code = misc_3->icmpv4_code; + dw0_location = sb->caps->flex_parser_id_icmp_dw0; + dw1_location = sb->caps->flex_parser_id_icmp_dw1; + } else { + icmp_header_data = 
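+		/* ICMPv6 mirrors the v4 layout but uses its own flex
+		 * parser ids from the queried caps
+		 */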
misc_3->icmpv6_header_data; + icmp_type = misc_3->icmpv6_type; + icmp_code = misc_3->icmpv6_code; + dw0_location = sb->caps->flex_parser_id_icmpv6_dw0; + dw1_location = sb->caps->flex_parser_id_icmpv6_dw1; + } + + switch (dw0_location) { + case 4: + if (icmp_type) { + MLX5_SET(ste_flex_parser_1, tag, flex_parser_4, + (icmp_type << ICMP_TYPE_OFFSET_FIRST_DW)); + if (is_ipv4) + misc_3->icmpv4_type = 0; + else + misc_3->icmpv6_type = 0; + } + + if (icmp_code) { + u32 cur_val = MLX5_GET(ste_flex_parser_1, tag, + flex_parser_4); + MLX5_SET(ste_flex_parser_1, tag, flex_parser_4, + cur_val | (icmp_code << ICMP_CODE_OFFSET_FIRST_DW)); + if (is_ipv4) + misc_3->icmpv4_code = 0; + else + misc_3->icmpv6_code = 0; + } + break; + default: + return -EINVAL; + } + + switch (dw1_location) { + case 5: + if (icmp_header_data) { + MLX5_SET(ste_flex_parser_1, tag, flex_parser_5, + (icmp_header_data << ICMP_HEADER_DATA_OFFSET_SECOND_DW)); + if (is_ipv4) + misc_3->icmpv4_header_data = 0; + else + misc_3->icmpv6_header_data = 0; + } + break; + default: + return -EINVAL; + } + + return 0; +} + +int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + int ret; + + ret = dr_ste_build_flex_parser_1_bit_mask(mask, caps, sb->bit_mask); + if (ret) + return ret; + + sb->rx = rx; + sb->inner = inner; + sb->caps = caps; + sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_1; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_flex_parser_1_tag; + + return 0; +} + +static void dr_ste_build_general_purpose_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + + DR_STE_SET_MASK_V(general_purpose, bit_mask, + general_purpose_lookup_field, misc_2_mask, + metadata_reg_a); +} + +static int dr_ste_build_general_purpose_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field, + misc_2_mask, metadata_reg_a); + + return 0; +} + +void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_general_purpose_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_GENERAL_PURPOSE; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_general_purpose_tag; +} + +static void dr_ste_build_eth_l4_misc_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc3 *misc_3_mask = &value->misc3; + + if (inner) { + DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, seq_num, misc_3_mask, + inner_tcp_seq_num); + DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, ack_num, misc_3_mask, + inner_tcp_ack_num); + } else { + DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, seq_num, misc_3_mask, + outer_tcp_seq_num); + DR_STE_SET_MASK_V(eth_l4_misc, bit_mask, ack_num, misc_3_mask, + outer_tcp_ack_num); + } +} + +static int dr_ste_build_eth_l4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 *tag = hw_ste->tag; + + if 
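+	/* TCP sequence/ack numbers, taken from the inner or outer header */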
(sb->inner) { + DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, inner_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, inner_tcp_ack_num); + } else { + DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, outer_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, outer_tcp_ack_num); + } + + return 0; +} + +void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_eth_l4_misc_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4_MISC, rx, inner); + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_eth_l4_misc_tag; +} + +static void dr_ste_build_flex_parser_tnl_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_misc3 *misc_3_mask = &value->misc3; + + if (misc_3_mask->outer_vxlan_gpe_flags || + misc_3_mask->outer_vxlan_gpe_next_protocol) { + MLX5_SET(ste_flex_parser_tnl, bit_mask, + flex_parser_tunneling_header_63_32, + (misc_3_mask->outer_vxlan_gpe_flags << 24) | + (misc_3_mask->outer_vxlan_gpe_next_protocol)); + misc_3_mask->outer_vxlan_gpe_flags = 0; + misc_3_mask->outer_vxlan_gpe_next_protocol = 0; + } + + if (misc_3_mask->outer_vxlan_gpe_vni) { + MLX5_SET(ste_flex_parser_tnl, bit_mask, + flex_parser_tunneling_header_31_0, + misc_3_mask->outer_vxlan_gpe_vni << 8); + misc_3_mask->outer_vxlan_gpe_vni = 0; + } +} + +static int dr_ste_build_flex_parser_tnl_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 *tag = hw_ste->tag; + + if (misc3->outer_vxlan_gpe_flags || + misc3->outer_vxlan_gpe_next_protocol) { + MLX5_SET(ste_flex_parser_tnl, tag, + flex_parser_tunneling_header_63_32, + (misc3->outer_vxlan_gpe_flags << 24) | + (misc3->outer_vxlan_gpe_next_protocol)); + misc3->outer_vxlan_gpe_flags = 0; + misc3->outer_vxlan_gpe_next_protocol = 0; + } + + if (misc3->outer_vxlan_gpe_vni) { + MLX5_SET(ste_flex_parser_tnl, tag, + flex_parser_tunneling_header_31_0, + misc3->outer_vxlan_gpe_vni << 8); + misc3->outer_vxlan_gpe_vni = 0; + } + + return 0; +} + +void mlx5dr_ste_build_flex_parser_tnl(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_flex_parser_tnl_bit_mask(mask, inner, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_flex_parser_tnl_tag; +} + +static void dr_ste_build_register_0_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + + DR_STE_SET_MASK_V(register_0, bit_mask, register_0_h, + misc_2_mask, metadata_reg_c_0); + DR_STE_SET_MASK_V(register_0, bit_mask, register_0_l, + misc_2_mask, metadata_reg_c_1); + DR_STE_SET_MASK_V(register_0, bit_mask, register_1_h, + misc_2_mask, metadata_reg_c_2); + DR_STE_SET_MASK_V(register_0, bit_mask, register_1_l, + misc_2_mask, metadata_reg_c_3); +} + +static int dr_ste_build_register_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *tag = hw_ste->tag; + + 
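+	/* metadata_reg_c_0..3 map onto the high/low halves of this STE's
+	 * register_0 and register_1 fields; c_4..c_7 are covered by the
+	 * register_1 lookup type below
+	 */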
DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0); + DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1); + DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2); + DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3); + + return 0; +} + +void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_register_0_bit_mask(mask, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_0; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_register_0_tag; +} + +static void dr_ste_build_register_1_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc2 *misc_2_mask = &value->misc2; + + DR_STE_SET_MASK_V(register_1, bit_mask, register_2_h, + misc_2_mask, metadata_reg_c_4); + DR_STE_SET_MASK_V(register_1, bit_mask, register_2_l, + misc_2_mask, metadata_reg_c_5); + DR_STE_SET_MASK_V(register_1, bit_mask, register_3_h, + misc_2_mask, metadata_reg_c_6); + DR_STE_SET_MASK_V(register_1, bit_mask, register_3_l, + misc_2_mask, metadata_reg_c_7); +} + +static int dr_ste_build_register_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4); + DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5); + DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6); + DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7); + + return 0; +} + +void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + dr_ste_build_register_1_bit_mask(mask, sb->bit_mask); + + sb->rx = rx; + sb->inner = inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_1; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_register_1_tag; +} + +static int dr_ste_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc *misc_mask = &value->misc; + + if (misc_mask->source_port != 0xffff) + return -EINVAL; + + DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port); + DR_STE_SET_MASK(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn); + + return 0; +} + +static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + struct mlx5dr_match_misc *misc = &value->misc; + struct mlx5dr_cmd_vport_cap *vport_cap; + u8 *tag = hw_ste->tag; + + DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); + + vport_cap = mlx5dr_get_vport_cap(sb->caps, misc->source_port); + if (!vport_cap) + return -EINVAL; + + if (vport_cap->vport_gvmi) + MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi); + + misc->source_port = 0; + + return 0; +} + +int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + int ret; + + ret = dr_ste_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); + if (ret) + return ret; + + sb->rx = rx; + sb->caps = caps; + sb->inner 
= inner; + sb->lu_type = MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP; + sb->byte_mask = dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_build_src_gvmi_qpn_tag; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c new file mode 100644 index 000000000000..e178d8d3dbc9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action) +{ + struct mlx5dr_matcher *last_matcher = NULL; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *last_htbl; + int ret; + + if (action && action->action_type != DR_ACTION_TYP_FT) + return -EOPNOTSUPP; + + mutex_lock(&tbl->dmn->mutex); + + if (!list_empty(&tbl->matcher_list)) + last_matcher = list_last_entry(&tbl->matcher_list, + struct mlx5dr_matcher, + matcher_list); + + if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX || + tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { + if (last_matcher) + last_htbl = last_matcher->rx.e_anchor; + else + last_htbl = tbl->rx.s_anchor; + + tbl->rx.default_icm_addr = action ? + action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr : + tbl->rx.nic_dmn->default_icm_addr; + + info.type = CONNECT_MISS; + info.miss_icm_addr = tbl->rx.default_icm_addr; + + ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn, + tbl->rx.nic_dmn, + last_htbl, + &info, true); + if (ret) { + mlx5dr_dbg(tbl->dmn, "Failed to set RX miss action, ret %d\n", ret); + goto out; + } + } + + if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX || + tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { + if (last_matcher) + last_htbl = last_matcher->tx.e_anchor; + else + last_htbl = tbl->tx.s_anchor; + + tbl->tx.default_icm_addr = action ? 
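+		/* TX mirrors the RX path above: miss jumps to the
+		 * destination table's TX anchor, or back to the NIC
+		 * domain default
+		 */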
+ action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr : + tbl->tx.nic_dmn->default_icm_addr; + + info.type = CONNECT_MISS; + info.miss_icm_addr = tbl->tx.default_icm_addr; + + ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn, + tbl->tx.nic_dmn, + last_htbl, &info, true); + if (ret) { + mlx5dr_dbg(tbl->dmn, "Failed to set TX miss action, ret %d\n", ret); + goto out; + } + } + + /* Release old action */ + if (tbl->miss_action) + refcount_dec(&tbl->miss_action->refcount); + + /* Set new miss action */ + tbl->miss_action = action; + if (tbl->miss_action) + refcount_inc(&action->refcount); + +out: + mutex_unlock(&tbl->dmn->mutex); + return ret; +} + +static void dr_table_uninit_nic(struct mlx5dr_table_rx_tx *nic_tbl) +{ + mlx5dr_htbl_put(nic_tbl->s_anchor); +} + +static void dr_table_uninit_fdb(struct mlx5dr_table *tbl) +{ + dr_table_uninit_nic(&tbl->rx); + dr_table_uninit_nic(&tbl->tx); +} + +static void dr_table_uninit(struct mlx5dr_table *tbl) +{ + mutex_lock(&tbl->dmn->mutex); + + switch (tbl->dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_table_uninit_nic(&tbl->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_table_uninit_nic(&tbl->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_table_uninit_fdb(tbl); + break; + default: + WARN_ON(true); + break; + } + + mutex_unlock(&tbl->dmn->mutex); +} + +static int dr_table_init_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + int ret; + + nic_tbl->default_icm_addr = nic_dmn->default_icm_addr; + + nic_tbl->s_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!nic_tbl->s_anchor) + return -ENOMEM; + + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_dmn->default_icm_addr; + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + nic_tbl->s_anchor, + &info, true); + if (ret) + goto free_s_anchor; + + mlx5dr_htbl_get(nic_tbl->s_anchor); + + return 0; + +free_s_anchor: + mlx5dr_ste_htbl_free(nic_tbl->s_anchor); + return ret; +} + +static int dr_table_init_fdb(struct mlx5dr_table *tbl) +{ + int ret; + + ret = dr_table_init_nic(tbl->dmn, &tbl->rx); + if (ret) + return ret; + + ret = dr_table_init_nic(tbl->dmn, &tbl->tx); + if (ret) + goto destroy_rx; + + return 0; + +destroy_rx: + dr_table_uninit_nic(&tbl->rx); + return ret; +} + +static int dr_table_init(struct mlx5dr_table *tbl) +{ + int ret = 0; + + INIT_LIST_HEAD(&tbl->matcher_list); + + mutex_lock(&tbl->dmn->mutex); + + switch (tbl->dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_RX; + tbl->rx.nic_dmn = &tbl->dmn->info.rx; + ret = dr_table_init_nic(tbl->dmn, &tbl->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_TX; + tbl->tx.nic_dmn = &tbl->dmn->info.tx; + ret = dr_table_init_nic(tbl->dmn, &tbl->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_FDB; + tbl->rx.nic_dmn = &tbl->dmn->info.rx; + tbl->tx.nic_dmn = &tbl->dmn->info.tx; + ret = dr_table_init_fdb(tbl); + break; + default: + WARN_ON(true); + break; + } + + mutex_unlock(&tbl->dmn->mutex); + + return ret; +} + +static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl) +{ + return mlx5dr_cmd_destroy_flow_table(tbl->dmn->mdev, + tbl->table_id, + tbl->table_type); +} + +static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl) +{ + u64 icm_addr_rx = 0; + u64 icm_addr_tx = 0; + int ret; + + if (tbl->rx.s_anchor) + 
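+		/* the anchor ICM addresses are handed to FW below so the
+		 * FW-owned table entry points at the SW-steering hash tables
+		 */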
icm_addr_rx = tbl->rx.s_anchor->chunk->icm_addr; + + if (tbl->tx.s_anchor) + icm_addr_tx = tbl->tx.s_anchor->chunk->icm_addr; + + ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, + tbl->table_type, + icm_addr_rx, + icm_addr_tx, + tbl->dmn->info.caps.max_ft_level - 1, + true, false, NULL, + &tbl->table_id); + + return ret; +} + +struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level) +{ + struct mlx5dr_table *tbl; + int ret; + + refcount_inc(&dmn->refcount); + + tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); + if (!tbl) + goto dec_ref; + + tbl->dmn = dmn; + tbl->level = level; + refcount_set(&tbl->refcount, 1); + + ret = dr_table_init(tbl); + if (ret) + goto free_tbl; + + ret = dr_table_create_sw_owned_tbl(tbl); + if (ret) + goto uninit_tbl; + + return tbl; + +uninit_tbl: + dr_table_uninit(tbl); +free_tbl: + kfree(tbl); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +int mlx5dr_table_destroy(struct mlx5dr_table *tbl) +{ + int ret; + + if (refcount_read(&tbl->refcount) > 1) + return -EBUSY; + + ret = dr_table_destroy_sw_owned_tbl(tbl); + if (ret) + return ret; + + dr_table_uninit(tbl); + + if (tbl->miss_action) + refcount_dec(&tbl->miss_action->refcount); + + refcount_dec(&tbl->dmn->refcount); + kfree(tbl); + + return ret; +} + +u32 mlx5dr_table_get_id(struct mlx5dr_table *tbl) +{ + return tbl->table_id; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h new file mode 100644 index 000000000000..a37ee6359be2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -0,0 +1,1060 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef _DR_TYPES_ +#define _DR_TYPES_ + +#include <linux/mlx5/driver.h> +#include <linux/refcount.h> +#include "fs_core.h" +#include "wq.h" +#include "lib/mlx5.h" +#include "mlx5_ifc_dr.h" +#include "mlx5dr.h" + +#define DR_RULE_MAX_STES 17 +#define DR_ACTION_MAX_STES 5 +#define WIRE_PORT 0xFFFF +#define DR_STE_SVLAN 0x1 +#define DR_STE_CVLAN 0x2 + +#define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg) +#define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg) +#define mlx5dr_dbg(dmn, arg...) 
mlx5_core_dbg((dmn)->mdev, ##arg) + +enum mlx5dr_icm_chunk_size { + DR_CHUNK_SIZE_1, + DR_CHUNK_SIZE_MIN = DR_CHUNK_SIZE_1, /* keep updated when changing */ + DR_CHUNK_SIZE_2, + DR_CHUNK_SIZE_4, + DR_CHUNK_SIZE_8, + DR_CHUNK_SIZE_16, + DR_CHUNK_SIZE_32, + DR_CHUNK_SIZE_64, + DR_CHUNK_SIZE_128, + DR_CHUNK_SIZE_256, + DR_CHUNK_SIZE_512, + DR_CHUNK_SIZE_1K, + DR_CHUNK_SIZE_2K, + DR_CHUNK_SIZE_4K, + DR_CHUNK_SIZE_8K, + DR_CHUNK_SIZE_16K, + DR_CHUNK_SIZE_32K, + DR_CHUNK_SIZE_64K, + DR_CHUNK_SIZE_128K, + DR_CHUNK_SIZE_256K, + DR_CHUNK_SIZE_512K, + DR_CHUNK_SIZE_1024K, + DR_CHUNK_SIZE_2048K, + DR_CHUNK_SIZE_MAX, +}; + +enum mlx5dr_icm_type { + DR_ICM_TYPE_STE, + DR_ICM_TYPE_MODIFY_ACTION, +}; + +static inline enum mlx5dr_icm_chunk_size +mlx5dr_icm_next_higher_chunk(enum mlx5dr_icm_chunk_size chunk) +{ + chunk += 2; + if (chunk < DR_CHUNK_SIZE_MAX) + return chunk; + + return DR_CHUNK_SIZE_MAX; +} + +enum { + DR_STE_SIZE = 64, + DR_STE_SIZE_CTRL = 32, + DR_STE_SIZE_TAG = 16, + DR_STE_SIZE_MASK = 16, +}; + +enum { + DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK, +}; + +enum { + DR_MODIFY_ACTION_SIZE = 8, +}; + +enum mlx5dr_matcher_criteria { + DR_MATCHER_CRITERIA_EMPTY = 0, + DR_MATCHER_CRITERIA_OUTER = 1 << 0, + DR_MATCHER_CRITERIA_MISC = 1 << 1, + DR_MATCHER_CRITERIA_INNER = 1 << 2, + DR_MATCHER_CRITERIA_MISC2 = 1 << 3, + DR_MATCHER_CRITERIA_MISC3 = 1 << 4, + DR_MATCHER_CRITERIA_MAX = 1 << 5, +}; + +enum mlx5dr_action_type { + DR_ACTION_TYP_TNL_L2_TO_L2, + DR_ACTION_TYP_L2_TO_TNL_L2, + DR_ACTION_TYP_TNL_L3_TO_L2, + DR_ACTION_TYP_L2_TO_TNL_L3, + DR_ACTION_TYP_DROP, + DR_ACTION_TYP_QP, + DR_ACTION_TYP_FT, + DR_ACTION_TYP_CTR, + DR_ACTION_TYP_TAG, + DR_ACTION_TYP_MODIFY_HDR, + DR_ACTION_TYP_VPORT, + DR_ACTION_TYP_POP_VLAN, + DR_ACTION_TYP_PUSH_VLAN, + DR_ACTION_TYP_MAX, +}; + +struct mlx5dr_icm_pool; +struct mlx5dr_icm_chunk; +struct mlx5dr_icm_bucket; +struct mlx5dr_ste_htbl; +struct mlx5dr_match_param; +struct mlx5dr_cmd_caps; +struct mlx5dr_matcher_rx_tx; + +struct mlx5dr_ste { + u8 *hw_ste; + /* refcount: indicates the num of rules that using this ste */ + refcount_t refcount; + + /* attached to the miss_list head at each htbl entry */ + struct list_head miss_list_node; + + /* each rule member that uses this ste attached here */ + struct list_head rule_list; + + /* this ste is member of htbl */ + struct mlx5dr_ste_htbl *htbl; + + struct mlx5dr_ste_htbl *next_htbl; + + /* this ste is part of a rule, located in ste's chain */ + u8 ste_chain_location; +}; + +struct mlx5dr_ste_htbl_ctrl { + /* total number of valid entries belonging to this hash table. 
This + * includes the non collision and collision entries + */ + unsigned int num_of_valid_entries; + + /* total number of collisions entries attached to this table */ + unsigned int num_of_collisions; + unsigned int increase_threshold; + u8 may_grow:1; +}; + +struct mlx5dr_ste_htbl { + u8 lu_type; + u16 byte_mask; + refcount_t refcount; + struct mlx5dr_icm_chunk *chunk; + struct mlx5dr_ste *ste_arr; + u8 *hw_ste_arr; + + struct list_head *miss_list; + + enum mlx5dr_icm_chunk_size chunk_size; + struct mlx5dr_ste *pointing_ste; + + struct mlx5dr_ste_htbl_ctrl ctrl; +}; + +struct mlx5dr_ste_send_info { + struct mlx5dr_ste *ste; + struct list_head send_list; + u16 size; + u16 offset; + u8 data_cont[DR_STE_SIZE]; + u8 *data; +}; + +void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size, + u16 offset, u8 *data, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_list, + bool copy_data); + +struct mlx5dr_ste_build { + u8 inner:1; + u8 rx:1; + struct mlx5dr_cmd_caps *caps; + u8 lu_type; + u16 byte_mask; + u8 bit_mask[DR_STE_SIZE_MASK]; + int (*ste_build_tag_func)(struct mlx5dr_match_param *spec, + struct mlx5dr_ste_build *sb, + u8 *hw_ste_p); +}; + +struct mlx5dr_ste_htbl * +mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + u8 lu_type, u16 byte_mask); + +int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl); + +static inline void mlx5dr_htbl_put(struct mlx5dr_ste_htbl *htbl) +{ + if (refcount_dec_and_test(&htbl->refcount)) + mlx5dr_ste_htbl_free(htbl); +} + +static inline void mlx5dr_htbl_get(struct mlx5dr_ste_htbl *htbl) +{ + refcount_inc(&htbl->refcount); +} + +/* STE utils */ +u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl); +void mlx5dr_ste_init(u8 *hw_ste_p, u8 lu_type, u8 entry_type, u16 gvmi); +void mlx5dr_ste_always_hit_htbl(struct mlx5dr_ste *ste, + struct mlx5dr_ste_htbl *next_htbl); +void mlx5dr_ste_set_miss_addr(u8 *hw_ste, u64 miss_addr); +u64 mlx5dr_ste_get_miss_addr(u8 *hw_ste); +void mlx5dr_ste_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi); +void mlx5dr_ste_set_hit_addr(u8 *hw_ste, u64 icm_addr, u32 ht_size); +void mlx5dr_ste_always_miss_addr(struct mlx5dr_ste *ste, u64 miss_addr); +void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask); +bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste); +bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 ste_location); +void mlx5dr_ste_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag); +void mlx5dr_ste_set_counter_id(u8 *hw_ste_p, u32 ctr_id); +void mlx5dr_ste_set_tx_encap(void *hw_ste_p, u32 reformat_id, + int size, bool encap_l3); +void mlx5dr_ste_set_rx_decap(u8 *hw_ste_p); +void mlx5dr_ste_set_rx_decap_l3(u8 *hw_ste_p, bool vlan); +void mlx5dr_ste_set_rx_pop_vlan(u8 *hw_ste_p); +void mlx5dr_ste_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_tpid_pcp_dei_vid, + bool go_back); +void mlx5dr_ste_set_entry_type(u8 *hw_ste_p, u8 entry_type); +u8 mlx5dr_ste_get_entry_type(u8 *hw_ste_p); +void mlx5dr_ste_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions, + u32 re_write_index); +void mlx5dr_ste_set_go_back_bit(u8 *hw_ste_p); +u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste); +u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste); +struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste); + +void mlx5dr_ste_free(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher); +static inline void mlx5dr_ste_put(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct 
mlx5dr_matcher_rx_tx *nic_matcher) +{ + if (refcount_dec_and_test(&ste->refcount)) + mlx5dr_ste_free(ste, matcher, nic_matcher); +} + +/* initial as 0, increased only when ste appears in a new rule */ +static inline void mlx5dr_ste_get(struct mlx5dr_ste *ste) +{ + refcount_inc(&ste->refcount); +} + +void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste, + struct mlx5dr_ste_htbl *next_htbl); +bool mlx5dr_ste_equal_tag(void *src, void *dst); +int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *cur_hw_ste, + enum mlx5dr_icm_chunk_size log_table_size); + +/* STE build functions */ +int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, + u8 match_criteria, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value); +int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_match_param *value, + u8 *ste_arr); +int mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_tnl(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_register_0(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_register_1(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +int mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx); + +/* Actions utils */ +int mlx5dr_actions_build_ste_arr(struct 
mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_action *actions[], + u32 num_actions, + u8 *ste_arr, + u32 *new_hw_ste_arr_sz); + +struct mlx5dr_match_spec { + u32 smac_47_16; /* Source MAC address of incoming packet */ + /* Incoming packet Ethertype - this is the Ethertype + * following the last VLAN tag of the packet + */ + u32 ethertype:16; + u32 smac_15_0:16; /* Source MAC address of incoming packet */ + u32 dmac_47_16; /* Destination MAC address of incoming packet */ + /* VLAN ID of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_vid:12; + /* CFI bit of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_cfi:1; + /* Priority of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_prio:3; + u32 dmac_15_0:16; /* Destination MAC address of incoming packet */ + /* TCP flags. ;Bit 0: FIN;Bit 1: SYN;Bit 2: RST;Bit 3: PSH;Bit 4: ACK; + * Bit 5: URG;Bit 6: ECE;Bit 7: CWR;Bit 8: NS + */ + u32 tcp_flags:9; + u32 ip_version:4; /* IP version */ + u32 frag:1; /* Packet is an IP fragment */ + /* The first vlan in the packet is s-vlan (0x8a88). + * cvlan_tag and svlan_tag cannot be set together + */ + u32 svlan_tag:1; + /* The first vlan in the packet is c-vlan (0x8100). + * cvlan_tag and svlan_tag cannot be set together + */ + u32 cvlan_tag:1; + /* Explicit Congestion Notification derived from + * Traffic Class/TOS field of IPv6/v4 + */ + u32 ip_ecn:2; + /* Differentiated Services Code Point derived from + * Traffic Class/TOS field of IPv6/v4 + */ + u32 ip_dscp:6; + u32 ip_protocol:8; /* IP protocol */ + /* TCP destination port. + * tcp and udp sport/dport are mutually exclusive + */ + u32 tcp_dport:16; + /* TCP source port.;tcp and udp sport/dport are mutually exclusive */ + u32 tcp_sport:16; + u32 ttl_hoplimit:8; + u32 reserved:24; + /* UDP destination port.;tcp and udp sport/dport are mutually exclusive */ + u32 udp_dport:16; + /* UDP source port.;tcp and udp sport/dport are mutually exclusive */ + u32 udp_sport:16; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_127_96; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_95_64; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_63_32; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_31_0; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_127_96; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_95_64; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 
dst_ip_63_32; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_31_0; +}; + +struct mlx5dr_match_misc { + u32 source_sqn:24; /* Source SQN */ + u32 source_vhca_port:4; + /* used with GRE, sequence number exist when gre_s_present == 1 */ + u32 gre_s_present:1; + /* used with GRE, key exist when gre_k_present == 1 */ + u32 gre_k_present:1; + u32 reserved_auto1:1; + /* used with GRE, checksum exist when gre_c_present == 1 */ + u32 gre_c_present:1; + /* Source port.;0xffff determines wire port */ + u32 source_port:16; + u32 reserved_auto2:16; + /* VLAN ID of first VLAN tag the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_vid:12; + /* CFI bit of first VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_cfi:1; + /* Priority of second VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_prio:3; + /* VLAN ID of first VLAN tag the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_vid:12; + /* CFI bit of first VLAN tag in the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_cfi:1; + /* Priority of second VLAN tag in the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_prio:3; + u32 gre_protocol:16; /* GRE Protocol (outer) */ + u32 reserved_auto3:12; + /* The second vlan in the inner header of the packet is s-vlan (0x8a88). + * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + */ + u32 inner_second_svlan_tag:1; + /* The second vlan in the outer header of the packet is s-vlan (0x8a88). + * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together + */ + u32 outer_second_svlan_tag:1; + /* The second vlan in the inner header of the packet is c-vlan (0x8100). + * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + */ + u32 inner_second_cvlan_tag:1; + /* The second vlan in the outer header of the packet is c-vlan (0x8100). 
+ * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together + */ + u32 outer_second_cvlan_tag:1; + u32 gre_key_l:8; /* GRE Key [7:0] (outer) */ + u32 gre_key_h:24; /* GRE Key[31:8] (outer) */ + u32 reserved_auto4:8; + u32 vxlan_vni:24; /* VXLAN VNI (outer) */ + u32 geneve_oam:1; /* GENEVE OAM field (outer) */ + u32 reserved_auto5:7; + u32 geneve_vni:24; /* GENEVE VNI field (outer) */ + u32 outer_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (outer) */ + u32 reserved_auto6:12; + u32 inner_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (inner) */ + u32 reserved_auto7:12; + u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */ + u32 geneve_opt_len:6; /* GENEVE OptLen (outer) */ + u32 reserved_auto8:10; + u32 bth_dst_qp:24; /* Destination QP in BTH header */ + u32 reserved_auto9:8; + u8 reserved_auto10[20]; +}; + +struct mlx5dr_match_misc2 { + u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */ + u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */ + u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */ + u32 outer_first_mpls_label:20; /* First MPLS LABEL (outer) */ + u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */ + u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */ + u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */ + u32 inner_first_mpls_label:20; /* First MPLS LABEL (inner) */ + u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */ + u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_gre_label:20; /* last MPLS LABEL (outer) */ + u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */ + u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_udp_label:20; /* last MPLS LABEL (outer) */ + u32 metadata_reg_c_7; /* metadata_reg_c_7 */ + u32 metadata_reg_c_6; /* metadata_reg_c_6 */ + u32 metadata_reg_c_5; /* metadata_reg_c_5 */ + u32 metadata_reg_c_4; /* metadata_reg_c_4 */ + u32 metadata_reg_c_3; /* metadata_reg_c_3 */ + u32 metadata_reg_c_2; /* metadata_reg_c_2 */ + u32 metadata_reg_c_1; /* metadata_reg_c_1 */ + u32 metadata_reg_c_0; /* metadata_reg_c_0 */ + u32 metadata_reg_a; /* metadata_reg_a */ + u32 metadata_reg_b; /* metadata_reg_b */ + u8 reserved_auto2[8]; +}; + +struct mlx5dr_match_misc3 { + u32 inner_tcp_seq_num; + u32 outer_tcp_seq_num; + u32 inner_tcp_ack_num; + u32 outer_tcp_ack_num; + u32 outer_vxlan_gpe_vni:24; + u32 reserved_auto1:8; + u32 reserved_auto2:16; + u32 outer_vxlan_gpe_flags:8; + u32 outer_vxlan_gpe_next_protocol:8; + u32 icmpv4_header_data; + u32 icmpv6_header_data; + u32 icmpv6_code:8; + u32 icmpv6_type:8; + u32 icmpv4_code:8; + u32 icmpv4_type:8; + u8 reserved_auto3[0x1c]; +}; + +struct mlx5dr_match_param { + struct mlx5dr_match_spec outer; + struct mlx5dr_match_misc misc; + struct mlx5dr_match_spec inner; + struct mlx5dr_match_misc2 misc2; + struct mlx5dr_match_misc3 misc3; +}; + +#define DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \ + (_misc3)->icmpv4_code || \ + (_misc3)->icmpv4_header_data) + +struct mlx5dr_esw_caps { + u64 drop_icm_address_rx; + u64 drop_icm_address_tx; + u64 uplink_icm_address_rx; + u64 uplink_icm_address_tx; + bool sw_owner; +}; + +struct mlx5dr_cmd_vport_cap { + u16 vport_gvmi; + u16 vhca_gvmi; + u64 icm_address_rx; + u64 icm_address_tx; + u32 num; +}; + +struct mlx5dr_cmd_caps { + u16 
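+	/* GVMI of this function, queried via mlx5dr_cmd_query_gvmi() */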
gvmi; + u64 nic_rx_drop_address; + u64 nic_tx_drop_address; + u64 nic_tx_allow_address; + u64 esw_rx_drop_address; + u64 esw_tx_drop_address; + u32 log_icm_size; + u64 hdr_modify_icm_addr; + u32 flex_protocols; + u8 flex_parser_id_icmp_dw0; + u8 flex_parser_id_icmp_dw1; + u8 flex_parser_id_icmpv6_dw0; + u8 flex_parser_id_icmpv6_dw1; + u8 max_ft_level; + u16 roce_min_src_udp; + u8 num_esw_ports; + bool eswitch_manager; + bool rx_sw_owner; + bool tx_sw_owner; + bool fdb_sw_owner; + u32 num_vports; + struct mlx5dr_esw_caps esw_caps; + struct mlx5dr_cmd_vport_cap *vports_caps; + bool prio_tag_required; +}; + +struct mlx5dr_domain_rx_tx { + u64 drop_icm_addr; + u64 default_icm_addr; + enum mlx5dr_ste_entry_type ste_type; +}; + +struct mlx5dr_domain_info { + bool supp_sw_steering; + u32 max_inline_size; + u32 max_send_wr; + u32 max_log_sw_icm_sz; + u32 max_log_action_icm_sz; + struct mlx5dr_domain_rx_tx rx; + struct mlx5dr_domain_rx_tx tx; + struct mlx5dr_cmd_caps caps; +}; + +struct mlx5dr_domain_cache { + struct mlx5dr_fw_recalc_cs_ft **recalc_cs_ft; +}; + +struct mlx5dr_domain { + struct mlx5dr_domain *peer_dmn; + struct mlx5_core_dev *mdev; + u32 pdn; + struct mlx5_uars_page *uar; + enum mlx5dr_domain_type type; + refcount_t refcount; + struct mutex mutex; /* protect domain */ + struct mlx5dr_icm_pool *ste_icm_pool; + struct mlx5dr_icm_pool *action_icm_pool; + struct mlx5dr_send_ring *send_ring; + struct mlx5dr_domain_info info; + struct mlx5dr_domain_cache cache; +}; + +struct mlx5dr_table_rx_tx { + struct mlx5dr_ste_htbl *s_anchor; + struct mlx5dr_domain_rx_tx *nic_dmn; + u64 default_icm_addr; +}; + +struct mlx5dr_table { + struct mlx5dr_domain *dmn; + struct mlx5dr_table_rx_tx rx; + struct mlx5dr_table_rx_tx tx; + u32 level; + u32 table_type; + u32 table_id; + struct list_head matcher_list; + struct mlx5dr_action *miss_action; + refcount_t refcount; +}; + +struct mlx5dr_matcher_rx_tx { + struct mlx5dr_ste_htbl *s_htbl; + struct mlx5dr_ste_htbl *e_anchor; + struct mlx5dr_ste_build *ste_builder; + struct mlx5dr_ste_build ste_builder4[DR_RULE_MAX_STES]; + struct mlx5dr_ste_build ste_builder6[DR_RULE_MAX_STES]; + u8 num_of_builders; + u8 num_of_builders4; + u8 num_of_builders6; + u64 default_icm_addr; + struct mlx5dr_table_rx_tx *nic_tbl; +}; + +struct mlx5dr_matcher { + struct mlx5dr_table *tbl; + struct mlx5dr_matcher_rx_tx rx; + struct mlx5dr_matcher_rx_tx tx; + struct list_head matcher_list; + u16 prio; + struct mlx5dr_match_param mask; + u8 match_criteria; + refcount_t refcount; + struct mlx5dv_flow_matcher *dv_matcher; +}; + +struct mlx5dr_rule_member { + struct mlx5dr_ste *ste; + /* attached to mlx5dr_rule via this */ + struct list_head list; + /* attached to mlx5dr_ste via this */ + struct list_head use_ste_list; +}; + +struct mlx5dr_action { + enum mlx5dr_action_type action_type; + refcount_t refcount; + union { + struct { + struct mlx5dr_domain *dmn; + struct mlx5dr_icm_chunk *chunk; + u8 *data; + u32 data_size; + u16 num_of_actions; + u32 index; + u8 allow_rx:1; + u8 allow_tx:1; + u8 modify_ttl:1; + } rewrite; + struct { + struct mlx5dr_domain *dmn; + u32 reformat_id; + u32 reformat_size; + } reformat; + struct { + u8 is_fw_tbl:1; + union { + struct mlx5dr_table *tbl; + struct { + struct mlx5_flow_table *ft; + u64 rx_icm_addr; + u64 tx_icm_addr; + struct mlx5_core_dev *mdev; + } fw_tbl; + }; + } dest_tbl; + struct { + u32 ctr_id; + u32 offeset; + } ctr; + struct { + struct mlx5dr_domain *dmn; + struct mlx5dr_cmd_vport_cap *caps; + u32 num; + } vport; + struct { + u32 vlan_hdr; /* 
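+			 * packed as ethtype[31:16] | pcp/dei[15:12] | vid[11:0]
+			 * (see create_action_push_vlan in fs_dr.c):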
tpid_pcp_dei_vid */ + } push_vlan; + u32 flow_tag; + }; +}; + +enum mlx5dr_connect_type { + CONNECT_HIT = 1, + CONNECT_MISS = 2, +}; + +struct mlx5dr_htbl_connect_info { + enum mlx5dr_connect_type type; + union { + struct mlx5dr_ste_htbl *hit_next_htbl; + u64 miss_icm_addr; + }; +}; + +struct mlx5dr_rule_rx_tx { + struct list_head rule_members_list; + struct mlx5dr_matcher_rx_tx *nic_matcher; +}; + +struct mlx5dr_rule { + struct mlx5dr_matcher *matcher; + struct mlx5dr_rule_rx_tx rx; + struct mlx5dr_rule_rx_tx tx; + struct list_head rule_actions_list; +}; + +void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *new_ste, + struct mlx5dr_ste *ste); + +struct mlx5dr_icm_chunk { + struct mlx5dr_icm_bucket *bucket; + struct list_head chunk_list; + u32 rkey; + u32 num_of_entries; + u32 byte_size; + u64 icm_addr; + u64 mr_addr; + + /* Memory optimisation */ + struct mlx5dr_ste *ste_arr; + u8 *hw_ste_arr; + struct list_head *miss_list; +}; + +static inline int +mlx5dr_matcher_supp_flex_parser_icmp_v4(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED; +} + +static inline int +mlx5dr_matcher_supp_flex_parser_icmp_v6(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED; +} + +int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + bool ipv6); + +static inline u32 +mlx5dr_icm_pool_chunk_size_to_entries(enum mlx5dr_icm_chunk_size chunk_size) +{ + return 1 << chunk_size; +} + +static inline int +mlx5dr_icm_pool_chunk_size_to_byte(enum mlx5dr_icm_chunk_size chunk_size, + enum mlx5dr_icm_type icm_type) +{ + int num_of_entries; + int entry_size; + + if (icm_type == DR_ICM_TYPE_STE) + entry_size = DR_STE_SIZE; + else + entry_size = DR_MODIFY_ACTION_SIZE; + + num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); + + return entry_size * num_of_entries; +} + +static inline struct mlx5dr_cmd_vport_cap * +mlx5dr_get_vport_cap(struct mlx5dr_cmd_caps *caps, u32 vport) +{ + if (!caps->vports_caps || + (vport >= caps->num_vports && vport != WIRE_PORT)) + return NULL; + + if (vport == WIRE_PORT) + vport = caps->num_vports; + + return &caps->vports_caps[vport]; +} + +struct mlx5dr_cmd_query_flow_table_details { + u8 status; + u8 level; + u64 sw_owner_icm_root_1; + u64 sw_owner_icm_root_0; +}; + +/* internal API functions */ +int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_caps *caps); +int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, + bool other_vport, u16 vport_number, + u64 *icm_address_rx, + u64 *icm_address_tx); +int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, + bool other_vport, u16 vport_number, u16 *gvmi); +int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_esw_caps *caps); +int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev); +int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id, + u32 modify_header_id, + u32 vport_id); +int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id); +int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev, + u32 table_type, + u8 num_of_actions, + u64 *actions, + u32 *modify_header_id); +int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev, + u32 modify_header_id); +int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 *group_id); +int 
mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev,
+				  u32 table_type,
+				  u32 table_id,
+				  u32 group_id);
+int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
+				 u32 table_type,
+				 u64 icm_addr_rx,
+				 u64 icm_addr_tx,
+				 u8 level,
+				 bool sw_owner,
+				 bool term_tbl,
+				 u64 *fdb_rx_icm_addr,
+				 u32 *table_id);
+int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
+				  u32 table_id,
+				  u32 table_type);
+int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
+				enum fs_flow_table_type type,
+				u32 table_id,
+				struct mlx5dr_cmd_query_flow_table_details *output);
+int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
+				   enum mlx5_reformat_ctx_type rt,
+				   size_t reformat_size,
+				   void *reformat_data,
+				   u32 *reformat_id);
+void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
+				     u32 reformat_id);
+
+struct mlx5dr_cmd_gid_attr {
+	u8 gid[16];
+	u8 mac[6];
+	u32 roce_ver;
+};
+
+struct mlx5dr_cmd_qp_create_attr {
+	u32 page_id;
+	u32 pdn;
+	u32 cqn;
+	u32 pm_state;
+	u32 service_type;
+	u32 buff_umem_id;
+	u32 db_umem_id;
+	u32 sq_wqe_cnt;
+	u32 rq_wqe_cnt;
+	u32 rq_wqe_shift;
+};
+
+int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
+			 u16 index, struct mlx5dr_cmd_gid_attr *attr);
+
+struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
+					       enum mlx5dr_icm_type icm_type);
+void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool);
+
+struct mlx5dr_icm_chunk *
+mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool,
+		       enum mlx5dr_icm_chunk_size chunk_size);
+void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk);
+bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste);
+int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
+				      struct mlx5dr_domain_rx_tx *nic_dmn,
+				      struct mlx5dr_ste_htbl *htbl,
+				      struct mlx5dr_htbl_connect_info *connect_info,
+				      bool update_hw_ste);
+void mlx5dr_ste_set_formatted_ste(u16 gvmi,
+				  struct mlx5dr_domain_rx_tx *nic_dmn,
+				  struct mlx5dr_ste_htbl *htbl,
+				  u8 *formatted_ste,
+				  struct mlx5dr_htbl_connect_info *connect_info);
+void mlx5dr_ste_copy_param(u8 match_criteria,
+			   struct mlx5dr_match_param *set_param,
+			   struct mlx5dr_match_parameters *mask);
+
+void mlx5dr_crc32_init_table(void);
+u32 mlx5dr_crc32_slice8_calc(const void *input_data, size_t length);
+
+struct mlx5dr_qp {
+	struct mlx5_core_dev *mdev;
+	struct mlx5_wq_qp wq;
+	struct mlx5_uars_page *uar;
+	struct mlx5_wq_ctrl wq_ctrl;
+	struct mlx5_core_qp mqp;
+	struct {
+		unsigned int pc;
+		unsigned int cc;
+		unsigned int size;
+		unsigned int *wqe_head;
+		unsigned int wqe_cnt;
+	} sq;
+	struct {
+		unsigned int pc;
+		unsigned int cc;
+		unsigned int size;
+		unsigned int wqe_cnt;
+	} rq;
+	int max_inline_data;
+};
+
+struct mlx5dr_cq {
+	struct mlx5_core_dev *mdev;
+	struct mlx5_cqwq wq;
+	struct mlx5_wq_ctrl wq_ctrl;
+	struct mlx5_core_cq mcq;
+	struct mlx5dr_qp *qp;
+};
+
+struct mlx5dr_mr {
+	struct mlx5_core_dev *mdev;
+	struct mlx5_core_mkey mkey;
+	dma_addr_t dma_addr;
+	void *addr;
+	size_t size;
+};
+
+#define MAX_SEND_CQE 64
+#define MIN_READ_SYNC 64
+
+struct mlx5dr_send_ring {
+	struct mlx5dr_cq *cq;
+	struct mlx5dr_qp *qp;
+	struct mlx5dr_mr *mr;
+	/* How many WQEs are waiting for completion */
+	u32 pending_wqe;
+	/* Signal a completion request per this threshold value */
+	u16 signal_th;
+	/* Each post_send size is less than max_post_send_size */
+	u32 max_post_send_size;
+	/* manage the send queue */
+	u32 tx_head;
+	void *buf;
+	u32 buf_size;
+	struct ib_wc wc[MAX_SEND_CQE];
+	u8 sync_buff[MIN_READ_SYNC];
+	struct mlx5dr_mr *sync_mr;
+};
+
+int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn);
+void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
+			   struct mlx5dr_send_ring *send_ring);
+int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn);
+int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn,
+			     struct mlx5dr_ste *ste,
+			     u8 *data,
+			     u16 size,
+			     u16 offset);
+int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
+			      struct mlx5dr_ste_htbl *htbl,
+			      u8 *formatted_ste, u8 *mask);
+int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
+					struct mlx5dr_ste_htbl *htbl,
+					u8 *ste_init_data,
+					bool update_hw_ste);
+int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
+				struct mlx5dr_action *action);
+
+struct mlx5dr_fw_recalc_cs_ft {
+	u64 rx_icm_addr;
+	u32 table_id;
+	u32 group_id;
+	u32 modify_hdr_id;
+};
+
+struct mlx5dr_fw_recalc_cs_ft *
+mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num);
+void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn,
+				    struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft);
+int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn,
+					      u32 vport_num,
+					      u64 *rx_icm_addr);
+#endif /* _DR_TYPES_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
new file mode 100644
index 000000000000..3d587d0bdbbe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -0,0 +1,600 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "fs_cmd.h"
+#include "mlx5dr.h"
+#include "fs_dr.h"
+
+static bool mlx5_dr_is_fw_table(u32 flags)
+{
+	if (flags & MLX5_FLOW_TABLE_TERMINATION)
+		return true;
+
+	return false;
+}
+
+static int mlx5_cmd_dr_update_root_ft(struct mlx5_flow_root_namespace *ns,
+				      struct mlx5_flow_table *ft,
+				      u32 underlay_qpn,
+				      bool disconnect)
+{
+	return mlx5_fs_cmd_get_fw_cmds()->update_root_ft(ns, ft, underlay_qpn,
+							 disconnect);
+}
+
+static int set_miss_action(struct mlx5_flow_root_namespace *ns,
+			   struct mlx5_flow_table *ft,
+			   struct mlx5_flow_table *next_ft)
+{
+	struct mlx5dr_action *old_miss_action;
+	struct mlx5dr_action *action = NULL;
+	struct mlx5dr_table *next_tbl;
+	int err;
+
+	next_tbl = next_ft ?
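+	/* a miss on this table is expressed as a DR dest-table action
+	 * created on the next table; a NULL next_ft clears it
+	 */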
next_ft->fs_dr_table.dr_table : NULL; + if (next_tbl) { + action = mlx5dr_action_create_dest_table(next_tbl); + if (!action) + return -EINVAL; + } + old_miss_action = ft->fs_dr_table.miss_action; + err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action); + if (err && action) { + err = mlx5dr_action_destroy(action); + if (err) { + action = NULL; + mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n", + err); + } + } + ft->fs_dr_table.miss_action = action; + if (old_miss_action) { + err = mlx5dr_action_destroy(old_miss_action); + if (err) + mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n", + err); + } + + return err; +} + +static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + unsigned int log_size, + struct mlx5_flow_table *next_ft) +{ + struct mlx5dr_table *tbl; + int err; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, + log_size, + next_ft); + + tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, + ft->level); + if (!tbl) { + mlx5_core_err(ns->dev, "Failed creating dr flow_table\n"); + return -EINVAL; + } + + ft->fs_dr_table.dr_table = tbl; + ft->id = mlx5dr_table_get_id(tbl); + + if (next_ft) { + err = set_miss_action(ns, ft, next_ft); + if (err) { + mlx5dr_table_destroy(tbl); + ft->fs_dr_table.dr_table = NULL; + return err; + } + } + + return 0; +} + +static int mlx5_cmd_dr_destroy_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft) +{ + struct mlx5dr_action *action = ft->fs_dr_table.miss_action; + int err; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft); + + err = mlx5dr_table_destroy(ft->fs_dr_table.dr_table); + if (err) { + mlx5_core_err(ns->dev, "Failed to destroy flow_table (%d)\n", + err); + return err; + } + if (action) { + err = mlx5dr_action_destroy(action); + if (err) { + mlx5_core_err(ns->dev, "Failed to destroy action(%d)\n", + err); + return err; + } + } + + return err; +} + +static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + return set_miss_action(ns, ft, next_ft); +} + +static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 *in, + struct mlx5_flow_group *fg) +{ + struct mlx5dr_matcher *matcher; + u16 priority = MLX5_GET(create_flow_group_in, in, + start_flow_index); + u8 match_criteria_enable = MLX5_GET(create_flow_group_in, + in, + match_criteria_enable); + struct mlx5dr_match_parameters mask; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->create_flow_group(ns, ft, in, + fg); + + mask.match_buf = MLX5_ADDR_OF(create_flow_group_in, + in, match_criteria); + mask.match_sz = sizeof(fg->mask.match_criteria); + + matcher = mlx5dr_matcher_create(ft->fs_dr_table.dr_table, + priority, + match_criteria_enable, + &mask); + if (!matcher) { + mlx5_core_err(ns->dev, "Failed creating matcher\n"); + return -EINVAL; + } + + fg->fs_dr_matcher.dr_matcher = matcher; + return 0; +} + +static int mlx5_cmd_dr_destroy_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_group(ns, ft, fg); + + return mlx5dr_matcher_destroy(fg->fs_dr_matcher.dr_matcher); +} + +static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain, + struct 
mlx5_flow_rule *dst) +{ + struct mlx5_flow_destination *dest_attr = &dst->dest_attr; + + return mlx5dr_action_create_dest_vport(domain, dest_attr->vport.num, + dest_attr->vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID, + dest_attr->vport.vhca_id); +} + +static struct mlx5dr_action *create_ft_action(struct mlx5_core_dev *dev, + struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_table *dest_ft = dst->dest_attr.ft; + + if (mlx5_dr_is_fw_table(dest_ft->flags)) + return mlx5dr_create_action_dest_flow_fw_table(dest_ft, dev); + return mlx5dr_action_create_dest_table(dest_ft->fs_dr_table.dr_table); +} + +static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domain, + struct mlx5_fs_vlan *vlan) +{ + u16 n_ethtype = vlan->ethtype; + u8 prio = vlan->prio; + u16 vid = vlan->vid; + u32 vlan_hdr; + + vlan_hdr = (u32)n_ethtype << 16 | (u32)(prio) << 12 | (u32)vid; + return mlx5dr_action_create_push_vlan(domain, htonl(vlan_hdr)); +} + +#define MLX5_FLOW_CONTEXT_ACTION_MAX 20 +static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) +{ + struct mlx5dr_domain *domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action *term_action = NULL; + struct mlx5dr_match_parameters params; + struct mlx5_core_dev *dev = ns->dev; + struct mlx5dr_action **fs_dr_actions; + struct mlx5dr_action *tmp_action; + struct mlx5dr_action **actions; + bool delay_encap_set = false; + struct mlx5dr_rule *rule; + struct mlx5_flow_rule *dst; + int fs_dr_num_actions = 0; + int num_actions = 0; + size_t match_sz; + int err = 0; + int i; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte); + + actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions), + GFP_KERNEL); + if (!actions) + return -ENOMEM; + + fs_dr_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, + sizeof(*fs_dr_actions), GFP_KERNEL); + if (!fs_dr_actions) { + kfree(actions); + return -ENOMEM; + } + + match_sz = sizeof(fte->val); + + /* The order of the actions must be kept; only the following + * order is supported by SW steering: + * TX: push vlan -> modify header -> encap + * RX: decap -> pop vlan -> modify header + */ + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + tmp_action = create_action_push_vlan(domain, &fte->action.vlan[0]); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + tmp_action = create_action_push_vlan(domain, &fte->action.vlan[1]); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + enum mlx5dr_action_reformat_type decap_type = + DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2; + + tmp_action = mlx5dr_action_create_packet_reformat(domain, + decap_type, 0, + NULL); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { + bool is_decap = fte->action.pkt_reformat->reformat_type == + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + + if (is_decap) + actions[num_actions++] = + fte->action.pkt_reformat->action.dr_action; + else + delay_encap_set = true; + } + + if 
(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { + tmp_action = + mlx5dr_action_create_pop_vlan(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2) { + tmp_action = + mlx5dr_action_create_pop_vlan(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + actions[num_actions++] = + fte->action.modify_hdr->action.dr_action; + + if (delay_encap_set) + actions[num_actions++] = + fte->action.pkt_reformat->action.dr_action; + + /* The order of the actions below is not important */ + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { + tmp_action = mlx5dr_action_create_drop(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_action = tmp_action; + } + + if (fte->flow_context.flow_tag) { + tmp_action = + mlx5dr_action_create_tag(fte->flow_context.flow_tag); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + list_for_each_entry(dst, &fte->node.children, node.list) { + enum mlx5_flow_destination_type type = dst->dest_attr.type; + u32 id; + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -ENOSPC; + goto free_actions; + } + + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_COUNTER: + id = dst->dest_attr.counter_id; + + tmp_action = + mlx5dr_action_create_flow_counter(id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + tmp_action = create_ft_action(dev, dst); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_action = tmp_action; + break; + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + tmp_action = create_vport_action(domain, dst); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_action = tmp_action; + break; + default: + err = -EOPNOTSUPP; + goto free_actions; + } + } + } + + params.match_sz = match_sz; + params.match_buf = (u64 *)fte->val; + + if (term_action) + actions[num_actions++] = term_action; + + rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher, + ¶ms, + num_actions, + actions); + if (!rule) { + err = -EINVAL; + goto free_actions; + } + + kfree(actions); + fte->fs_dr_rule.dr_rule = rule; + fte->fs_dr_rule.num_actions = fs_dr_num_actions; + fte->fs_dr_rule.dr_actions = fs_dr_actions; + + return 0; + +free_actions: + for (i = 0; i < fs_dr_num_actions; i++) + if (!IS_ERR_OR_NULL(fs_dr_actions[i])) + mlx5dr_action_destroy(fs_dr_actions[i]); + + mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err); + kfree(actions); + kfree(fs_dr_actions); + return err; +} + +static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action *action; 
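/*
 * Illustrative sketch, not part of this patch: the create_fte
 * translation above maintains two arrays. actions[] is the ordered
 * list handed to mlx5dr_rule_create(), while fs_dr_actions[] records
 * only the actions this file allocated and must release on error or on
 * delete_fte; pre-existing actions (modify header, packet reformat)
 * enter the first array only. The bookkeeping reduces to:
 *
 *	static int track_action_sketch(struct mlx5dr_action **exec, int *n_exec,
 *				       struct mlx5dr_action **owned, int *n_owned,
 *				       struct mlx5dr_action *a, bool is_owned)
 *	{
 *		if (!a)
 *			return -ENOMEM;
 *		if (*n_exec == MLX5_FLOW_CONTEXT_ACTION_MAX)
 *			return -ENOSPC;
 *		if (is_owned)
 *			owned[(*n_owned)++] = a;
 *		exec[(*n_exec)++] = a;
 *		return 0;
 *	}
 */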
+ int dr_reformat; + + switch (reformat_type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2; + break; + case MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + dr_reformat = DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2; + break; + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3; + break; + default: + mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n", + reformat_type); + return -EOPNOTSUPP; + } + + action = mlx5dr_action_create_packet_reformat(dr_domain, + dr_reformat, + size, + reformat_data); + if (!action) { + mlx5_core_err(ns->dev, "Failed allocating packet-reformat action\n"); + return -EINVAL; + } + + pkt_reformat->action.dr_action = action; + + return 0; +} + +static void mlx5_cmd_dr_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) +{ + mlx5dr_action_destroy(pkt_reformat->action.dr_action); +} + +static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) +{ + struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action *action; + size_t actions_sz; + + actions_sz = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) * + num_actions; + action = mlx5dr_action_create_modify_header(dr_domain, 0, + actions_sz, + modify_actions); + if (!action) { + mlx5_core_err(ns->dev, "Failed allocating modify-header action\n"); + return -EINVAL; + } + + modify_hdr->action.dr_action = action; + + return 0; +} + +static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) +{ + mlx5dr_action_destroy(modify_hdr->action.dr_action); +} + +static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + int modify_mask, + struct fs_fte *fte) +{ + return -EOPNOTSUPP; +} + +static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + struct mlx5_fs_dr_rule *rule = &fte->fs_dr_rule; + int err; + int i; + + if (mlx5_dr_is_fw_table(ft->flags)) + return mlx5_fs_cmd_get_fw_cmds()->delete_fte(ns, ft, fte); + + err = mlx5dr_rule_destroy(rule->dr_rule); + if (err) + return err; + + for (i = 0; i < rule->num_actions; i++) + if (!IS_ERR_OR_NULL(rule->dr_actions[i])) + mlx5dr_action_destroy(rule->dr_actions[i]); + + kfree(rule->dr_actions); + return 0; +} + +static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns) +{ + struct mlx5dr_domain *peer_domain = NULL; + + if (peer_ns) + peer_domain = peer_ns->fs_dr_domain.dr_domain; + mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain, + peer_domain); + return 0; +} + +static int mlx5_cmd_dr_create_ns(struct mlx5_flow_root_namespace *ns) +{ + ns->fs_dr_domain.dr_domain = + mlx5dr_domain_create(ns->dev, + MLX5DR_DOMAIN_TYPE_FDB); + if (!ns->fs_dr_domain.dr_domain) { + mlx5_core_err(ns->dev, "Failed to create dr flow namespace\n"); + return -EOPNOTSUPP; + } + return 0; +} + +static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns) +{ + return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain); +} + +bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) +{ + return mlx5dr_is_supported(dev); +} + +static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = { + 
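/*
 * Illustrative sketch, not part of this patch: the mlx5_flow_cmds
 * table this initializer fills is the SW-steering counterpart of the
 * firmware command set. The site that chooses between the two lives in
 * fs_core.c and is outside this hunk, so the selector below is a
 * hypothetical restatement of that choice:
 *
 *	static const struct mlx5_flow_cmds *
 *	pick_cmds_sketch(struct mlx5_core_dev *dev)
 *	{
 *		if (mlx5_fs_dr_is_supported(dev))
 *			return mlx5_fs_cmd_get_dr_cmds();
 *		return mlx5_fs_cmd_get_fw_cmds();
 *	}
 */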
.create_flow_table = mlx5_cmd_dr_create_flow_table, + .destroy_flow_table = mlx5_cmd_dr_destroy_flow_table, + .modify_flow_table = mlx5_cmd_dr_modify_flow_table, + .create_flow_group = mlx5_cmd_dr_create_flow_group, + .destroy_flow_group = mlx5_cmd_dr_destroy_flow_group, + .create_fte = mlx5_cmd_dr_create_fte, + .update_fte = mlx5_cmd_dr_update_fte, + .delete_fte = mlx5_cmd_dr_delete_fte, + .update_root_ft = mlx5_cmd_dr_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_dr_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_dr_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_dr_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_dr_modify_header_dealloc, + .set_peer = mlx5_cmd_dr_set_peer, + .create_ns = mlx5_cmd_dr_create_ns, + .destroy_ns = mlx5_cmd_dr_destroy_ns, +}; + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) +{ + return &mlx5_flow_cmds_dr; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h new file mode 100644 index 000000000000..1fb185d6ac7f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2019 Mellanox Technologies + */ + +#ifndef _MLX5_FS_DR_ +#define _MLX5_FS_DR_ + +#include "mlx5dr.h" + +struct mlx5_flow_root_namespace; +struct fs_fte; + +struct mlx5_fs_dr_action { + struct mlx5dr_action *dr_action; +}; + +struct mlx5_fs_dr_ns { + struct mlx5_dr_ns *dr_ns; +}; + +struct mlx5_fs_dr_rule { + struct mlx5dr_rule *dr_rule; + /* Only actions created by fs_dr */ + struct mlx5dr_action **dr_actions; + int num_actions; +}; + +struct mlx5_fs_dr_domain { + struct mlx5dr_domain *dr_domain; +}; + +struct mlx5_fs_dr_matcher { + struct mlx5dr_matcher *dr_matcher; +}; + +struct mlx5_fs_dr_table { + struct mlx5dr_table *dr_table; + struct mlx5dr_action *miss_action; +}; + +#ifdef CONFIG_MLX5_SW_STEERING + +bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev); + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void); + +#else + +static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) +{ + return NULL; +} + +static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) +{ + return false; +} + +#endif /* CONFIG_MLX5_SW_STEERING */ +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h new file mode 100644 index 000000000000..596c927220d9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h @@ -0,0 +1,604 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef MLX5_IFC_DR_H +#define MLX5_IFC_DR_H + +enum { + MLX5DR_ACTION_MDFY_HW_FLD_L2_0 = 0, + MLX5DR_ACTION_MDFY_HW_FLD_L2_1 = 1, + MLX5DR_ACTION_MDFY_HW_FLD_L2_2 = 2, + MLX5DR_ACTION_MDFY_HW_FLD_L3_0 = 3, + MLX5DR_ACTION_MDFY_HW_FLD_L3_1 = 4, + MLX5DR_ACTION_MDFY_HW_FLD_L3_2 = 5, + MLX5DR_ACTION_MDFY_HW_FLD_L3_3 = 6, + MLX5DR_ACTION_MDFY_HW_FLD_L3_4 = 7, + MLX5DR_ACTION_MDFY_HW_FLD_L4_0 = 8, + MLX5DR_ACTION_MDFY_HW_FLD_L4_1 = 9, + MLX5DR_ACTION_MDFY_HW_FLD_MPLS = 10, + MLX5DR_ACTION_MDFY_HW_FLD_L2_TNL_0 = 11, + MLX5DR_ACTION_MDFY_HW_FLD_REG_0 = 12, + MLX5DR_ACTION_MDFY_HW_FLD_REG_1 = 13, + MLX5DR_ACTION_MDFY_HW_FLD_REG_2 = 14, + MLX5DR_ACTION_MDFY_HW_FLD_REG_3 = 15, + MLX5DR_ACTION_MDFY_HW_FLD_L4_2 = 16, + MLX5DR_ACTION_MDFY_HW_FLD_FLEX_0 = 17, + MLX5DR_ACTION_MDFY_HW_FLD_FLEX_1 = 18, + 
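/*
 * Illustrative sketch, not part of this patch: the STE and action
 * structs in this header follow the mlx5_ifc convention. Each
 * "u8 name[0xN]" member describes an N-bit big-endian field, sizeof()
 * of such a struct yields the layout size in bits, and fields are
 * accessed with the MLX5_SET()/MLX5_GET() helpers. For the
 * mlx5_ifc_ste_general_bits layout defined below, for example:
 *
 *	char ste[MLX5_ST_SZ_BYTES(ste_general)] = {};
 *
 *	MLX5_SET(ste_general, ste, entry_type, MLX5DR_STE_TYPE_RX);
 *	MLX5_SET(ste_general, ste, next_lu_type,
 *		 MLX5DR_STE_LU_TYPE_ETHL2_DST_O);
 *	WARN_ON(MLX5_GET(ste_general, ste, entry_type) !=
 *		MLX5DR_STE_TYPE_RX);
 */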
MLX5DR_ACTION_MDFY_HW_FLD_FLEX_2 = 19, + MLX5DR_ACTION_MDFY_HW_FLD_FLEX_3 = 20, + MLX5DR_ACTION_MDFY_HW_FLD_L2_TNL_1 = 21, + MLX5DR_ACTION_MDFY_HW_FLD_METADATA = 22, + MLX5DR_ACTION_MDFY_HW_FLD_RESERVED = 23, +}; + +enum { + MLX5DR_ACTION_MDFY_HW_OP_SET = 0x2, + MLX5DR_ACTION_MDFY_HW_OP_ADD = 0x3, +}; + +enum { + MLX5DR_ACTION_MDFY_HW_HDR_L3_NONE = 0x0, + MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV4 = 0x1, + MLX5DR_ACTION_MDFY_HW_HDR_L3_IPV6 = 0x2, +}; + +enum { + MLX5DR_ACTION_MDFY_HW_HDR_L4_NONE = 0x0, + MLX5DR_ACTION_MDFY_HW_HDR_L4_TCP = 0x1, + MLX5DR_ACTION_MDFY_HW_HDR_L4_UDP = 0x2, +}; + +enum { + MLX5DR_STE_LU_TYPE_NOP = 0x00, + MLX5DR_STE_LU_TYPE_SRC_GVMI_AND_QP = 0x05, + MLX5DR_STE_LU_TYPE_ETHL2_TUNNELING_I = 0x0a, + MLX5DR_STE_LU_TYPE_ETHL2_DST_O = 0x06, + MLX5DR_STE_LU_TYPE_ETHL2_DST_I = 0x07, + MLX5DR_STE_LU_TYPE_ETHL2_DST_D = 0x1b, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_O = 0x08, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_I = 0x09, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_D = 0x1c, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_O = 0x36, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_I = 0x37, + MLX5DR_STE_LU_TYPE_ETHL2_SRC_DST_D = 0x38, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_O = 0x0d, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_I = 0x0e, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_DST_D = 0x1e, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_O = 0x0f, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_I = 0x10, + MLX5DR_STE_LU_TYPE_ETHL3_IPV6_SRC_D = 0x1f, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x11, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x12, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_5_TUPLE_D = 0x20, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_O = 0x29, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_I = 0x2a, + MLX5DR_STE_LU_TYPE_ETHL3_IPV4_MISC_D = 0x2b, + MLX5DR_STE_LU_TYPE_ETHL4_O = 0x13, + MLX5DR_STE_LU_TYPE_ETHL4_I = 0x14, + MLX5DR_STE_LU_TYPE_ETHL4_D = 0x21, + MLX5DR_STE_LU_TYPE_ETHL4_MISC_O = 0x2c, + MLX5DR_STE_LU_TYPE_ETHL4_MISC_I = 0x2d, + MLX5DR_STE_LU_TYPE_ETHL4_MISC_D = 0x2e, + MLX5DR_STE_LU_TYPE_MPLS_FIRST_O = 0x15, + MLX5DR_STE_LU_TYPE_MPLS_FIRST_I = 0x24, + MLX5DR_STE_LU_TYPE_MPLS_FIRST_D = 0x25, + MLX5DR_STE_LU_TYPE_GRE = 0x16, + MLX5DR_STE_LU_TYPE_FLEX_PARSER_0 = 0x22, + MLX5DR_STE_LU_TYPE_FLEX_PARSER_1 = 0x23, + MLX5DR_STE_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x19, + MLX5DR_STE_LU_TYPE_GENERAL_PURPOSE = 0x18, + MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_0 = 0x2f, + MLX5DR_STE_LU_TYPE_STEERING_REGISTERS_1 = 0x30, + MLX5DR_STE_LU_TYPE_DONT_CARE = 0x0f, +}; + +enum mlx5dr_ste_entry_type { + MLX5DR_STE_TYPE_TX = 1, + MLX5DR_STE_TYPE_RX = 2, + MLX5DR_STE_TYPE_MODIFY_PKT = 6, +}; + +struct mlx5_ifc_ste_general_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_5c[0x2]; + u8 next_table_rank[0x2]; + + u8 reserved_at_60[0xa0]; + u8 tag_value[0x60]; + u8 bit_mask[0x60]; +}; + +struct mlx5_ifc_ste_sx_transmit_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_5c[0x2]; + u8 next_table_rank[0x2]; + + u8 sx_wire[0x1]; + u8 sx_func_lb[0x1]; + u8 sx_sniffer[0x1]; + u8 sx_wire_enable[0x1]; + u8 sx_func_lb_enable[0x1]; + u8 sx_sniffer_enable[0x1]; + u8 action_type[0x3]; + u8 reserved_at_69[0x1]; + u8 action_description[0x6]; + u8 gvmi[0x10]; + + u8 
encap_pointer_vlan_data[0x20]; + + u8 loopback_syndome_en[0x8]; + u8 loopback_syndome[0x8]; + u8 counter_trigger[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x8]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 go_back[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_rx_steering_mult_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_[0x2]; + u8 next_table_rank[0x2]; + + u8 member_count[0x10]; + u8 gvmi[0x10]; + + u8 qp_list_pointer[0x20]; + + u8 reserved_at_a0[0x1]; + u8 tunneling_action[0x3]; + u8 action_description[0x4]; + u8 reserved_at_a8[0x8]; + u8 counter_trigger_15_0[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x08]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 fail_on_error[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_modify_packet_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_[0x2]; + u8 next_table_rank[0x2]; + + u8 number_of_re_write_actions[0x10]; + u8 gvmi[0x10]; + + u8 header_re_write_actions_pointer[0x20]; + + u8 reserved_at_a0[0x1]; + u8 tunneling_action[0x3]; + u8 action_description[0x4]; + u8 reserved_at_a8[0x8]; + u8 counter_trigger_15_0[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x08]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 fail_on_error[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_eth_l2_src_bits { + u8 smac_47_16[0x20]; + + u8 smac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 qp_type[0x2]; + u8 ethertype_filter[0x1]; + u8 reserved_at_43[0x1]; + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 reserved_at_48[0x4]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_52[0x2]; + u8 first_vlan_id[0xc]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 reserved_at_68[0x4]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_dst_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 qp_type[0x2]; + u8 ethertype_filter[0x1]; + u8 reserved_at_43[0x1]; + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 reserved_at_48[0x4]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_52[0x2]; + u8 first_vlan_id[0xc]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 reserved_at_68[0x4]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_src_dst_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 
smac_47_32[0x10]; + + u8 smac_31_0[0x20]; + + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 l3_type[0x2]; + u8 reserved_at_66[0x6]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 first_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_5_tuple_bits { + u8 destination_address[0x20]; + + u8 source_address[0x20]; + + u8 source_port[0x10]; + u8 destination_port[0x10]; + + u8 fragmented[0x1]; + u8 first_fragment[0x1]; + u8 reserved_at_62[0x2]; + u8 reserved_at_64[0x1]; + u8 ecn[0x2]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 dscp[0x6]; + u8 reserved_at_76[0x2]; + u8 protocol[0x8]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv6_dst_bits { + u8 dst_ip_127_96[0x20]; + + u8 dst_ip_95_64[0x20]; + + u8 dst_ip_63_32[0x20]; + + u8 dst_ip_31_0[0x20]; +}; + +struct mlx5_ifc_ste_eth_l2_tnl_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 l2_tunneling_network_id[0x20]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 reserved_at_6c[0x3]; + u8 gre_key_flag[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 first_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv6_src_bits { + u8 src_ip_127_96[0x20]; + + u8 src_ip_95_64[0x20]; + + u8 src_ip_63_32[0x20]; + + u8 src_ip_31_0[0x20]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_misc_bits { + u8 version[0x4]; + u8 ihl[0x4]; + u8 reserved_at_8[0x8]; + u8 total_length[0x10]; + + u8 identification[0x10]; + u8 flags[0x3]; + u8 fragment_offset[0xd]; + + u8 time_to_live[0x8]; + u8 reserved_at_48[0x8]; + u8 checksum[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_eth_l4_bits { + u8 fragmented[0x1]; + u8 first_fragment[0x1]; + u8 reserved_at_2[0x6]; + u8 protocol[0x8]; + u8 dst_port[0x10]; + + u8 ipv6_version[0x4]; + u8 reserved_at_24[0x1]; + u8 ecn[0x2]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 src_port[0x10]; + + u8 ipv6_payload_length[0x10]; + u8 ipv6_hop_limit[0x8]; + u8 dscp[0x6]; + u8 reserved_at_5e[0x2]; + + u8 tcp_data_offset[0x4]; + u8 reserved_at_64[0x8]; + u8 flow_label[0x14]; +}; + +struct mlx5_ifc_ste_eth_l4_misc_bits { + u8 checksum[0x10]; + u8 length[0x10]; + + u8 seq_num[0x20]; + + u8 ack_num[0x20]; + + u8 urgent_pointer[0x10]; + u8 window_size[0x10]; +}; + +struct mlx5_ifc_ste_mpls_bits { + u8 mpls0_label[0x14]; + u8 mpls0_exp[0x3]; + u8 mpls0_s_bos[0x1]; + u8 mpls0_ttl[0x8]; + + u8 mpls1_label[0x20]; + + u8 mpls2_label[0x20]; + + u8 reserved_at_60[0x16]; + u8 mpls4_s_bit[0x1]; + u8 mpls4_qualifier[0x1]; + u8 mpls3_s_bit[0x1]; + u8 mpls3_qualifier[0x1]; + u8 mpls2_s_bit[0x1]; + u8 mpls2_qualifier[0x1]; + u8 mpls1_s_bit[0x1]; + u8 mpls1_qualifier[0x1]; + u8 mpls0_s_bit[0x1]; + u8 mpls0_qualifier[0x1]; +}; + +struct mlx5_ifc_ste_register_0_bits { + u8 register_0_h[0x20]; + + u8 register_0_l[0x20]; + + u8 register_1_h[0x20]; + + u8 register_1_l[0x20]; +}; + +struct mlx5_ifc_ste_register_1_bits { + u8 register_2_h[0x20]; + + u8 register_2_l[0x20]; + + u8 register_3_h[0x20]; + + u8 register_3_l[0x20]; +}; + +struct mlx5_ifc_ste_gre_bits { + u8 gre_c_present[0x1]; + u8 reserved_at_30[0x1]; + u8 gre_k_present[0x1]; + u8 
gre_s_present[0x1]; + u8 strict_src_route[0x1]; + u8 recur[0x3]; + u8 flags[0x5]; + u8 version[0x3]; + u8 gre_protocol[0x10]; + + u8 checksum[0x10]; + u8 offset[0x10]; + + u8 gre_key_h[0x18]; + u8 gre_key_l[0x8]; + + u8 seq_num[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_0_bits { + u8 parser_3_label[0x14]; + u8 parser_3_exp[0x3]; + u8 parser_3_s_bos[0x1]; + u8 parser_3_ttl[0x8]; + + u8 flex_parser_2[0x20]; + + u8 flex_parser_1[0x20]; + + u8 flex_parser_0[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_1_bits { + u8 flex_parser_7[0x20]; + + u8 flex_parser_6[0x20]; + + u8 flex_parser_5[0x20]; + + u8 flex_parser_4[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_tnl_bits { + u8 flex_parser_tunneling_header_63_32[0x20]; + + u8 flex_parser_tunneling_header_31_0[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_general_purpose_bits { + u8 general_purpose_lookup_field[0x20]; + + u8 reserved_at_20[0x20]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_src_gvmi_qp_bits { + u8 loopback_syndrome[0x8]; + u8 reserved_at_8[0x8]; + u8 source_gvmi[0x10]; + + u8 reserved_at_20[0x5]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 source_is_requestor[0x1]; + u8 source_qp[0x18]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_l2_hdr_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 smac_47_32[0x10]; + + u8 smac_31_0[0x20]; + + u8 ethertype[0x10]; + u8 vlan_type[0x10]; + + u8 vlan[0x10]; + u8 reserved_at_90[0x10]; +}; + +/* Both HW set and HW add share the same HW format with different opcodes */ +struct mlx5_ifc_dr_action_hw_set_bits { + u8 opcode[0x8]; + u8 destination_field_code[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x3]; + u8 destination_length[0x5]; + + u8 inline_data[0x20]; +}; + +#endif /* MLX5_IFC_DR_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h new file mode 100644 index 000000000000..adda9cbfba45 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef _MLX5DR_H_ +#define _MLX5DR_H_ + +struct mlx5dr_domain; +struct mlx5dr_table; +struct mlx5dr_matcher; +struct mlx5dr_rule; +struct mlx5dr_action; + +enum mlx5dr_domain_type { + MLX5DR_DOMAIN_TYPE_NIC_RX, + MLX5DR_DOMAIN_TYPE_NIC_TX, + MLX5DR_DOMAIN_TYPE_FDB, +}; + +enum mlx5dr_domain_sync_flags { + MLX5DR_DOMAIN_SYNC_FLAGS_SW = 1 << 0, + MLX5DR_DOMAIN_SYNC_FLAGS_HW = 1 << 1, +}; + +enum mlx5dr_action_reformat_type { + DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2, + DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2, + DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2, + DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3, +}; + +struct mlx5dr_match_parameters { + size_t match_sz; + u64 *match_buf; /* Device spec format */ +}; + +#ifdef CONFIG_MLX5_SW_STEERING + +struct mlx5dr_domain * +mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type); + +int mlx5dr_domain_destroy(struct mlx5dr_domain *domain); + +int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags); + +void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, + struct mlx5dr_domain *peer_dmn); + +struct mlx5dr_table * +mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level); + +int mlx5dr_table_destroy(struct mlx5dr_table *table); + +u32 mlx5dr_table_get_id(struct mlx5dr_table *table); + +struct mlx5dr_matcher * 
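/*
 * Illustrative sketch, not part of this patch: the
 * mlx5_ifc_dr_action_hw_set_bits layout at the end of mlx5_ifc_dr.h
 * above packs one re-write instruction: an opcode (the SET/ADD values
 * earlier in that header), a destination field code, a bit offset and
 * length within that field, and 32 bits of inline data. Encoding
 * "set bits 16..31 of L4_0 to 0x1234" would look roughly like:
 *
 *	u8 act[MLX5_ST_SZ_BYTES(dr_action_hw_set)] = {};
 *
 *	MLX5_SET(dr_action_hw_set, act, opcode,
 *		 MLX5DR_ACTION_MDFY_HW_OP_SET);
 *	MLX5_SET(dr_action_hw_set, act, destination_field_code,
 *		 MLX5DR_ACTION_MDFY_HW_FLD_L4_0);
 *	MLX5_SET(dr_action_hw_set, act, destination_left_shifter, 16);
 *	MLX5_SET(dr_action_hw_set, act, destination_length, 16);
 *	MLX5_SET(dr_action_hw_set, act, inline_data, 0x1234);
 */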
+mlx5dr_matcher_create(struct mlx5dr_table *table, + u16 priority, + u8 match_criteria_enable, + struct mlx5dr_match_parameters *mask); + +int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher); + +struct mlx5dr_rule * +mlx5dr_rule_create(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[]); + +int mlx5dr_rule_destroy(struct mlx5dr_rule *rule); + +int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action); + +struct mlx5dr_action * +mlx5dr_action_create_dest_table(struct mlx5dr_table *table); + +struct mlx5dr_action * +mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, + struct mlx5_core_dev *mdev); + +struct mlx5dr_action * +mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, + u32 vport, u8 vhca_id_valid, + u16 vhca_id); + +struct mlx5dr_action *mlx5dr_action_create_drop(void); + +struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value); + +struct mlx5dr_action * +mlx5dr_action_create_flow_counter(u32 counter_id); + +struct mlx5dr_action * +mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, + enum mlx5dr_action_reformat_type reformat_type, + size_t data_sz, + void *data); + +struct mlx5dr_action * +mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain, + u32 flags, + size_t actions_sz, + __be64 actions[]); + +struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void); + +struct mlx5dr_action * +mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, __be32 vlan_hdr); + +int mlx5dr_action_destroy(struct mlx5dr_action *action); + +static inline bool +mlx5dr_is_supported(struct mlx5_core_dev *dev) +{ + return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner); +} + +#else /* CONFIG_MLX5_SW_STEERING */ + +static inline struct mlx5dr_domain * +mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) { return NULL; } + +static inline int +mlx5dr_domain_destroy(struct mlx5dr_domain *domain) { return 0; } + +static inline int +mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags) { return 0; } + +static inline void +mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, + struct mlx5dr_domain *peer_dmn) { } + +static inline struct mlx5dr_table * +mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level) { return NULL; } + +static inline int +mlx5dr_table_destroy(struct mlx5dr_table *table) { return 0; } + +static inline u32 +mlx5dr_table_get_id(struct mlx5dr_table *table) { return 0; } + +static inline struct mlx5dr_matcher * +mlx5dr_matcher_create(struct mlx5dr_table *table, + u16 priority, + u8 match_criteria_enable, + struct mlx5dr_match_parameters *mask) { return NULL; } + +static inline int +mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) { return 0; } + +static inline struct mlx5dr_rule * +mlx5dr_rule_create(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[]) { return NULL; } + +static inline int +mlx5dr_rule_destroy(struct mlx5dr_rule *rule) { return 0; } + +static inline int +mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action) { return 0; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_dest_table(struct mlx5dr_table *table) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, + struct mlx5_core_dev *mdev) { return NULL; } + +static inline struct mlx5dr_action * 
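/*
 * Illustrative sketch, not part of this patch: the lifecycle implied by
 * the API declared above runs domain -> table -> matcher -> actions ->
 * rule, torn down in reverse. Error handling omitted; mask and value
 * are mlx5dr_match_parameters prepared by the caller:
 *
 *	dmn = mlx5dr_domain_create(mdev, MLX5DR_DOMAIN_TYPE_FDB);
 *	tbl = mlx5dr_table_create(dmn, 0);
 *	m = mlx5dr_matcher_create(tbl, 0, criteria, &mask);
 *	acts[0] = mlx5dr_action_create_tag(0x1234);
 *	acts[1] = mlx5dr_action_create_drop();
 *	rule = mlx5dr_rule_create(m, &value, 2, acts);
 *	...
 *	mlx5dr_rule_destroy(rule);
 *	mlx5dr_action_destroy(acts[1]);
 *	mlx5dr_action_destroy(acts[0]);
 *	mlx5dr_matcher_destroy(m);
 *	mlx5dr_table_destroy(tbl);
 *	mlx5dr_domain_destroy(dmn);
 */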
+mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, + u32 vport, u8 vhca_id_valid, + u16 vhca_id) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_drop(void) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_tag(u32 tag_value) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_flow_counter(u32 counter_id) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, + enum mlx5dr_action_reformat_type reformat_type, + size_t data_sz, + void *data) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain, + u32 flags, + size_t actions_sz, + __be64 actions[]) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_pop_vlan(void) { return NULL; } + +static inline struct mlx5dr_action * +mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, + __be32 vlan_hdr) { return NULL; } + +static inline int +mlx5dr_action_destroy(struct mlx5dr_action *action) { return 0; } + +static inline bool +mlx5dr_is_supported(struct mlx5_core_dev *dev) { return false; } + +#endif /* CONFIG_MLX5_SW_STEERING */ + +#endif /* _MLX5DR_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index c912d82ca64b..30f7848a6f88 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -122,12 +122,13 @@ void mlx5_query_min_inline(struct mlx5_core_dev *mdev, u8 *min_inline_mode) { switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + if (!mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode)) + break; + /* fall through */ case MLX5_CAP_INLINE_MODE_L2: *min_inline_mode = MLX5_INLINE_MODE_L2; break; - case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: - mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode); - break; case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: *min_inline_mode = MLX5_INLINE_MODE_NONE; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 953cc8efba69..dd2315ce4441 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -44,6 +44,11 @@ u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) return wq->fbc.sz_m1 + 1; } +u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq) +{ + return wq->fbc.log_stride; +} + u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq) { return (u32)wq->fbc.sz_m1 + 1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index f1ec58c9e9e3..55791f71a778 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -89,6 +89,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *cqc, struct mlx5_cqwq *wq, struct mlx5_wq_ctrl *wq_ctrl); u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq); +u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq); int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_ll *wq, diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 06c80343d9ed..f458fd1ce9f8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -71,7 +71,7 @@ config MLXSW_SWITCHX2 module will be called 
mlxsw_switchx2. config MLXSW_SPECTRUM - tristate "Mellanox Technologies Spectrum support" + tristate "Mellanox Technologies Spectrum family support" depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q depends on PSAMPLE || PSAMPLE=n depends on BRIDGE || BRIDGE=n @@ -87,8 +87,8 @@ config MLXSW_SPECTRUM select NET_PTP_CLASSIFY if PTP_1588_CLOCK default m ---help--- - This driver supports Mellanox Technologies Spectrum Ethernet - Switch ASICs. + This driver supports Mellanox Technologies + Spectrum/Spectrum-2/Spectrum-3 Ethernet Switch ASICs. To compile this driver as a module, choose M here: the module will be called mlxsw_spectrum. diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 171b36bd8a4e..0e86a581d45b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -29,7 +29,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_mr_tcam.o spectrum_mr.o \ spectrum_qdisc.o spectrum_span.o \ spectrum_nve.o spectrum_nve_vxlan.o \ - spectrum_dpipe.o + spectrum_dpipe.o spectrum_trap.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_PTP_1588_CLOCK) += spectrum_ptp.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 17ceac7505e5..14dcc786926d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -80,7 +80,6 @@ struct mlxsw_core { struct mlxsw_thermal *thermal; struct mlxsw_core_port *ports; unsigned int max_ports; - bool reload_fail; bool fw_flash_in_progress; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ @@ -984,23 +983,29 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, return 0; } -static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink, - struct netlink_ext_ack *extack) +static int +mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink, + struct netlink_ext_ack *extack) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); - int err; if (!(mlxsw_core->bus->features & MLXSW_BUS_F_RESET)) return -EOPNOTSUPP; mlxsw_core_bus_device_unregister(mlxsw_core, true); - err = mlxsw_core_bus_device_register(mlxsw_core->bus_info, - mlxsw_core->bus, - mlxsw_core->bus_priv, true, - devlink); - mlxsw_core->reload_fail = !!err; + return 0; +} - return err; +static int +mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + + return mlxsw_core_bus_device_register(mlxsw_core->bus_info, + mlxsw_core->bus, + mlxsw_core->bus_priv, true, + devlink); } static int mlxsw_devlink_flash_update(struct devlink *devlink, @@ -1017,8 +1022,57 @@ static int mlxsw_devlink_flash_update(struct devlink *devlink, component, extack); } +static int mlxsw_devlink_trap_init(struct devlink *devlink, + const struct devlink_trap *trap, + void *trap_ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_init) + return -EOPNOTSUPP; + return mlxsw_driver->trap_init(mlxsw_core, trap, trap_ctx); +} + +static void mlxsw_devlink_trap_fini(struct devlink *devlink, + const struct devlink_trap *trap, + void *trap_ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = 
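/*
 * Illustrative sketch, not part of this patch: the monolithic devlink
 * .reload callback is split above into a .reload_down phase (quiesce
 * and unregister) and a .reload_up phase (re-register); devlink core
 * drives the pair and now owns the failed-reload state the removed
 * mlxsw_core->reload_fail flag used to track. Roughly how the core
 * invokes them:
 *
 *	err = ops->reload_down(devlink, extack);
 *	if (err)
 *		return err;
 *	return ops->reload_up(devlink, extack);
 */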
mlxsw_core->driver; + + if (!mlxsw_driver->trap_fini) + return; + mlxsw_driver->trap_fini(mlxsw_core, trap, trap_ctx); +} + +static int mlxsw_devlink_trap_action_set(struct devlink *devlink, + const struct devlink_trap *trap, + enum devlink_trap_action action) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_action_set) + return -EOPNOTSUPP; + return mlxsw_driver->trap_action_set(mlxsw_core, trap, action); +} + +static int +mlxsw_devlink_trap_group_init(struct devlink *devlink, + const struct devlink_trap_group *group) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->trap_group_init) + return -EOPNOTSUPP; + return mlxsw_driver->trap_group_init(mlxsw_core, group); +} + static const struct devlink_ops mlxsw_devlink_ops = { - .reload = mlxsw_devlink_core_bus_device_reload, + .reload_down = mlxsw_devlink_core_bus_device_reload_down, + .reload_up = mlxsw_devlink_core_bus_device_reload_up, .port_type_set = mlxsw_devlink_port_type_set, .port_split = mlxsw_devlink_port_split, .port_unsplit = mlxsw_devlink_port_unsplit, @@ -1034,6 +1088,10 @@ static const struct devlink_ops mlxsw_devlink_ops = { .sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get, .info_get = mlxsw_devlink_info_get, .flash_update = mlxsw_devlink_flash_update, + .trap_init = mlxsw_devlink_trap_init, + .trap_fini = mlxsw_devlink_trap_fini, + .trap_action_set = mlxsw_devlink_trap_action_set, + .trap_group_init = mlxsw_devlink_trap_group_init, }; static int @@ -1191,7 +1249,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, { struct devlink *devlink = priv_to_devlink(mlxsw_core); - if (mlxsw_core->reload_fail) { + if (devlink_is_reload_failed(devlink)) { if (!reload) /* Only the parts that were not de-initialized in the * failed reload attempt need to be de-initialized. 
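/*
 * Illustrative sketch, not part of this patch: each devlink trap
 * callback above follows one pattern. It resolves the mlxsw_core from
 * the devlink instance, then delegates to the corresponding optional
 * mlxsw_driver hook, failing with -EOPNOTSUPP (or returning quietly
 * for the void fini hook) when a driver does not provide it:
 *
 *	static int trap_op_sketch(struct devlink *devlink,
 *				  const struct devlink_trap *trap,
 *				  enum devlink_trap_action action)
 *	{
 *		struct mlxsw_core *core = devlink_priv(devlink);
 *
 *		if (!core->driver->trap_action_set)
 *			return -EOPNOTSUPP;
 *		return core->driver->trap_action_set(core, trap, action);
 *	}
 */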
@@ -1477,6 +1535,18 @@ void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_trap_unregister); +int mlxsw_core_trap_action_set(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + enum mlxsw_reg_hpkt_action action) +{ + char hpkt_pl[MLXSW_REG_HPKT_LEN]; + + mlxsw_reg_hpkt_pack(hpkt_pl, action, listener->trap_id, + listener->trap_group, listener->is_ctrl); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl); +} +EXPORT_SYMBOL(mlxsw_core_trap_action_set); + static u64 mlxsw_core_tid_get(struct mlxsw_core *mlxsw_core) { return atomic64_inc_return(&mlxsw_core->emad.tid); @@ -1794,11 +1864,12 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_res_get); -int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, - u32 port_number, bool split, - u32 split_port_subnumber, - const unsigned char *switch_id, - unsigned char switch_id_len) +static int __mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, + enum devlink_port_flavour flavour, + u32 port_number, bool split, + u32 split_port_subnumber, + const unsigned char *switch_id, + unsigned char switch_id_len) { struct devlink *devlink = priv_to_devlink(mlxsw_core); struct mlxsw_core_port *mlxsw_core_port = @@ -1807,17 +1878,16 @@ int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, int err; mlxsw_core_port->local_port = local_port; - devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, - port_number, split, split_port_subnumber, + devlink_port_attrs_set(devlink_port, flavour, port_number, + split, split_port_subnumber, switch_id, switch_id_len); err = devlink_port_register(devlink, devlink_port, local_port); if (err) memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port)); return err; } -EXPORT_SYMBOL(mlxsw_core_port_init); -void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port) +static void __mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port) { struct mlxsw_core_port *mlxsw_core_port = &mlxsw_core->ports[local_port]; @@ -1826,8 +1896,53 @@ void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port) devlink_port_unregister(devlink_port); memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port)); } + +int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, + u32 port_number, bool split, + u32 split_port_subnumber, + const unsigned char *switch_id, + unsigned char switch_id_len) +{ + return __mlxsw_core_port_init(mlxsw_core, local_port, + DEVLINK_PORT_FLAVOUR_PHYSICAL, + port_number, split, split_port_subnumber, + switch_id, switch_id_len); +} +EXPORT_SYMBOL(mlxsw_core_port_init); + +void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port) +{ + __mlxsw_core_port_fini(mlxsw_core, local_port); +} EXPORT_SYMBOL(mlxsw_core_port_fini); +int mlxsw_core_cpu_port_init(struct mlxsw_core *mlxsw_core, + void *port_driver_priv, + const unsigned char *switch_id, + unsigned char switch_id_len) +{ + struct mlxsw_core_port *mlxsw_core_port = + &mlxsw_core->ports[MLXSW_PORT_CPU_PORT]; + int err; + + err = __mlxsw_core_port_init(mlxsw_core, MLXSW_PORT_CPU_PORT, + DEVLINK_PORT_FLAVOUR_CPU, + 0, false, 0, + switch_id, switch_id_len); + if (err) + return err; + + mlxsw_core_port->port_driver_priv = port_driver_priv; + return 0; +} +EXPORT_SYMBOL(mlxsw_core_cpu_port_init); + +void mlxsw_core_cpu_port_fini(struct mlxsw_core *mlxsw_core) +{ + __mlxsw_core_port_fini(mlxsw_core, MLXSW_PORT_CPU_PORT); +} +EXPORT_SYMBOL(mlxsw_core_cpu_port_fini); + void 
mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port, void *port_driver_priv, struct net_device *dev) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 8efcff4b59cb..5d7d2ab6d155 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -128,6 +128,9 @@ int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, void *priv); +int mlxsw_core_trap_action_set(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listener, + enum mlxsw_reg_hpkt_action action); typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload, size_t payload_len, unsigned long cb_priv); @@ -174,6 +177,11 @@ int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, const unsigned char *switch_id, unsigned char switch_id_len); void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port); +int mlxsw_core_cpu_port_init(struct mlxsw_core *mlxsw_core, + void *port_driver_priv, + const unsigned char *switch_id, + unsigned char switch_id_len); +void mlxsw_core_cpu_port_fini(struct mlxsw_core *mlxsw_core); void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port, void *port_driver_priv, struct net_device *dev); void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port, @@ -289,6 +297,15 @@ struct mlxsw_driver { int (*flash_update)(struct mlxsw_core *mlxsw_core, const char *file_name, const char *component, struct netlink_ext_ack *extack); + int (*trap_init)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx); + void (*trap_fini)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx); + int (*trap_action_set)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, + enum devlink_trap_action action); + int (*trap_group_init)(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group); void (*txhdr_construct)(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); int (*resources_register)(struct mlxsw_core *mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h index 946339e13eb9..5b1323645a5d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -9,6 +9,7 @@ #define PCI_DEVICE_ID_MELLANOX_SWITCHX2 0xc738 #define PCI_DEVICE_ID_MELLANOX_SPECTRUM 0xcb84 #define PCI_DEVICE_ID_MELLANOX_SPECTRUM2 0xcf6c +#define PCI_DEVICE_ID_MELLANOX_SPECTRUM3 0xcf70 #define PCI_DEVICE_ID_MELLANOX_SWITCHIB 0xcb20 #define PCI_DEVICE_ID_MELLANOX_SWITCHIB2 0xcf08 diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index ead36702549a..5494cf93f34c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4126,7 +4126,6 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32); #define MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2 BIT(5) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 BIT(6) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 BIT(7) -#define MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4 BIT(8) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR BIT(12) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR BIT(13) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR BIT(14) @@ -5422,6 +5421,14 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR, MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0, 
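/*
 * Illustrative sketch, not part of this patch: the
 * mlxsw_core_trap_action_set() helper exported above lets a driver
 * flip a registered listener's host-packet action at runtime by
 * re-writing the HPKT register with the listener's own trap ID and
 * group. Paired with the new HPKT actions added to reg.h below, a
 * hypothetical toggle could read:
 *
 *	static int trap_toggle_sketch(struct mlxsw_core *core,
 *				      const struct mlxsw_listener *l,
 *				      bool enable)
 *	{
 *		enum mlxsw_reg_hpkt_action a = enable ?
 *			MLXSW_REG_HPKT_ACTION_TRAP_EXCEPTION_TO_CPU :
 *			MLXSW_REG_HPKT_ACTION_SET_FW_DEFAULT;
 *
 *		return mlxsw_core_trap_action_set(core, l, a);
 *	}
 */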
MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1, + + __MLXSW_REG_HTGT_TRAP_GROUP_MAX, + MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1 +}; + +enum mlxsw_reg_htgt_discard_trap_group { + MLXSW_REG_HTGT_DISCARD_TRAP_GROUP_BASE = MLXSW_REG_HTGT_TRAP_GROUP_MAX, + MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS, }; /* reg_htgt_trap_group @@ -5559,6 +5566,8 @@ enum mlxsw_reg_hpkt_action { MLXSW_REG_HPKT_ACTION_DISCARD, MLXSW_REG_HPKT_ACTION_SOFT_DISCARD, MLXSW_REG_HPKT_ACTION_TRAP_AND_SOFT_DISCARD, + MLXSW_REG_HPKT_ACTION_TRAP_EXCEPTION_TO_CPU, + MLXSW_REG_HPKT_ACTION_SET_FW_DEFAULT = 15, }; /* reg_hpkt_action @@ -5569,6 +5578,8 @@ enum mlxsw_reg_hpkt_action { * 3 - Discard. * 4 - Soft discard (allow other traps to act on the packet). * 5 - Trap and soft discard (allow other traps to overwrite this trap). + * 6 - Trap to CPU (CPU receives sole copy) and count it as error. + * 15 - Restore the firmware's default action. * Access: RW * * Note: Must be set to 0 (forward) for event trap IDs, as they are already diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index eda9c23e87b2..dd234cf7b39d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -48,7 +48,7 @@ #define MLXSW_SP1_FWREV_MAJOR 13 #define MLXSW_SP1_FWREV_MINOR 2000 -#define MLXSW_SP1_FWREV_SUBMINOR 1122 +#define MLXSW_SP1_FWREV_SUBMINOR 1886 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { @@ -65,6 +65,7 @@ static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { static const char mlxsw_sp1_driver_name[] = "mlxsw_spectrum"; static const char mlxsw_sp2_driver_name[] = "mlxsw_spectrum2"; +static const char mlxsw_sp3_driver_name[] = "mlxsw_spectrum3"; static const char mlxsw_sp_driver_version[] = "1.0"; static const unsigned char mlxsw_sp1_mac_mask[ETH_ALEN] = { @@ -174,6 +175,10 @@ struct mlxsw_sp_ptp_ops { void (*shaper_work)(struct work_struct *work); int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp, struct ethtool_ts_info *info); + int (*get_stats_count)(void); + void (*get_stats_strings)(u8 **p); + void (*get_stats)(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index); }; static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev, @@ -1625,7 +1630,7 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port, } flow_block_cb_incref(block_cb); err = mlxsw_sp_acl_block_bind(mlxsw_sp, acl_block, - mlxsw_sp_port, ingress); + mlxsw_sp_port, ingress, f->extack); if (err) goto err_block_bind; @@ -2328,6 +2333,7 @@ static void mlxsw_sp_port_get_tc_strings(u8 **p, int tc) static void mlxsw_sp_port_get_strings(struct net_device *dev, u32 stringset, u8 *data) { + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); u8 *p = data; int i; @@ -2369,6 +2375,7 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev, for (i = 0; i < TC_MAX_QUEUE; i++) mlxsw_sp_port_get_tc_strings(&p, i); + mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_strings(&p); break; } } @@ -2463,6 +2470,7 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, static void mlxsw_sp_port_get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int i, data_index = 0; /* IEEE 802.3 Counters */ @@ -2503,13 +2511,21 @@ static void mlxsw_sp_port_get_stats(struct net_device *dev, data, data_index); data_index += MLXSW_SP_PORT_HW_TC_STATS_LEN; } + + /* PTP counters */ + 
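/*
 * Illustrative sketch, not part of this patch: the ethtool hunks here
 * thread the PTP counters through three ptp_ops hooks that must stay
 * in lockstep. get_stats_count() sizes the array, get_stats_strings()
 * appends that many names, and get_stats() fills that many values at
 * the running data_index. The count side reduces to:
 *
 *	static int sset_count_sketch(struct mlxsw_sp_port *port)
 *	{
 *		const struct mlxsw_sp_ptp_ops *ptp_ops =
 *			port->mlxsw_sp->ptp_ops;
 *
 *		return MLXSW_SP_PORT_ETHTOOL_STATS_LEN +
 *		       ptp_ops->get_stats_count();
 *	}
 */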
mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats(mlxsw_sp_port, + data, data_index); + data_index += mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_count(); } static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset) { + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + switch (sset) { case ETH_SS_STATS: - return MLXSW_SP_PORT_ETHTOOL_STATS_LEN; + return MLXSW_SP_PORT_ETHTOOL_STATS_LEN + + mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_count(); default: return -EOPNOTSUPP; } @@ -2608,26 +2624,6 @@ static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { .speed = SPEED_50000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, - .mask_ethtool = ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT, - .speed = SPEED_56000, - }, - { - .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, - .mask_ethtool = ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT, - .speed = SPEED_56000, - }, - { - .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, - .mask_ethtool = ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT, - .speed = SPEED_56000, - }, - { - .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, - .mask_ethtool = ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT, - .speed = SPEED_56000, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4, .mask_ethtool = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, .speed = SPEED_100000, @@ -2674,7 +2670,7 @@ mlxsw_sp1_from_ptys_supported_port(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp1_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, - unsigned long *mode) + u8 width, unsigned long *mode) { int i; @@ -2715,7 +2711,7 @@ mlxsw_sp1_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, } static u32 -mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, +mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, u8 width, const struct ethtool_link_ksettings *cmd) { u32 ptys_proto = 0; @@ -2729,7 +2725,8 @@ mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, return ptys_proto; } -static u32 mlxsw_sp1_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 speed) +static u32 mlxsw_sp1_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, u8 width, + u32 speed) { u32 ptys_proto = 0; int i; @@ -2917,11 +2914,31 @@ mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4[] = { #define MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN \ ARRAY_SIZE(mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4) +#define MLXSW_SP_PORT_MASK_WIDTH_1X BIT(0) +#define MLXSW_SP_PORT_MASK_WIDTH_2X BIT(1) +#define MLXSW_SP_PORT_MASK_WIDTH_4X BIT(2) + +static u8 mlxsw_sp_port_mask_width_get(u8 width) +{ + switch (width) { + case 1: + return MLXSW_SP_PORT_MASK_WIDTH_1X; + case 2: + return MLXSW_SP_PORT_MASK_WIDTH_2X; + case 4: + return MLXSW_SP_PORT_MASK_WIDTH_4X; + default: + WARN_ON_ONCE(1); + return 0; + } +} + struct mlxsw_sp2_port_link_mode { const enum ethtool_link_mode_bit_indices *mask_ethtool; int m_ethtool_len; u32 mask; u32 speed; + u8 mask_width; }; static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { @@ -2929,72 +2946,97 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_SGMII_100M, .mask_ethtool = mlxsw_sp2_mask_ethtool_sgmii_100m, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_SGMII_100M_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X, .speed = SPEED_100, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_1000BASE_X_SGMII, .mask_ethtool = mlxsw_sp2_mask_ethtool_1000base_x_sgmii, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_1000BASE_X_SGMII_LEN, + .mask_width = 
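/*
 * Illustrative sketch, not part of this patch: each Spectrum-2
 * link-mode entry now carries a mask_width bitmap. A port's lane width
 * (1, 2 or 4) is converted to a single bit by
 * mlxsw_sp_port_mask_width_get() above and ANDed against that bitmap,
 * so only modes attainable at the port's current width are reported or
 * advertised:
 *
 *	static bool link_mode_fits_sketch(u8 port_width, int i)
 *	{
 *		u8 mask_width = mlxsw_sp_port_mask_width_get(port_width);
 *
 *		return mask_width & mlxsw_sp2_port_link_mode[i].mask_width;
 *	}
 */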
MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X, .speed = SPEED_1000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_2_5GBASE_X_2_5GMII, .mask_ethtool = mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X, .speed = SPEED_2500, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R, .mask_ethtool = mlxsw_sp2_mask_ethtool_5gbase_r, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X, .speed = SPEED_5000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_XFI_XAUI_1_10G, .mask_ethtool = mlxsw_sp2_mask_ethtool_xfi_xaui_1_10g, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_XFI_XAUI_1_10G_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X, .speed = SPEED_10000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G, .mask_ethtool = mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_XLAUI_4_XLPPI_4_40G_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X, .speed = SPEED_40000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_25GAUI_1_25GBASE_CR_KR, .mask_ethtool = mlxsw_sp2_mask_ethtool_25gaui_1_25gbase_cr_kr, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_25GAUI_1_25GBASE_CR_KR_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X, .speed = SPEED_25000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_2_LAUI_2_50GBASE_CR2_KR2, .mask_ethtool = mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_50GAUI_2_LAUI_2_50GBASE_CR2_KR2_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_2X | + MLXSW_SP_PORT_MASK_WIDTH_4X, .speed = SPEED_50000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR, .mask_ethtool = mlxsw_sp2_mask_ethtool_50gaui_1_laui_1_50gbase_cr_kr, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_50GAUI_1_LAUI_1_50GBASE_CR_KR_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X, .speed = SPEED_50000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4, .mask_ethtool = mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_CAUI_4_100GBASE_CR4_KR4_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X, .speed = SPEED_100000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2, .mask_ethtool = mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_100GAUI_2_100GBASE_CR2_KR2_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_2X, .speed = SPEED_100000, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4, .mask_ethtool = mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN, + .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X, .speed = SPEED_200000, }, }; @@ -3022,12 +3064,14 @@ mlxsw_sp2_set_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode, static void mlxsw_sp2_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, - unsigned long *mode) + u8 width, unsigned long *mode) { + u8 mask_width = mlxsw_sp_port_mask_width_get(width); int i; for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) + if ((ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) && + (mask_width & 
mlxsw_sp2_port_link_mode[i].mask_width)) mlxsw_sp2_set_bit_ethtool(&mlxsw_sp2_port_link_mode[i], mode); } @@ -3078,27 +3122,32 @@ mlxsw_sp2_test_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode, } static u32 -mlxsw_sp2_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, +mlxsw_sp2_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, u8 width, const struct ethtool_link_ksettings *cmd) { + u8 mask_width = mlxsw_sp_port_mask_width_get(width); u32 ptys_proto = 0; int i; for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if (mlxsw_sp2_test_bit_ethtool(&mlxsw_sp2_port_link_mode[i], + if ((mask_width & mlxsw_sp2_port_link_mode[i].mask_width) && + mlxsw_sp2_test_bit_ethtool(&mlxsw_sp2_port_link_mode[i], cmd->link_modes.advertising)) ptys_proto |= mlxsw_sp2_port_link_mode[i].mask; } return ptys_proto; } -static u32 mlxsw_sp2_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 speed) +static u32 mlxsw_sp2_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, + u8 width, u32 speed) { + u8 mask_width = mlxsw_sp_port_mask_width_get(width); u32 ptys_proto = 0; int i; for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if (speed == mlxsw_sp2_port_link_mode[i].speed) + if ((speed == mlxsw_sp2_port_link_mode[i].speed) && + (mask_width & mlxsw_sp2_port_link_mode[i].mask_width)) ptys_proto |= mlxsw_sp2_port_link_mode[i].mask; } return ptys_proto; @@ -3182,7 +3231,7 @@ mlxsw_sp2_port_type_speed_ops = { static void mlxsw_sp_port_get_link_supported(struct mlxsw_sp *mlxsw_sp, u32 eth_proto_cap, - struct ethtool_link_ksettings *cmd) + u8 width, struct ethtool_link_ksettings *cmd) { const struct mlxsw_sp_port_type_speed_ops *ops; @@ -3193,12 +3242,13 @@ mlxsw_sp_port_get_link_supported(struct mlxsw_sp *mlxsw_sp, u32 eth_proto_cap, ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); ops->from_ptys_supported_port(mlxsw_sp, eth_proto_cap, cmd); - ops->from_ptys_link(mlxsw_sp, eth_proto_cap, cmd->link_modes.supported); + ops->from_ptys_link(mlxsw_sp, eth_proto_cap, width, + cmd->link_modes.supported); } static void mlxsw_sp_port_get_link_advertise(struct mlxsw_sp *mlxsw_sp, - u32 eth_proto_admin, bool autoneg, + u32 eth_proto_admin, bool autoneg, u8 width, struct ethtool_link_ksettings *cmd) { const struct mlxsw_sp_port_type_speed_ops *ops; @@ -3209,7 +3259,7 @@ mlxsw_sp_port_get_link_advertise(struct mlxsw_sp *mlxsw_sp, return; ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); - ops->from_ptys_link(mlxsw_sp, eth_proto_admin, + ops->from_ptys_link(mlxsw_sp, eth_proto_admin, width, cmd->link_modes.advertising); } @@ -3264,10 +3314,11 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, &eth_proto_cap, &eth_proto_admin, &eth_proto_oper); - mlxsw_sp_port_get_link_supported(mlxsw_sp, eth_proto_cap, cmd); + mlxsw_sp_port_get_link_supported(mlxsw_sp, eth_proto_cap, + mlxsw_sp_port->mapping.width, cmd); mlxsw_sp_port_get_link_advertise(mlxsw_sp, eth_proto_admin, autoneg, - cmd); + mlxsw_sp_port->mapping.width, cmd); cmd->base.autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; connector_type = mlxsw_reg_ptys_connector_type_get(ptys_pl); @@ -3300,13 +3351,11 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, &eth_proto_cap, NULL, NULL); autoneg = cmd->base.autoneg == AUTONEG_ENABLE; - if (!autoneg && cmd->base.speed == SPEED_56000) { - netdev_err(dev, "56G not supported with autoneg off\n"); - return -EINVAL; - } eth_proto_new = autoneg ?
- ops->to_ptys_advert_link(mlxsw_sp, cmd) : - ops->to_ptys_speed(mlxsw_sp, cmd->base.speed); + ops->to_ptys_advert_link(mlxsw_sp, mlxsw_sp_port->mapping.width, + cmd) : + ops->to_ptys_speed(mlxsw_sp, mlxsw_sp_port->mapping.width, + cmd->base.speed); eth_proto_new = eth_proto_new & eth_proto_cap; if (!eth_proto_new) { @@ -3823,6 +3872,45 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_core_port_fini(mlxsw_sp->core, local_port); } +static int mlxsw_sp_cpu_port_create(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + mlxsw_sp_port = kzalloc(sizeof(*mlxsw_sp_port), GFP_KERNEL); + if (!mlxsw_sp_port) + return -ENOMEM; + + mlxsw_sp_port->mlxsw_sp = mlxsw_sp; + mlxsw_sp_port->local_port = MLXSW_PORT_CPU_PORT; + + err = mlxsw_core_cpu_port_init(mlxsw_sp->core, + mlxsw_sp_port, + mlxsw_sp->base_mac, + sizeof(mlxsw_sp->base_mac)); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize core CPU port\n"); + goto err_core_cpu_port_init; + } + + mlxsw_sp->ports[MLXSW_PORT_CPU_PORT] = mlxsw_sp_port; + return 0; + +err_core_cpu_port_init: + kfree(mlxsw_sp_port); + return err; +} + +static void mlxsw_sp_cpu_port_remove(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_sp->ports[MLXSW_PORT_CPU_PORT]; + + mlxsw_core_cpu_port_fini(mlxsw_sp->core); + mlxsw_sp->ports[MLXSW_PORT_CPU_PORT] = NULL; + kfree(mlxsw_sp_port); +} + static bool mlxsw_sp_port_created(struct mlxsw_sp *mlxsw_sp, u8 local_port) { return mlxsw_sp->ports[local_port] != NULL; @@ -3835,6 +3923,7 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp) for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) if (mlxsw_sp_port_created(mlxsw_sp, i)) mlxsw_sp_port_remove(mlxsw_sp, i); + mlxsw_sp_cpu_port_remove(mlxsw_sp); kfree(mlxsw_sp->port_to_module); kfree(mlxsw_sp->ports); } @@ -3859,6 +3948,10 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) goto err_port_to_module_alloc; } + err = mlxsw_sp_cpu_port_create(mlxsw_sp); + if (err) + goto err_cpu_port_create; + for (i = 1; i < max_ports; i++) { /* Mark as invalid */ mlxsw_sp->port_to_module[i] = -1; @@ -3882,6 +3975,8 @@ err_port_module_info_get: for (i--; i >= 1; i--) if (mlxsw_sp_port_created(mlxsw_sp, i)) mlxsw_sp_port_remove(mlxsw_sp, i); + mlxsw_sp_cpu_port_remove(mlxsw_sp); +err_cpu_port_create: kfree(mlxsw_sp->port_to_module); err_port_to_module_alloc: kfree(mlxsw_sp->ports); @@ -4609,6 +4704,9 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = { .hwtstamp_set = mlxsw_sp1_ptp_hwtstamp_set, .shaper_work = mlxsw_sp1_ptp_shaper_work, .get_ts_info = mlxsw_sp1_ptp_get_ts_info, + .get_stats_count = mlxsw_sp1_get_stats_count, + .get_stats_strings = mlxsw_sp1_get_stats_strings, + .get_stats = mlxsw_sp1_get_stats, }; static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { @@ -4622,6 +4720,9 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { .hwtstamp_set = mlxsw_sp2_ptp_hwtstamp_set, .shaper_work = mlxsw_sp2_ptp_shaper_work, .get_ts_info = mlxsw_sp2_ptp_get_ts_info, + .get_stats_count = mlxsw_sp2_get_stats_count, + .get_stats_strings = mlxsw_sp2_get_stats_strings, + .get_stats = mlxsw_sp2_get_stats, }; static int mlxsw_sp_netdevice_event(struct notifier_block *unused, @@ -4664,6 +4765,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_traps_init; } + err = mlxsw_sp_devlink_traps_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize devlink traps\n"); + goto err_devlink_traps_init; + } + err 
= mlxsw_sp_buffers_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize buffers\n"); @@ -4797,6 +4904,8 @@ err_span_init: err_lag_init: mlxsw_sp_buffers_fini(mlxsw_sp); err_buffers_init: + mlxsw_sp_devlink_traps_fini(mlxsw_sp); +err_devlink_traps_init: mlxsw_sp_traps_fini(mlxsw_sp); err_traps_init: mlxsw_sp_fids_fini(mlxsw_sp); @@ -4869,6 +4978,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_lag_fini(mlxsw_sp); mlxsw_sp_buffers_fini(mlxsw_sp); + mlxsw_sp_devlink_traps_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_fids_fini(mlxsw_sp); mlxsw_sp_kvdl_fini(mlxsw_sp); @@ -5026,6 +5136,26 @@ static int mlxsw_sp1_resources_kvd_register(struct mlxsw_core *mlxsw_core) return 0; } +static int mlxsw_sp2_resources_kvd_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params kvd_size_params; + u32 kvd_size; + + if (!MLXSW_CORE_RES_VALID(mlxsw_core, KVD_SIZE)) + return -EIO; + + kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE); + devlink_resource_size_params_init(&kvd_size_params, kvd_size, kvd_size, + MLXSW_SP_KVD_GRANULARITY, + DEVLINK_RESOURCE_UNIT_ENTRY); + + return devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD, + kvd_size, MLXSW_SP_RESOURCE_KVD, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &kvd_size_params); +} + static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core) { return mlxsw_sp1_resources_kvd_register(mlxsw_core); @@ -5033,7 +5163,7 @@ static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core) static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core) { - return 0; + return mlxsw_sp2_resources_kvd_register(mlxsw_core); } static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core, @@ -5230,6 +5360,10 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, .flash_update = mlxsw_sp_flash_update, + .trap_init = mlxsw_sp_trap_init, + .trap_fini = mlxsw_sp_trap_fini, + .trap_action_set = mlxsw_sp_trap_action_set, + .trap_group_init = mlxsw_sp_trap_group_init, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp1_resources_register, .kvd_sizes_get = mlxsw_sp_kvd_sizes_get, @@ -5260,6 +5394,43 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, .flash_update = mlxsw_sp_flash_update, + .trap_init = mlxsw_sp_trap_init, + .trap_fini = mlxsw_sp_trap_fini, + .trap_action_set = mlxsw_sp_trap_action_set, + .trap_group_init = mlxsw_sp_trap_group_init, + .txhdr_construct = mlxsw_sp_txhdr_construct, + .resources_register = mlxsw_sp2_resources_register, + .params_register = mlxsw_sp2_params_register, + .params_unregister = mlxsw_sp2_params_unregister, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, + .txhdr_len = MLXSW_TXHDR_LEN, + .profile = &mlxsw_sp2_config_profile, + .res_query_enabled = true, +}; + +static struct mlxsw_driver mlxsw_sp3_driver = { + .kind = mlxsw_sp3_driver_name, + .priv_size = sizeof(struct mlxsw_sp), + .init = mlxsw_sp2_init, + .fini = mlxsw_sp_fini, + .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, + .port_split = mlxsw_sp_port_split, + .port_unsplit = mlxsw_sp_port_unsplit, + .sb_pool_get = mlxsw_sp_sb_pool_get, + .sb_pool_set = mlxsw_sp_sb_pool_set, + .sb_port_pool_get = mlxsw_sp_sb_port_pool_get, + 
.sb_port_pool_set = mlxsw_sp_sb_port_pool_set, + .sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set, + .sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot, + .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, + .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .flash_update = mlxsw_sp_flash_update, + .trap_init = mlxsw_sp_trap_init, + .trap_fini = mlxsw_sp_trap_fini, + .trap_action_set = mlxsw_sp_trap_action_set, + .trap_group_init = mlxsw_sp_trap_group_init, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, .params_register = mlxsw_sp2_params_register, @@ -6304,6 +6475,16 @@ static struct pci_driver mlxsw_sp2_pci_driver = { .id_table = mlxsw_sp2_pci_id_table, }; +static const struct pci_device_id mlxsw_sp3_pci_id_table[] = { + {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM3), 0}, + {0, }, +}; + +static struct pci_driver mlxsw_sp3_pci_driver = { + .name = mlxsw_sp3_driver_name, + .id_table = mlxsw_sp3_pci_id_table, +}; + static int __init mlxsw_sp_module_init(void) { int err; @@ -6319,6 +6500,10 @@ static int __init mlxsw_sp_module_init(void) if (err) goto err_sp2_core_driver_register; + err = mlxsw_core_driver_register(&mlxsw_sp3_driver); + if (err) + goto err_sp3_core_driver_register; + err = mlxsw_pci_driver_register(&mlxsw_sp1_pci_driver); if (err) goto err_sp1_pci_driver_register; @@ -6327,11 +6512,19 @@ static int __init mlxsw_sp_module_init(void) if (err) goto err_sp2_pci_driver_register; + err = mlxsw_pci_driver_register(&mlxsw_sp3_pci_driver); + if (err) + goto err_sp3_pci_driver_register; + return 0; +err_sp3_pci_driver_register: + mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver); err_sp2_pci_driver_register: mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver); err_sp1_pci_driver_register: + mlxsw_core_driver_unregister(&mlxsw_sp3_driver); +err_sp3_core_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp2_driver); err_sp2_core_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp1_driver); @@ -6343,8 +6536,10 @@ err_sp1_core_driver_register: static void __exit mlxsw_sp_module_exit(void) { + mlxsw_pci_driver_unregister(&mlxsw_sp3_pci_driver); mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver); mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver); + mlxsw_core_driver_unregister(&mlxsw_sp3_driver); mlxsw_core_driver_unregister(&mlxsw_sp2_driver); mlxsw_core_driver_unregister(&mlxsw_sp1_driver); unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); @@ -6359,4 +6554,5 @@ MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Spectrum driver"); MODULE_DEVICE_TABLE(pci, mlxsw_sp1_pci_id_table); MODULE_DEVICE_TABLE(pci, mlxsw_sp2_pci_id_table); +MODULE_DEVICE_TABLE(pci, mlxsw_sp3_pci_id_table); MODULE_FIRMWARE(MLXSW_SP1_FW_FILENAME); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 6664119fb0c8..b2a0028b1694 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -225,6 +225,16 @@ struct mlxsw_sp_port_xstats { u64 tx_packets[IEEE_8021QAZ_MAX_TCS]; }; +struct mlxsw_sp_ptp_port_dir_stats { + u64 packets; + u64 timestamps; +}; + +struct mlxsw_sp_ptp_port_stats { + struct mlxsw_sp_ptp_port_dir_stats rx_gcd; + struct mlxsw_sp_ptp_port_dir_stats tx_gcd; +}; + struct mlxsw_sp_port { struct net_device *dev; struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats; 
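The mlxsw_sp_ptp_port_stats structs added in the spectrum.h hunk above feed the new ethtool counters through an offsetof()-based descriptor table, which the spectrum_ptp.c hunk further down introduces as MLXSW_SP_PTP_PORT_STAT and walks in mlxsw_sp1_get_stats(). A minimal user-space sketch of that table-walk pattern follows; the names here are illustrative stand-ins, not the driver's own:

/* Standalone sketch of the offsetof()-based stats-table pattern used by
 * the new PTP GCD counters; hypothetical names, builds as plain C. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct dir_stats {
	uint64_t packets;
	uint64_t timestamps;
};

struct port_stats {
	struct dir_stats rx_gcd;
	struct dir_stats tx_gcd;
};

struct stat_desc {
	const char *str;
	ptrdiff_t offset;
};

#define STAT(NAME, FIELD) \
	{ .str = NAME, .offset = offsetof(struct port_stats, FIELD) }

static const struct stat_desc descs[] = {
	STAT("ptp_rx_gcd_packets", rx_gcd.packets),
	STAT("ptp_rx_gcd_timestamps", rx_gcd.timestamps),
	STAT("ptp_tx_gcd_packets", tx_gcd.packets),
	STAT("ptp_tx_gcd_timestamps", tx_gcd.timestamps),
};

int main(void)
{
	struct port_stats stats = { .rx_gcd = { .packets = 3, .timestamps = 2 } };
	const char *base = (const char *)&stats;
	size_t i;

	/* Walk the descriptor table instead of hard-coding one line per
	 * field, the same way mlxsw_sp1_get_stats() fills the ethtool
	 * data array from ptp.stats. */
	for (i = 0; i < sizeof(descs) / sizeof(descs[0]); i++)
		printf("%s: %llu\n", descs[i].str,
		       (unsigned long long)*(const uint64_t *)(base + descs[i].offset));
	return 0;
}

Adding a counter then means adding one struct field and one table entry; the count, strings, and data callbacks all derive from the same array.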
@@ -271,6 +281,7 @@ struct mlxsw_sp_port { struct hwtstamp_config hwtstamp_config; u16 ing_types; u16 egr_types; + struct mlxsw_sp_ptp_port_stats stats; } ptp; }; @@ -279,14 +290,14 @@ struct mlxsw_sp_port_type_speed_ops { u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd); void (*from_ptys_link)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, - unsigned long *mode); + u8 width, unsigned long *mode); u32 (*from_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto); void (*from_ptys_speed_duplex)(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd); - u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, + u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, u8 width, const struct ethtool_link_ksettings *cmd); - u32 (*to_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 speed); + u32 (*to_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u8 width, u32 speed); u32 (*to_ptys_upper_speed)(struct mlxsw_sp *mlxsw_sp, u32 upper_speed); int (*port_speed_base)(struct mlxsw_sp *mlxsw_sp, u8 local_port, u32 *base_speed); @@ -623,7 +634,8 @@ struct mlxsw_sp_acl_rule_info { unsigned int priority; struct mlxsw_afk_element_values values; struct mlxsw_afa_block *act_block; - u8 action_created:1; + u8 action_created:1, + egress_bind_blocker:1; unsigned int counter_index; }; @@ -642,6 +654,7 @@ struct mlxsw_sp_acl_block { struct mlxsw_sp *mlxsw_sp; unsigned int rule_count; unsigned int disable_count; + unsigned int egress_blocker_rule_count; struct net *net; }; @@ -657,7 +670,8 @@ void mlxsw_sp_acl_block_destroy(struct mlxsw_sp_acl_block *block); int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_port *mlxsw_sp_port, - bool ingress); + bool ingress, + struct netlink_ext_ack *extack); int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_port *mlxsw_sp_port, @@ -955,4 +969,17 @@ void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp); +/* spectrum_trap.c */ +int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx); +void mlxsw_sp_trap_fini(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx); +int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, + enum devlink_trap_action action); +int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group); + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 84a87d059333..150b3a144b83 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -239,7 +239,8 @@ mlxsw_sp_acl_block_lookup(struct mlxsw_sp_acl_block *block, int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_port *mlxsw_sp_port, - bool ingress) + bool ingress, + struct netlink_ext_ack *extack) { struct mlxsw_sp_acl_block_binding *binding; int err; @@ -247,6 +248,11 @@ int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp, if (WARN_ON(mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress))) return -EEXIST; + if (!ingress && block->egress_blocker_rule_count) { + 
NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules"); + return -EOPNOTSUPP; + } + binding = kzalloc(sizeof(*binding), GFP_KERNEL); if (!binding) return -ENOMEM; @@ -672,6 +678,7 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl_block *block = ruleset->ht_key.block; int err; err = ops->rule_add(mlxsw_sp, ruleset->priv, rule->priv, rule->rulei); @@ -689,14 +696,14 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, * one, to be directly bound to device. The rest of the * rulesets are bound by "Goto action set". */ - err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, - ruleset->ht_key.block); + err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block); if (err) goto err_ruleset_block_bind; } list_add_tail(&rule->list, &mlxsw_sp->acl->rules); - ruleset->ht_key.block->rule_count++; + block->rule_count++; + block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker; return 0; err_ruleset_block_bind: @@ -712,7 +719,9 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl_block *block = ruleset->ht_key.block; + block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker; ruleset->ht_key.block->rule_count--; list_del(&rule->list); if (!ruleset->ht_key.chain_index && diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 888ba4300bcc..b9eeae37a4dc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -250,6 +250,10 @@ static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u8 local_port, &mlxsw_sp->sb_vals->pool_dess[pool_index]; char sbpm_pl[MLXSW_REG_SBPM_LEN]; + if (local_port == MLXSW_PORT_CPU_PORT && + des->dir == MLXSW_REG_SBXX_DIR_INGRESS) + return 0; + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, true, 0, 0); return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, @@ -273,6 +277,10 @@ static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, char sbpm_pl[MLXSW_REG_SBPM_LEN]; struct mlxsw_sp_sb_pm *pm; + if (local_port == MLXSW_PORT_CPU_PORT && + des->dir == MLXSW_REG_SBXX_DIR_INGRESS) + return 0; + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index); mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, false, 0, 0); @@ -1085,6 +1093,11 @@ int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port, u32 max_buff; int err; + if (local_port == MLXSW_PORT_CPU_PORT) { + NL_SET_ERR_MSG_MOD(extack, "Changing CPU port's threshold is forbidden"); + return -EINVAL; + } + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index, threshold, &max_buff, extack); if (err) @@ -1130,6 +1143,11 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, u32 max_buff; int err; + if (local_port == MLXSW_PORT_CPU_PORT) { + NL_SET_ERR_MSG_MOD(extack, "Changing CPU port's binding is forbidden"); + return -EINVAL; + } + if (dir != mlxsw_sp->sb_vals->pool_dess[pool_index].dir) { NL_SET_ERR_MSG_MOD(extack, "Binding egress TC to ingress pool and vice versa is forbidden"); return -EINVAL; @@ -1187,6 +1205,11 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, local_port < 
mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; + if (local_port == MLXSW_PORT_CPU_PORT) { + /* Ingress quotas are not supported for the CPU port */ + masked_count++; + continue; + } for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) { cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, MLXSW_REG_SBXX_DIR_INGRESS); @@ -1222,7 +1245,7 @@ int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, char *sbsr_pl; u8 masked_count; u8 local_port_1; - u8 local_port = 0; + u8 local_port; int i; int err; int err2; @@ -1231,8 +1254,8 @@ int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, if (!sbsr_pl) return -ENOMEM; + local_port = MLXSW_PORT_CPU_PORT; next_batch: - local_port++; local_port_1 = local_port; masked_count = 0; mlxsw_reg_sbsr_pack(sbsr_pl, false); @@ -1243,7 +1266,11 @@ next_batch: for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; - mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); + if (local_port != MLXSW_PORT_CPU_PORT) { + /* Ingress quotas are not supported for the CPU port */ + mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, + local_port, 1); + } mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, @@ -1264,8 +1291,10 @@ do_query: cb_priv); if (err) goto out; - if (local_port < mlxsw_core_max_ports(mlxsw_core)) + if (local_port < mlxsw_core_max_ports(mlxsw_core)) { + local_port++; goto next_batch; + } out: err2 = mlxsw_reg_trans_bulk_wait(&bulk_list); @@ -1282,7 +1311,7 @@ int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, LIST_HEAD(bulk_list); char *sbsr_pl; unsigned int masked_count; - u8 local_port = 0; + u8 local_port; int i; int err; int err2; @@ -1291,8 +1320,8 @@ int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, if (!sbsr_pl) return -ENOMEM; + local_port = MLXSW_PORT_CPU_PORT; next_batch: - local_port++; masked_count = 0; mlxsw_reg_sbsr_pack(sbsr_pl, true); for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) @@ -1302,7 +1331,11 @@ next_batch: for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; - mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); + if (local_port != MLXSW_PORT_CPU_PORT) { + /* Ingress quotas are not supported for the CPU port */ + mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, + local_port, 1); + } mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, @@ -1319,8 +1352,10 @@ do_query: &bulk_list, NULL, 0); if (err) goto out; - if (local_port < mlxsw_core_max_ports(mlxsw_core)) + if (local_port < mlxsw_core_max_ports(mlxsw_core)) { + local_port++; goto next_batch; + } out: err2 = mlxsw_reg_trans_bulk_wait(&bulk_list); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 202e9a246019..0ad1a24abfc6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -78,6 +78,16 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid; u16 fid_index; + if (mlxsw_sp_acl_block_is_egress_bound(block)) { + NL_SET_ERR_MSG_MOD(extack, "Redirect action is not supported on egress"); + return -EOPNOTSUPP; + } + + /* Forbid block with this rulei to be bound + * to 
egress in future. + */ + rulei->egress_bind_blocker = 1; + fid = mlxsw_sp_acl_dummy_fid(mlxsw_sp); fid_index = mlxsw_sp_fid_index(fid); err = mlxsw_sp_acl_rulei_act_fid_set(mlxsw_sp, rulei, @@ -257,6 +267,12 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, flow_rule_match_tcp(rule, &match); + if (match.mask->flags & htons(0x0E00)) { + NL_SET_ERR_MSG_MOD(f->common.extack, "TCP flags match not supported on reserved bits"); + dev_err(mlxsw_sp->bus_info->dev, "TCP flags match not supported on reserved bits\n"); + return -EINVAL; + } + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_TCP_FLAGS, ntohs(match.key->flags), ntohs(match.mask->flags)); @@ -390,6 +406,12 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, NL_SET_ERR_MSG_MOD(f->common.extack, "vlan_id key is not supported on egress"); return -EOPNOTSUPP; } + + /* Forbid block with this rulei to be bound + * to egress in future. + */ + rulei->egress_bind_blocker = 1; + if (match.mask->vlan_id != 0) mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_VID, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index 38bb1cfe4e8c..ec2ff3d7f41c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -630,6 +630,8 @@ static void mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state, struct mlxsw_sp1_ptp_unmatched *unmatched) { + struct mlxsw_sp_ptp_port_dir_stats *stats; + struct mlxsw_sp_port *mlxsw_sp_port; int err; /* If an unmatched entry has an SKB, it has to be handed over to the @@ -650,6 +652,17 @@ mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state, /* The packet was matched with timestamp during the walk. */ goto out; + mlxsw_sp_port = ptp_state->mlxsw_sp->ports[unmatched->key.local_port]; + if (mlxsw_sp_port) { + stats = unmatched->key.ingress ? + &mlxsw_sp_port->ptp.stats.rx_gcd : + &mlxsw_sp_port->ptp.stats.tx_gcd; + if (unmatched->skb) + stats->packets++; + else + stats->timestamps++; + } + /* mlxsw_sp1_ptp_unmatched_finish() invokes netif_receive_skb(). 
While * the comment at that function states that it can only be called in * soft IRQ context, this pattern of local_bh_disable() + @@ -1098,3 +1111,57 @@ int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, return 0; } + +struct mlxsw_sp_ptp_port_stat { + char str[ETH_GSTRING_LEN]; + ptrdiff_t offset; +}; + +#define MLXSW_SP_PTP_PORT_STAT(NAME, FIELD) \ + { \ + .str = NAME, \ + .offset = offsetof(struct mlxsw_sp_ptp_port_stats, \ + FIELD), \ + } + +static const struct mlxsw_sp_ptp_port_stat mlxsw_sp_ptp_port_stats[] = { + MLXSW_SP_PTP_PORT_STAT("ptp_rx_gcd_packets", rx_gcd.packets), + MLXSW_SP_PTP_PORT_STAT("ptp_rx_gcd_timestamps", rx_gcd.timestamps), + MLXSW_SP_PTP_PORT_STAT("ptp_tx_gcd_packets", tx_gcd.packets), + MLXSW_SP_PTP_PORT_STAT("ptp_tx_gcd_timestamps", tx_gcd.timestamps), +}; + +#undef MLXSW_SP_PTP_PORT_STAT + +#define MLXSW_SP_PTP_PORT_STATS_LEN \ + ARRAY_SIZE(mlxsw_sp_ptp_port_stats) + +int mlxsw_sp1_get_stats_count(void) +{ + return MLXSW_SP_PTP_PORT_STATS_LEN; +} + +void mlxsw_sp1_get_stats_strings(u8 **p) +{ + int i; + + for (i = 0; i < MLXSW_SP_PTP_PORT_STATS_LEN; i++) { + memcpy(*p, mlxsw_sp_ptp_port_stats[i].str, + ETH_GSTRING_LEN); + *p += ETH_GSTRING_LEN; + } +} + +void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index) +{ + void *stats = &mlxsw_sp_port->ptp.stats; + ptrdiff_t offset; + int i; + + data += data_index; + for (i = 0; i < MLXSW_SP_PTP_PORT_STATS_LEN; i++) { + offset = mlxsw_sp_ptp_port_stats[i].offset; + *data++ = *(u64 *)(stats + offset); + } +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h index 72e55f6926b9..8c386571afce 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -59,6 +59,11 @@ void mlxsw_sp1_ptp_shaper_work(struct work_struct *work); int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, struct ethtool_ts_info *info); +int mlxsw_sp1_get_stats_count(void); +void mlxsw_sp1_get_stats_strings(u8 **p); +void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index); + #else static inline struct mlxsw_sp_ptp_clock * @@ -125,6 +130,19 @@ static inline int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_ptp_get_ts_info_noptp(info); } +static inline int mlxsw_sp1_get_stats_count(void) +{ + return 0; +} + +static inline void mlxsw_sp1_get_stats_strings(u8 **p) +{ +} + +static inline void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index) +{ +} #endif static inline struct mlxsw_sp_ptp_clock * @@ -183,4 +201,18 @@ static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_ptp_get_ts_info_noptp(info); } +static inline int mlxsw_sp2_get_stats_count(void) +{ + return 0; +} + +static inline void mlxsw_sp2_get_stats_strings(u8 **p) +{ +} + +static inline void mlxsw_sp2_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + u64 *data, int data_index) +{ +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index e618be7ce6c6..a330b369e899 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2943,7 +2943,7 @@ static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed) val = nh_grp->count; for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - val ^= nh->ifindex; + val ^= 
jhash(&nh->ifindex, sizeof(nh->ifindex), seed); } return jhash(&val, sizeof(val), seed); default: @@ -2961,7 +2961,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev; - val ^= dev->ifindex; + val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed); } return jhash(&val, sizeof(val), seed); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c new file mode 100644 index 000000000000..899450b28621 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ + +#include <linux/kernel.h> +#include <net/devlink.h> +#include <uapi/linux/devlink.h> + +#include "core.h" +#include "reg.h" +#include "spectrum.h" + +#define MLXSW_SP_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT + +static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, + void *priv); + +#define MLXSW_SP_TRAP_DROP(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + MLXSW_SP_TRAP_METADATA) + +#define MLXSW_SP_RXL_DISCARD(_id, _group_id) \ + MLXSW_RXL(mlxsw_sp_rx_drop_listener, DISCARD_##_id, SET_FW_DEFAULT, \ + false, SP_##_group_id, DISCARD) + +static struct devlink_trap mlxsw_sp_traps_arr[] = { + MLXSW_SP_TRAP_DROP(SMAC_MC, L2_DROPS), + MLXSW_SP_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS), + MLXSW_SP_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), + MLXSW_SP_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS), + MLXSW_SP_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS), + MLXSW_SP_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS), +}; + +static struct mlxsw_listener mlxsw_sp_listeners_arr[] = { + MLXSW_SP_RXL_DISCARD(ING_PACKET_SMAC_MC, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(ING_SWITCH_VTAG_ALLOW, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(ING_SWITCH_VLAN, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(ING_SWITCH_STP, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_UC, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_MC_NULL, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_LB, L2_DISCARDS), +}; + +/* Mapping between hardware trap and devlink trap. Multiple hardware traps can + * be mapped to the same devlink trap. Order is according to + * 'mlxsw_sp_listeners_arr'. 
+ */ +static u16 mlxsw_sp_listener_devlink_map[] = { + DEVLINK_TRAP_GENERIC_ID_SMAC_MC, + DEVLINK_TRAP_GENERIC_ID_VLAN_TAG_MISMATCH, + DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER, + DEVLINK_TRAP_GENERIC_ID_INGRESS_STP_FILTER, + DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST, + DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST, + DEVLINK_TRAP_GENERIC_ID_PORT_LOOPBACK_FILTER, +}; + +static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port_pcpu_stats *pcpu_stats; + + if (unlikely(!mlxsw_sp_port)) { + dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: skb received for non-existent port\n", + local_port); + kfree_skb(skb); + return -EINVAL; + } + + skb->dev = mlxsw_sp_port->dev; + + pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats); + u64_stats_update_begin(&pcpu_stats->syncp); + pcpu_stats->rx_packets++; + pcpu_stats->rx_bytes += skb->len; + u64_stats_update_end(&pcpu_stats->syncp); + + skb->protocol = eth_type_trans(skb, skb->dev); + + return 0; +} + +static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, + void *trap_ctx) +{ + struct devlink_port *in_devlink_port; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + struct devlink *devlink; + + mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + + if (mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port)) + return; + + devlink = priv_to_devlink(mlxsw_sp->core); + in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, + local_port); + devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port); + consume_skb(skb); +} + +int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + if (WARN_ON(ARRAY_SIZE(mlxsw_sp_listener_devlink_map) != + ARRAY_SIZE(mlxsw_sp_listeners_arr))) + return -EINVAL; + + return devlink_traps_register(devlink, mlxsw_sp_traps_arr, + ARRAY_SIZE(mlxsw_sp_traps_arr), + mlxsw_sp); +} + +void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_traps_unregister(devlink, mlxsw_sp_traps_arr, + ARRAY_SIZE(mlxsw_sp_traps_arr)); +} + +int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) { + struct mlxsw_listener *listener; + int err; + + if (mlxsw_sp_listener_devlink_map[i] != trap->id) + continue; + listener = &mlxsw_sp_listeners_arr[i]; + + err = mlxsw_core_trap_register(mlxsw_core, listener, trap_ctx); + if (err) + return err; + } + + return 0; +} + +void mlxsw_sp_trap_fini(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, void *trap_ctx) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) { + struct mlxsw_listener *listener; + + if (mlxsw_sp_listener_devlink_map[i] != trap->id) + continue; + listener = &mlxsw_sp_listeners_arr[i]; + + mlxsw_core_trap_unregister(mlxsw_core, listener, trap_ctx); + } +} + +int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core, + const struct devlink_trap *trap, + enum devlink_trap_action action) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) { + enum mlxsw_reg_hpkt_action hw_action; + struct mlxsw_listener *listener; + int err; + + if (mlxsw_sp_listener_devlink_map[i] != trap->id) + continue; + listener = &mlxsw_sp_listeners_arr[i]; + + switch (action) { + 
case DEVLINK_TRAP_ACTION_DROP: + hw_action = MLXSW_REG_HPKT_ACTION_SET_FW_DEFAULT; + break; + case DEVLINK_TRAP_ACTION_TRAP: + hw_action = MLXSW_REG_HPKT_ACTION_TRAP_EXCEPTION_TO_CPU; + break; + default: + return -EINVAL; + } + + err = mlxsw_core_trap_action_set(mlxsw_core, listener, + hw_action); + if (err) + return err; + } + + return 0; +} + +#define MLXSW_SP_DISCARD_POLICER_ID (MLXSW_REG_HTGT_TRAP_GROUP_MAX + 1) + +static int +mlxsw_sp_trap_group_policer_init(struct mlxsw_sp *mlxsw_sp, + const struct devlink_trap_group *group) +{ + enum mlxsw_reg_qpcr_ir_units ir_units; + char qpcr_pl[MLXSW_REG_QPCR_LEN]; + u16 policer_id; + u8 burst_size; + bool is_bytes; + u32 rate; + + switch (group->id) { + case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS: + policer_id = MLXSW_SP_DISCARD_POLICER_ID; + ir_units = MLXSW_REG_QPCR_IR_UNITS_M; + is_bytes = false; + rate = 10 * 1024; /* 10Kpps */ + burst_size = 7; + break; + default: + return -EINVAL; + } + + mlxsw_reg_qpcr_pack(qpcr_pl, policer_id, ir_units, is_bytes, rate, + burst_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl); +} + +static int +__mlxsw_sp_trap_group_init(struct mlxsw_sp *mlxsw_sp, + const struct devlink_trap_group *group) +{ + char htgt_pl[MLXSW_REG_HTGT_LEN]; + u8 priority, tc, group_id; + u16 policer_id; + + switch (group->id) { + case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS: + group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS; + policer_id = MLXSW_SP_DISCARD_POLICER_ID; + priority = 0; + tc = 1; + break; + default: + return -EINVAL; + } + + mlxsw_reg_htgt_pack(htgt_pl, group_id, policer_id, priority, tc); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl); +} + +int mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, + const struct devlink_trap_group *group) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + int err; + + err = mlxsw_sp_trap_group_policer_init(mlxsw_sp, group); + if (err) + return err; + + err = __mlxsw_sp_trap_group_init(mlxsw_sp, group); + if (err) + return err; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index bdab96f5bc70..1c14c051ee52 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -637,12 +637,6 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = { .speed = 50000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, - .supported = SUPPORTED_56000baseKR4_Full, - .advertised = ADVERTISED_56000baseKR4_Full, - .speed = 56000, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 19202bdb5105..7618f084cae9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -66,6 +66,13 @@ enum { MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD, MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, + MLXSW_TRAP_ID_DISCARD_ING_PACKET_SMAC_MC = 0x140, + MLXSW_TRAP_ID_DISCARD_ING_SWITCH_VTAG_ALLOW = 0x148, + MLXSW_TRAP_ID_DISCARD_ING_SWITCH_VLAN = 0x149, + MLXSW_TRAP_ID_DISCARD_ING_SWITCH_STP = 0x14A, + MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_UC = 0x150, + MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_MC_NULL = 0x151, + MLXSW_TRAP_ID_DISCARD_LOOKUP_SWITCH_LB = 0x152, MLXSW_TRAP_ID_ACL0 = 0x1C0, /* Multicast trap used for routes with trap action */ 
MLXSW_TRAP_ID_ACL1 = 0x1C1,
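The spectrum_router.c hunk earlier in this diff replaces val ^= nh->ifindex with val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed). XOR-combining raw ifindexes keeps the group hash independent of nexthop order, but small integers cancel out trivially; mixing each element first preserves the order-independence while avoiding such collisions. A minimal user-space sketch, where mix32() is only an illustrative stand-in for the kernel's jhash() (the driver additionally seeds val with the nexthop count in the object case and jhashes the combined value):

/* Why each ifindex is hashed before the XOR combine. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t mix32(uint32_t x, uint32_t seed)
{
	/* Simple avalanche mixer; not jhash, just illustrative. */
	x += seed;
	x ^= x >> 16;
	x *= 0x7feb352d;
	x ^= x >> 15;
	x *= 0x846ca68b;
	x ^= x >> 16;
	return x;
}

static uint32_t combine(const int *ifindex, int n, uint32_t seed, int hashed)
{
	uint32_t val = 0;
	int i;

	/* XOR keeps the result independent of nexthop order; hashing each
	 * element first keeps distinct sets from cancelling to the same value. */
	for (i = 0; i < n; i++)
		val ^= hashed ? mix32((uint32_t)ifindex[i], seed) :
				(uint32_t)ifindex[i];
	return val;
}

int main(void)
{
	int a[] = { 1, 2, 3 };	/* 1 ^ 2 ^ 3 == 0 */
	int b[] = { 4, 5, 1 };	/* 4 ^ 5 ^ 1 == 0: raw XOR collides */

	printf("raw:    %" PRIu32 " vs %" PRIu32 "\n",
	       combine(a, 3, 7, 0), combine(b, 3, 7, 0));
	printf("hashed: %" PRIu32 " vs %" PRIu32 "\n",
	       combine(a, 3, 7, 1), combine(b, 3, 7, 1));
	return 0;
}

With raw XOR both nexthop groups hash to 0 and land in the same rhashtable chain; with the per-element mix they almost certainly differ, which is the behavior the jhash() change buys.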