diff options
179 files changed, 13539 insertions, 1173 deletions
diff --git a/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml b/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml index 5bee665d5fcf..e79e4e166ad8 100644 --- a/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml +++ b/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml @@ -37,12 +37,14 @@ properties: items: - description: register based extraction - description: frame dma based extraction + - description: analyzer interrupt interrupt-names: minItems: 1 items: - const: xtr - const: fdma + - const: ana resets: items: diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst index b7dfe693a332..4e01dc32bc08 100644 --- a/Documentation/networking/devlink/devlink-params.rst +++ b/Documentation/networking/devlink/devlink-params.rst @@ -118,8 +118,10 @@ own name. errors. * - ``max_macs`` - u32 - - Specifies the maximum number of MAC addresses per ethernet port of - this device. + - Typically macvlan, vlan net devices mac are also programmed in their + parent netdevice's Function rx filter. This parameter limit the + maximum number of unicast mac address filters to receive traffic from + per ethernet port of this device. * - ``region_snapshot_enable`` - Boolean - Enable capture of ``devlink-region`` snapshots. @@ -129,3 +131,9 @@ own name. will NACK any attempt of other host to reset the device. This parameter is useful for setups where a device is shared by different hosts, such as multi-host setup. + * - ``io_eq_size`` + - u32 + - Control the size of I/O completion EQs. + * - ``event_eq_size`` + - u32 + - Control the size of asynchronous control events EQ. diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst index 4e4b97f7971a..38e94ed65936 100644 --- a/Documentation/networking/devlink/mlx5.rst +++ b/Documentation/networking/devlink/mlx5.rst @@ -14,8 +14,18 @@ Parameters * - Name - Mode + - Validation * - ``enable_roce`` - driverinit + * - ``io_eq_size`` + - driverinit + - The range is between 64 and 4096. + * - ``event_eq_size`` + - driverinit + - The range is between 64 and 4096. + * - ``max_macs`` + - driverinit + - The range is between 1 and 2^31. Only power of 2 values are supported. The ``mlx5`` driver also implements the following driver-specific parameters. diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 110d6c403bdd..4ffd303c64ea 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1745,7 +1745,7 @@ static void vsc9959_psfp_sfi_table_del(struct ocelot *ocelot, u32 index) static void vsc9959_psfp_parse_gate(const struct flow_action_entry *entry, struct felix_stream_gate *sgi) { - sgi->index = entry->gate.index; + sgi->index = entry->hw_index; sgi->ipv_valid = (entry->gate.prio < 0) ? 0 : 1; sgi->init_ipv = (sgi->ipv_valid) ? entry->gate.prio : 0; sgi->basetime = entry->gate.basetime; @@ -1947,7 +1947,7 @@ static int vsc9959_psfp_filter_add(struct ocelot *ocelot, int port, kfree(sgi); break; case FLOW_ACTION_POLICE: - index = a->police.index + VSC9959_PSFP_POLICER_BASE; + index = a->hw_index + VSC9959_PSFP_POLICER_BASE; if (index > VSC9959_PSFP_POLICER_MAX) { ret = -EINVAL; goto err; diff --git a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c index 72b9b39b0989..7dcdd784aea4 100644 --- a/drivers/net/dsa/sja1105/sja1105_flower.c +++ b/drivers/net/dsa/sja1105/sja1105_flower.c @@ -379,7 +379,7 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port, vl_rule = true; rc = sja1105_vl_gate(priv, port, extack, cookie, - &key, act->gate.index, + &key, act->hw_index, act->gate.prio, act->gate.basetime, act->gate.cycletime, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 533b8519ec35..466273b22f0a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -898,6 +898,8 @@ #define PCS_V2_WINDOW_SELECT 0x9064 #define PCS_V2_RV_WINDOW_DEF 0x1060 #define PCS_V2_RV_WINDOW_SELECT 0x1064 +#define PCS_V2_YC_WINDOW_DEF 0x18060 +#define PCS_V2_YC_WINDOW_SELECT 0x18064 /* PCS register entry bit positions and sizes */ #define PCS_V2_WINDOW_DEF_OFFSET_INDEX 6 @@ -1030,8 +1032,8 @@ #define XP_PROP_0_PORT_ID_WIDTH 8 #define XP_PROP_0_PORT_MODE_INDEX 8 #define XP_PROP_0_PORT_MODE_WIDTH 4 -#define XP_PROP_0_PORT_SPEEDS_INDEX 23 -#define XP_PROP_0_PORT_SPEEDS_WIDTH 4 +#define XP_PROP_0_PORT_SPEEDS_INDEX 22 +#define XP_PROP_0_PORT_SPEEDS_WIDTH 5 #define XP_PROP_1_MAX_RX_DMA_INDEX 24 #define XP_PROP_1_MAX_RX_DMA_WIDTH 5 #define XP_PROP_1_MAX_RX_QUEUES_INDEX 8 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index 90cb55eb5466..efdcf484a510 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -278,6 +278,13 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) (rdev->vendor == PCI_VENDOR_ID_AMD) && (rdev->device == 0x15d0)) { pdata->xpcs_window_def_reg = PCS_V2_RV_WINDOW_DEF; pdata->xpcs_window_sel_reg = PCS_V2_RV_WINDOW_SELECT; + } else if (rdev && (rdev->vendor == PCI_VENDOR_ID_AMD) && + (rdev->device == 0x14b5)) { + pdata->xpcs_window_def_reg = PCS_V2_YC_WINDOW_DEF; + pdata->xpcs_window_sel_reg = PCS_V2_YC_WINDOW_SELECT; + + /* Yellow Carp devices do not need cdr workaround */ + pdata->vdata->an_cdr_workaround = 0; } else { pdata->xpcs_window_def_reg = PCS_V2_WINDOW_DEF; pdata->xpcs_window_sel_reg = PCS_V2_WINDOW_SELECT; @@ -460,7 +467,7 @@ static int __maybe_unused xgbe_pci_resume(struct device *dev) return ret; } -static const struct xgbe_version_data xgbe_v2a = { +static struct xgbe_version_data xgbe_v2a = { .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v2, .xpcs_access = XGBE_XPCS_ACCESS_V2, .mmc_64bit = 1, @@ -475,7 +482,7 @@ static const struct xgbe_version_data xgbe_v2a = { .an_cdr_workaround = 1, }; -static const struct xgbe_version_data xgbe_v2b = { +static struct xgbe_version_data xgbe_v2b = { .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v2, .xpcs_access = XGBE_XPCS_ACCESS_V2, .mmc_64bit = 1, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 213769054391..2156600641b6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -124,10 +124,10 @@ #include "xgbe.h" #include "xgbe-common.h" -#define XGBE_PHY_PORT_SPEED_100 BIT(0) -#define XGBE_PHY_PORT_SPEED_1000 BIT(1) -#define XGBE_PHY_PORT_SPEED_2500 BIT(2) -#define XGBE_PHY_PORT_SPEED_10000 BIT(3) +#define XGBE_PHY_PORT_SPEED_100 BIT(1) +#define XGBE_PHY_PORT_SPEED_1000 BIT(2) +#define XGBE_PHY_PORT_SPEED_2500 BIT(3) +#define XGBE_PHY_PORT_SPEED_10000 BIT(4) #define XGBE_MUTEX_RELEASE 0x80000000 diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c index b07cb9bc5f2d..4a2622b05ee1 100644 --- a/drivers/net/ethernet/broadcom/bcm4908_enet.c +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c @@ -635,7 +635,6 @@ static int bcm4908_enet_poll_tx(struct napi_struct *napi, int weight) struct bcm4908_enet_dma_ring_bd *buf_desc; struct bcm4908_enet_dma_ring_slot *slot; struct device *dev = enet->dev; - unsigned int bytes = 0; int handled = 0; while (handled < weight && tx_ring->read_idx != tx_ring->write_idx) { @@ -646,7 +645,6 @@ static int bcm4908_enet_poll_tx(struct napi_struct *napi, int weight) dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE); dev_kfree_skb(slot->skb); - bytes += slot->len; if (++tx_ring->read_idx == tx_ring->length) tx_ring->read_idx = 0; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 2b06d78baa08..a19dd6797070 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1850,6 +1850,14 @@ struct bnx2x { /* Vxlan/Geneve related information */ u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX]; + +#define FW_CAP_INVALIDATE_VF_FP_HSI BIT(0) + u32 fw_cap; + + u32 fw_major; + u32 fw_minor; + u32 fw_rev; + u32 fw_eng; }; /* Tx queues may be less or equal to Rx queues */ @@ -2525,5 +2533,6 @@ void bnx2x_register_phc(struct bnx2x *bp); * Meant for implicit re-load flows. */ int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp); - +int bnx2x_init_firmware(struct bnx2x *bp); +void bnx2x_release_firmware(struct bnx2x *bp); #endif /* bnx2x.h */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 54a2334dee56..8d36ebbf08e1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2365,10 +2365,8 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err) if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP && load_code != FW_MSG_CODE_DRV_LOAD_COMMON) { /* build my FW version dword */ - u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) + - (BCM_5710_FW_MINOR_VERSION << 8) + - (BCM_5710_FW_REVISION_VERSION << 16) + - (BCM_5710_FW_ENGINEERING_VERSION << 24); + u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) + + (bp->fw_rev << 16) + (bp->fw_eng << 24); /* read loaded FW from chip */ u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h index 3f8435208bf4..a84d015da5df 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h @@ -241,6 +241,8 @@ IRO[221].m2)) #define XSTORM_VF_TO_PF_OFFSET(funcId) \ (IRO[48].base + ((funcId) * IRO[48].m1)) +#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid) \ + (IRO[386].base + ((fid) * IRO[386].m1)) #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0 /* eth hsi version */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h index 622fadc50316..611efee75834 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h @@ -3024,7 +3024,8 @@ struct afex_stats { #define BCM_5710_FW_MAJOR_VERSION 7 #define BCM_5710_FW_MINOR_VERSION 13 -#define BCM_5710_FW_REVISION_VERSION 15 +#define BCM_5710_FW_REVISION_VERSION 21 +#define BCM_5710_FW_REVISION_VERSION_V15 15 #define BCM_5710_FW_ENGINEERING_VERSION 0 #define BCM_5710_FW_COMPILE_FLAGS 1 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 651bc1d7a57a..4953f5e1e390 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -74,9 +74,19 @@ __stringify(BCM_5710_FW_MINOR_VERSION) "." \ __stringify(BCM_5710_FW_REVISION_VERSION) "." \ __stringify(BCM_5710_FW_ENGINEERING_VERSION) + +#define FW_FILE_VERSION_V15 \ + __stringify(BCM_5710_FW_MAJOR_VERSION) "." \ + __stringify(BCM_5710_FW_MINOR_VERSION) "." \ + __stringify(BCM_5710_FW_REVISION_VERSION_V15) "." \ + __stringify(BCM_5710_FW_ENGINEERING_VERSION) + #define FW_FILE_NAME_E1 "bnx2x/bnx2x-e1-" FW_FILE_VERSION ".fw" #define FW_FILE_NAME_E1H "bnx2x/bnx2x-e1h-" FW_FILE_VERSION ".fw" #define FW_FILE_NAME_E2 "bnx2x/bnx2x-e2-" FW_FILE_VERSION ".fw" +#define FW_FILE_NAME_E1_V15 "bnx2x/bnx2x-e1-" FW_FILE_VERSION_V15 ".fw" +#define FW_FILE_NAME_E1H_V15 "bnx2x/bnx2x-e1h-" FW_FILE_VERSION_V15 ".fw" +#define FW_FILE_NAME_E2_V15 "bnx2x/bnx2x-e2-" FW_FILE_VERSION_V15 ".fw" /* Time in jiffies before concluding the transmitter is hung */ #define TX_TIMEOUT (5*HZ) @@ -747,9 +757,7 @@ static int bnx2x_mc_assert(struct bnx2x *bp) CHIP_IS_E1(bp) ? "everest1" : CHIP_IS_E1H(bp) ? "everest1h" : CHIP_IS_E2(bp) ? "everest2" : "everest3", - BCM_5710_FW_MAJOR_VERSION, - BCM_5710_FW_MINOR_VERSION, - BCM_5710_FW_REVISION_VERSION); + bp->fw_major, bp->fw_minor, bp->fw_rev); return rc; } @@ -12308,6 +12316,15 @@ static int bnx2x_init_bp(struct bnx2x *bp) bnx2x_read_fwinfo(bp); + if (IS_PF(bp)) { + rc = bnx2x_init_firmware(bp); + + if (rc) { + bnx2x_free_mem_bp(bp); + return rc; + } + } + func = BP_FUNC(bp); /* need to reset chip if undi was active */ @@ -12320,6 +12337,7 @@ static int bnx2x_init_bp(struct bnx2x *bp) rc = bnx2x_prev_unload(bp); if (rc) { + bnx2x_release_firmware(bp); bnx2x_free_mem_bp(bp); return rc; } @@ -13317,16 +13335,11 @@ static int bnx2x_check_firmware(struct bnx2x *bp) /* Check FW version */ offset = be32_to_cpu(fw_hdr->fw_version.offset); fw_ver = firmware->data + offset; - if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) || - (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) || - (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) || - (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) { + if (fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor || + fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) { BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be %d.%d.%d.%d\n", - fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3], - BCM_5710_FW_MAJOR_VERSION, - BCM_5710_FW_MINOR_VERSION, - BCM_5710_FW_REVISION_VERSION, - BCM_5710_FW_ENGINEERING_VERSION); + fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3], + bp->fw_major, bp->fw_minor, bp->fw_rev, bp->fw_eng); return -EINVAL; } @@ -13404,34 +13417,51 @@ do { \ (u8 *)bp->arr, len); \ } while (0) -static int bnx2x_init_firmware(struct bnx2x *bp) +int bnx2x_init_firmware(struct bnx2x *bp) { - const char *fw_file_name; + const char *fw_file_name, *fw_file_name_v15; struct bnx2x_fw_file_hdr *fw_hdr; int rc; if (bp->firmware) return 0; - if (CHIP_IS_E1(bp)) + if (CHIP_IS_E1(bp)) { fw_file_name = FW_FILE_NAME_E1; - else if (CHIP_IS_E1H(bp)) + fw_file_name_v15 = FW_FILE_NAME_E1_V15; + } else if (CHIP_IS_E1H(bp)) { fw_file_name = FW_FILE_NAME_E1H; - else if (!CHIP_IS_E1x(bp)) + fw_file_name_v15 = FW_FILE_NAME_E1H_V15; + } else if (!CHIP_IS_E1x(bp)) { fw_file_name = FW_FILE_NAME_E2; - else { + fw_file_name_v15 = FW_FILE_NAME_E2_V15; + } else { BNX2X_ERR("Unsupported chip revision\n"); return -EINVAL; } + BNX2X_DEV_INFO("Loading %s\n", fw_file_name); rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev); if (rc) { - BNX2X_ERR("Can't load firmware file %s\n", - fw_file_name); - goto request_firmware_exit; + BNX2X_DEV_INFO("Trying to load older fw %s\n", fw_file_name_v15); + + /* try to load prev version */ + rc = request_firmware(&bp->firmware, fw_file_name_v15, &bp->pdev->dev); + + if (rc) + goto request_firmware_exit; + + bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15; + } else { + bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI; + bp->fw_rev = BCM_5710_FW_REVISION_VERSION; } + bp->fw_major = BCM_5710_FW_MAJOR_VERSION; + bp->fw_minor = BCM_5710_FW_MINOR_VERSION; + bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION; + rc = bnx2x_check_firmware(bp); if (rc) { BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name); @@ -13487,7 +13517,7 @@ request_firmware_exit: return rc; } -static void bnx2x_release_firmware(struct bnx2x *bp) +void bnx2x_release_firmware(struct bnx2x *bp) { kfree(bp->init_ops_offsets); kfree(bp->init_ops); @@ -14004,6 +14034,7 @@ static int bnx2x_init_one(struct pci_dev *pdev, return 0; init_one_freemem: + bnx2x_release_firmware(bp); bnx2x_free_mem_bp(bp); init_one_exit: diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 74a8931ce1d1..11d15cd03600 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -758,9 +758,18 @@ static void bnx2x_vf_igu_reset(struct bnx2x *bp, struct bnx2x_virtf *vf) void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid) { + u16 abs_fid; + + abs_fid = FW_VF_HANDLE(abs_vfid); + /* set the VF-PF association in the FW */ - storm_memset_vf_to_pf(bp, FW_VF_HANDLE(abs_vfid), BP_FUNC(bp)); - storm_memset_func_en(bp, FW_VF_HANDLE(abs_vfid), 1); + storm_memset_vf_to_pf(bp, abs_fid, BP_FUNC(bp)); + storm_memset_func_en(bp, abs_fid, 1); + + /* Invalidate fp_hsi version for vfs */ + if (bp->fw_cap & FW_CAP_INVALIDATE_VF_FP_HSI) + REG_WR8(bp, BAR_XSTRORM_INTMEM + + XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(abs_fid), 0); /* clear vf errors*/ bnx2x_vf_semi_clear_err(bp, abs_vfid); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 1471b6130a2b..d8afcf8d6b30 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1962,7 +1962,7 @@ static int bnxt_tc_setup_indr_cb(struct net_device *netdev, struct Qdisc *sch, v void *data, void (*cleanup)(struct flow_block_cb *block_cb)) { - if (!bnxt_is_netdev_indr_offload(netdev)) + if (!netdev || !bnxt_is_netdev_indr_offload(netdev)) return -EOPNOTSUPP; switch (type) { diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 0536d2c76fbc..3555c12edb45 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -1182,7 +1182,7 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv, } /* parsing gate action */ - if (entryg->gate.index >= priv->psfp_cap.max_psfp_gate) { + if (entryg->hw_index >= priv->psfp_cap.max_psfp_gate) { NL_SET_ERR_MSG_MOD(extack, "No Stream Gate resource!"); err = -ENOSPC; goto free_filter; @@ -1202,7 +1202,7 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv, } refcount_set(&sgi->refcount, 1); - sgi->index = entryg->gate.index; + sgi->index = entryg->hw_index; sgi->init_ipv = entryg->gate.prio; sgi->basetime = entryg->gate.basetime; sgi->cycletime = entryg->gate.cycletime; @@ -1244,7 +1244,7 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv, refcount_set(&fmi->refcount, 1); fmi->cir = entryp->police.rate_bytes_ps; fmi->cbs = entryp->police.burst; - fmi->index = entryp->police.index; + fmi->index = entryp->hw_index; filter->flags |= ENETC_PSFP_FLAGS_FMI; filter->fmi_index = fmi->index; sfi->meter_id = fmi->index; diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 0b274d8fa45b..3facb55b7161 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -327,6 +327,16 @@ config ICE_SWITCHDEV If unsure, say N. +config ICE_HWTS + bool "Support HW cross-timestamp on platforms with PTM support" + default y + depends on ICE && X86 + help + Say Y to enable hardware supported cross-timestamping on platforms + with PCIe PTM support. The cross-timestamp is available through + the PTP clock driver precise cross-timestamp ioctl + (PTP_SYS_OFFSET_PRECISE). + config FM10K tristate "Intel(R) FM10000 Ethernet Switch Host Interface Support" default n diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c index 21eff0895a7a..f6d56867f857 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c @@ -143,7 +143,7 @@ s32 fm10k_tlv_attr_put_mac_vlan(u32 *msg, u16 attr_id, * @vlan: location of buffer to store VLAN * * This function pulls the MAC address back out of the attribute and will - * place it in the array pointed by by mac_addr. It will return success + * place it in the array pointed by mac_addr. It will return success * if provided with a valid pointers. **/ s32 fm10k_tlv_attr_get_mac_vlan(u32 *attr, u8 *mac_addr, u16 *vlan) diff --git a/drivers/net/ethernet/intel/ice/ice_cgu_regs.h b/drivers/net/ethernet/intel/ice/ice_cgu_regs.h new file mode 100644 index 000000000000..57abd52386d0 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_cgu_regs.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2021, Intel Corporation. */ + +#ifndef _ICE_CGU_REGS_H_ +#define _ICE_CGU_REGS_H_ + +#define NAC_CGU_DWORD9 0x24 +union nac_cgu_dword9 { + struct { + u32 time_ref_freq_sel : 3; + u32 clk_eref1_en : 1; + u32 clk_eref0_en : 1; + u32 time_ref_en : 1; + u32 time_sync_en : 1; + u32 one_pps_out_en : 1; + u32 clk_ref_synce_en : 1; + u32 clk_synce1_en : 1; + u32 clk_synce0_en : 1; + u32 net_clk_ref1_en : 1; + u32 net_clk_ref0_en : 1; + u32 clk_synce1_amp : 2; + u32 misc6 : 1; + u32 clk_synce0_amp : 2; + u32 one_pps_out_amp : 2; + u32 misc24 : 12; + } field; + u32 val; +}; + +#define NAC_CGU_DWORD19 0x4c +union nac_cgu_dword19 { + struct { + u32 tspll_fbdiv_intgr : 8; + u32 fdpll_ulck_thr : 5; + u32 misc15 : 3; + u32 tspll_ndivratio : 4; + u32 tspll_iref_ndivratio : 3; + u32 misc19 : 1; + u32 japll_ndivratio : 4; + u32 japll_iref_ndivratio : 3; + u32 misc27 : 1; + } field; + u32 val; +}; + +#define NAC_CGU_DWORD22 0x58 +union nac_cgu_dword22 { + struct { + u32 fdpll_frac_div_out_nc : 2; + u32 fdpll_lock_int_for : 1; + u32 synce_hdov_int_for : 1; + u32 synce_lock_int_for : 1; + u32 fdpll_phlead_slip_nc : 1; + u32 fdpll_acc1_ovfl_nc : 1; + u32 fdpll_acc2_ovfl_nc : 1; + u32 synce_status_nc : 6; + u32 fdpll_acc1f_ovfl : 1; + u32 misc18 : 1; + u32 fdpllclk_div : 4; + u32 time1588clk_div : 4; + u32 synceclk_div : 4; + u32 synceclk_sel_div2 : 1; + u32 fdpllclk_sel_div2 : 1; + u32 time1588clk_sel_div2 : 1; + u32 misc3 : 1; + } field; + u32 val; +}; + +#define NAC_CGU_DWORD24 0x60 +union nac_cgu_dword24 { + struct { + u32 tspll_fbdiv_frac : 22; + u32 misc20 : 2; + u32 ts_pll_enable : 1; + u32 time_sync_tspll_align_sel : 1; + u32 ext_synce_sel : 1; + u32 ref1588_ck_div : 4; + u32 time_ref_sel : 1; + } field; + u32 val; +}; + +#define TSPLL_CNTR_BIST_SETTINGS 0x344 +union tspll_cntr_bist_settings { + struct { + u32 i_irefgen_settling_time_cntr_7_0 : 8; + u32 i_irefgen_settling_time_ro_standby_1_0 : 2; + u32 reserved195 : 5; + u32 i_plllock_sel_0 : 1; + u32 i_plllock_sel_1 : 1; + u32 i_plllock_cnt_6_0 : 7; + u32 i_plllock_cnt_10_7 : 4; + u32 reserved200 : 4; + } field; + u32 val; +}; + +#define TSPLL_RO_BWM_LF 0x370 +union tspll_ro_bwm_lf { + struct { + u32 bw_freqov_high_cri_7_0 : 8; + u32 bw_freqov_high_cri_9_8 : 2; + u32 biascaldone_cri : 1; + u32 plllock_gain_tran_cri : 1; + u32 plllock_true_lock_cri : 1; + u32 pllunlock_flag_cri : 1; + u32 afcerr_cri : 1; + u32 afcdone_cri : 1; + u32 feedfwrdgain_cal_cri_7_0 : 8; + u32 m2fbdivmod_cri_7_0 : 8; + } field; + u32 val; +}; + +#endif /* _ICE_CGU_REGS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 157add1268d9..2a1ee60e85f4 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -2189,6 +2189,18 @@ ice_parse_1588_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p, info->clk_freq = (number & ICE_TS_CLK_FREQ_M) >> ICE_TS_CLK_FREQ_S; info->clk_src = ((number & ICE_TS_CLK_SRC_M) != 0); + if (info->clk_freq < NUM_ICE_TIME_REF_FREQ) { + info->time_ref = (enum ice_time_ref_freq)info->clk_freq; + } else { + /* Unknown clock frequency, so assume a (probably incorrect) + * default to avoid out-of-bounds look ups of frequency + * related information. + */ + ice_debug(hw, ICE_DBG_INIT, "1588 func caps: unknown clock frequency %u\n", + info->clk_freq); + info->time_ref = ICE_TIME_REF_FREQ_25_000; + } + ice_debug(hw, ICE_DBG_INIT, "func caps: ieee_1588 = %u\n", func_p->common_cap.ieee_1588); ice_debug(hw, ICE_DBG_INIT, "func caps: src_tmr_owned = %u\n", diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index a49082485642..d16738a3d3a7 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -100,6 +100,7 @@ #define PF_SB_ATQT 0x0022FE00 #define PF_SB_ATQT_ATQT_S 0 #define PF_SB_ATQT_ATQT_M ICE_M(0x3FF, 0) +#define PF_SB_REM_DEV_CTL 0x002300F0 #define PRTDCB_GENC 0x00083000 #define PRTDCB_GENC_PFCLDA_S 16 #define PRTDCB_GENC_PFCLDA_M ICE_M(0xFFFF, 16) @@ -440,6 +441,10 @@ #define GLV_UPRCL(_i) (0x003B2000 + ((_i) * 8)) #define GLV_UPTCL(_i) (0x0030A000 + ((_i) * 8)) #define PRTRPB_RDPC 0x000AC260 +#define GLHH_ART_CTL 0x000A41D4 +#define GLHH_ART_CTL_ACTIVE_M BIT(0) +#define GLHH_ART_TIME_H 0x000A41D8 +#define GLHH_ART_TIME_L 0x000A41DC #define GLTSYN_AUX_IN_0(_i) (0x000889D8 + ((_i) * 4)) #define GLTSYN_AUX_IN_0_INT_ENA_M BIT(4) #define GLTSYN_AUX_OUT_0(_i) (0x00088998 + ((_i) * 4)) @@ -452,6 +457,8 @@ #define GLTSYN_ENA_TSYN_ENA_M BIT(0) #define GLTSYN_EVNT_H_0(_i) (0x00088970 + ((_i) * 4)) #define GLTSYN_EVNT_L_0(_i) (0x00088968 + ((_i) * 4)) +#define GLTSYN_HHTIME_H(_i) (0x00088900 + ((_i) * 4)) +#define GLTSYN_HHTIME_L(_i) (0x000888F8 + ((_i) * 4)) #define GLTSYN_INCVAL_H(_i) (0x00088920 + ((_i) * 4)) #define GLTSYN_INCVAL_L(_i) (0x00088918 + ((_i) * 4)) #define GLTSYN_SHADJ_H(_i) (0x00088910 + ((_i) * 4)) @@ -468,6 +475,8 @@ #define GLTSYN_TGT_L_0(_i) (0x00088928 + ((_i) * 4)) #define GLTSYN_TIME_H(_i) (0x000888D8 + ((_i) * 4)) #define GLTSYN_TIME_L(_i) (0x000888D0 + ((_i) * 4)) +#define PFHH_SEM 0x000A4200 /* Reset Source: PFR */ +#define PFHH_SEM_BUSY_M BIT(0) #define PFTSYN_SEM 0x00088880 #define PFTSYN_SEM_BUSY_M BIT(0) #define VSIQF_FD_CNT(_VSI) (0x00464000 + ((_VSI) * 4)) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 865f2231bb24..476533f63e09 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -539,7 +539,7 @@ skip: ice_pf_dis_all_vsi(pf, false); if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) - ice_ptp_release(pf); + ice_ptp_prepare_for_reset(pf); if (hw->port_info) ice_sched_clear_port(hw->port_info); @@ -1063,6 +1063,9 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, if (link_up == old_link && link_speed == old_link_speed) return 0; + if (!ice_is_e810(&pf->hw)) + ice_ptp_link_change(pf, pf->hw.pf_id, link_up); + if (ice_is_dcb_active(pf)) { if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) ice_dcb_rebuild(pf); @@ -4123,13 +4126,14 @@ static void ice_log_pkg_init(struct ice_hw *hw, enum ice_ddp_state state) break; case ICE_DDP_PKG_LOAD_ERROR: dev_err(dev, "An error occurred on the device while loading the DDP package. The device will be reset.\n"); - /* poll for reset to complete */ - if (ice_check_reset(hw)) - dev_err(dev, "Error resetting device. Please reload the driver\n"); + /* poll for reset to complete */ + if (ice_check_reset(hw)) + dev_err(dev, "Error resetting device. Please reload the driver\n"); break; case ICE_DDP_PKG_ERR: default: dev_err(dev, "An unknown error occurred when loading the DDP package. Entering Safe Mode.\n"); + break; } } @@ -5839,6 +5843,8 @@ static int ice_up_complete(struct ice_vsi *vsi) ice_print_link_msg(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); + if (!ice_is_e810(&pf->hw)) + ice_ptp_link_change(pf, pf->hw.pf_id, true); } /* clear this now, and the first stats read will be used as baseline */ @@ -6239,6 +6245,8 @@ int ice_down(struct ice_vsi *vsi) WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state)); if (vsi->netdev && vsi->type == ICE_VSI_PF) { + if (!ice_is_e810(&vsi->back->hw)) + ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); netif_carrier_off(vsi->netdev); netif_tx_disable(vsi->netdev); } else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { @@ -6685,7 +6693,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) * fail. */ if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) - ice_ptp_init(pf); + ice_ptp_reset(pf); /* rebuild PF VSI */ err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF); @@ -6694,6 +6702,10 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) goto err_vsi_rebuild; } + /* configure PTP timestamping after VSI rebuild */ + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_cfg_timestamp(pf, false); + err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL); if (err) { dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 0014a1002ed3..d3f65e061a62 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -6,6 +6,8 @@ #define E810_OUT_PROP_DELAY_NS 1 +#define UNKNOWN_INCVAL_E822 0x100000000ULL + static const struct ptp_pin_desc ice_pin_desc_e810t[] = { /* name idx func chan */ { "GNSS", GNSS, PTP_PF_EXTTS, 0, { 0, } }, @@ -281,6 +283,8 @@ static void ice_set_tx_tstamp(struct ice_pf *pf, bool on) else val &= ~PFINT_OICR_TSYN_TX_M; wr32(&pf->hw, PFINT_OICR_ENA, val); + + pf->ptp.tstamp_config.tx_type = on ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; } /** @@ -303,6 +307,9 @@ static void ice_set_rx_tstamp(struct ice_pf *pf, bool on) continue; vsi->rx_rings[i]->ptp_rx = on; } + + pf->ptp.tstamp_config.rx_filter = on ? HWTSTAMP_FILTER_ALL : + HWTSTAMP_FILTER_NONE; } /** @@ -313,18 +320,10 @@ static void ice_set_rx_tstamp(struct ice_pf *pf, bool on) * This function will configure timestamping during PTP initialization * and deinitialization */ -static void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena) +void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena) { ice_set_tx_tstamp(pf, ena); ice_set_rx_tstamp(pf, ena); - - if (ena) { - pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_ALL; - pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_ON; - } else { - pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; - pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_OFF; - } } /** @@ -682,6 +681,406 @@ static int ice_ptp_write_adj(struct ice_pf *pf, s32 adj) } /** + * ice_base_incval - Get base timer increment value + * @pf: Board private structure + * + * Look up the base timer increment value for this device. The base increment + * value is used to define the nominal clock tick rate. This increment value + * is programmed during device initialization. It is also used as the basis + * for calculating adjustments using scaled_ppm. + */ +static u64 ice_base_incval(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + u64 incval; + + if (ice_is_e810(hw)) + incval = ICE_PTP_NOMINAL_INCVAL_E810; + else if (ice_e822_time_ref(hw) < NUM_ICE_TIME_REF_FREQ) + incval = ice_e822_nominal_incval(ice_e822_time_ref(hw)); + else + incval = UNKNOWN_INCVAL_E822; + + dev_dbg(ice_pf_to_dev(pf), "PTP: using base increment value of 0x%016llx\n", + incval); + + return incval; +} + +/** + * ice_ptp_reset_ts_memory_quad - Reset timestamp memory for one quad + * @pf: The PF private data structure + * @quad: The quad (0-4) + */ +static void ice_ptp_reset_ts_memory_quad(struct ice_pf *pf, int quad) +{ + struct ice_hw *hw = &pf->hw; + + ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, Q_REG_TS_CTRL_M); + ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, ~(u32)Q_REG_TS_CTRL_M); +} + +/** + * ice_ptp_check_tx_fifo - Check whether Tx FIFO is in an OK state + * @port: PTP port for which Tx FIFO is checked + */ +static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port) +{ + int quad = port->port_num / ICE_PORTS_PER_QUAD; + int offs = port->port_num % ICE_PORTS_PER_QUAD; + struct ice_pf *pf; + struct ice_hw *hw; + u32 val, phy_sts; + int err; + + pf = ptp_port_to_pf(port); + hw = &pf->hw; + + if (port->tx_fifo_busy_cnt == FIFO_OK) + return 0; + + /* need to read FIFO state */ + if (offs == 0 || offs == 1) + err = ice_read_quad_reg_e822(hw, quad, Q_REG_FIFO01_STATUS, + &val); + else + err = ice_read_quad_reg_e822(hw, quad, Q_REG_FIFO23_STATUS, + &val); + + if (err) { + dev_err(ice_pf_to_dev(pf), "PTP failed to check port %d Tx FIFO, err %d\n", + port->port_num, err); + return err; + } + + if (offs & 0x1) + phy_sts = (val & Q_REG_FIFO13_M) >> Q_REG_FIFO13_S; + else + phy_sts = (val & Q_REG_FIFO02_M) >> Q_REG_FIFO02_S; + + if (phy_sts & FIFO_EMPTY) { + port->tx_fifo_busy_cnt = FIFO_OK; + return 0; + } + + port->tx_fifo_busy_cnt++; + + dev_dbg(ice_pf_to_dev(pf), "Try %d, port %d FIFO not empty\n", + port->tx_fifo_busy_cnt, port->port_num); + + if (port->tx_fifo_busy_cnt == ICE_PTP_FIFO_NUM_CHECKS) { + dev_dbg(ice_pf_to_dev(pf), + "Port %d Tx FIFO still not empty; resetting quad %d\n", + port->port_num, quad); + ice_ptp_reset_ts_memory_quad(pf, quad); + port->tx_fifo_busy_cnt = FIFO_OK; + return 0; + } + + return -EAGAIN; +} + +/** + * ice_ptp_check_tx_offset_valid - Check if the Tx PHY offset is valid + * @port: the PTP port to check + * + * Checks whether the Tx offset for the PHY associated with this port is + * valid. Returns 0 if the offset is valid, and a non-zero error code if it is + * not. + */ +static int ice_ptp_check_tx_offset_valid(struct ice_ptp_port *port) +{ + struct ice_pf *pf = ptp_port_to_pf(port); + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + u32 val; + int err; + + err = ice_ptp_check_tx_fifo(port); + if (err) + return err; + + err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_TX_OV_STATUS, + &val); + if (err) { + dev_err(dev, "Failed to read TX_OV_STATUS for port %d, err %d\n", + port->port_num, err); + return -EAGAIN; + } + + if (!(val & P_REG_TX_OV_STATUS_OV_M)) + return -EAGAIN; + + return 0; +} + +/** + * ice_ptp_check_rx_offset_valid - Check if the Rx PHY offset is valid + * @port: the PTP port to check + * + * Checks whether the Rx offset for the PHY associated with this port is + * valid. Returns 0 if the offset is valid, and a non-zero error code if it is + * not. + */ +static int ice_ptp_check_rx_offset_valid(struct ice_ptp_port *port) +{ + struct ice_pf *pf = ptp_port_to_pf(port); + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + int err; + u32 val; + + err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_RX_OV_STATUS, + &val); + if (err) { + dev_err(dev, "Failed to read RX_OV_STATUS for port %d, err %d\n", + port->port_num, err); + return err; + } + + if (!(val & P_REG_RX_OV_STATUS_OV_M)) + return -EAGAIN; + + return 0; +} + +/** + * ice_ptp_check_offset_valid - Check port offset valid bit + * @port: Port for which offset valid bit is checked + * + * Returns 0 if both Tx and Rx offset are valid, and -EAGAIN if one of the + * offset is not ready. + */ +static int ice_ptp_check_offset_valid(struct ice_ptp_port *port) +{ + int tx_err, rx_err; + + /* always check both Tx and Rx offset validity */ + tx_err = ice_ptp_check_tx_offset_valid(port); + rx_err = ice_ptp_check_rx_offset_valid(port); + + if (tx_err || rx_err) + return -EAGAIN; + + return 0; +} + +/** + * ice_ptp_wait_for_offset_valid - Check for valid Tx and Rx offsets + * @work: Pointer to the kthread_work structure for this task + * + * Check whether both the Tx and Rx offsets are valid for enabling the vernier + * calibration. + * + * Once we have valid offsets from hardware, update the total Tx and Rx + * offsets, and exit bypass mode. This enables more precise timestamps using + * the extra data measured during the vernier calibration process. + */ +static void ice_ptp_wait_for_offset_valid(struct kthread_work *work) +{ + struct ice_ptp_port *port; + int err; + struct device *dev; + struct ice_pf *pf; + struct ice_hw *hw; + + port = container_of(work, struct ice_ptp_port, ov_work.work); + pf = ptp_port_to_pf(port); + hw = &pf->hw; + dev = ice_pf_to_dev(pf); + + if (ice_ptp_check_offset_valid(port)) { + /* Offsets not ready yet, try again later */ + kthread_queue_delayed_work(pf->ptp.kworker, + &port->ov_work, + msecs_to_jiffies(100)); + return; + } + + /* Offsets are valid, so it is safe to exit bypass mode */ + err = ice_phy_exit_bypass_e822(hw, port->port_num); + if (err) { + dev_warn(dev, "Failed to exit bypass mode for PHY port %u, err %d\n", + port->port_num, err); + return; + } +} + +/** + * ice_ptp_port_phy_stop - Stop timestamping for a PHY port + * @ptp_port: PTP port to stop + */ +static int +ice_ptp_port_phy_stop(struct ice_ptp_port *ptp_port) +{ + struct ice_pf *pf = ptp_port_to_pf(ptp_port); + u8 port = ptp_port->port_num; + struct ice_hw *hw = &pf->hw; + int err; + + if (ice_is_e810(hw)) + return 0; + + mutex_lock(&ptp_port->ps_lock); + + kthread_cancel_delayed_work_sync(&ptp_port->ov_work); + + err = ice_stop_phy_timer_e822(hw, port, true); + if (err) + dev_err(ice_pf_to_dev(pf), "PTP failed to set PHY port %d down, err %d\n", + port, err); + + mutex_unlock(&ptp_port->ps_lock); + + return err; +} + +/** + * ice_ptp_port_phy_restart - (Re)start and calibrate PHY timestamping + * @ptp_port: PTP port for which the PHY start is set + * + * Start the PHY timestamping block, and initiate Vernier timestamping + * calibration. If timestamping cannot be calibrated (such as if link is down) + * then disable the timestamping block instead. + */ +static int +ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) +{ + struct ice_pf *pf = ptp_port_to_pf(ptp_port); + u8 port = ptp_port->port_num; + struct ice_hw *hw = &pf->hw; + int err; + + if (ice_is_e810(hw)) + return 0; + + if (!ptp_port->link_up) + return ice_ptp_port_phy_stop(ptp_port); + + mutex_lock(&ptp_port->ps_lock); + + kthread_cancel_delayed_work_sync(&ptp_port->ov_work); + + /* temporarily disable Tx timestamps while calibrating PHY offset */ + ptp_port->tx.calibrating = true; + ptp_port->tx_fifo_busy_cnt = 0; + + /* Start the PHY timer in bypass mode */ + err = ice_start_phy_timer_e822(hw, port, true); + if (err) + goto out_unlock; + + /* Enable Tx timestamps right away */ + ptp_port->tx.calibrating = false; + + kthread_queue_delayed_work(pf->ptp.kworker, &ptp_port->ov_work, 0); + +out_unlock: + if (err) + dev_err(ice_pf_to_dev(pf), "PTP failed to set PHY port %d up, err %d\n", + port, err); + + mutex_unlock(&ptp_port->ps_lock); + + return err; +} + +/** + * ice_ptp_link_change - Set or clear port registers for timestamping + * @pf: Board private structure + * @port: Port for which the PHY start is set + * @linkup: Link is up or down + */ +int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) +{ + struct ice_ptp_port *ptp_port; + + if (!test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + return 0; + + if (port >= ICE_NUM_EXTERNAL_PORTS) + return -EINVAL; + + ptp_port = &pf->ptp.port; + if (ptp_port->port_num != port) + return -EINVAL; + + /* Update cached link err for this port immediately */ + ptp_port->link_up = linkup; + + if (!test_bit(ICE_FLAG_PTP, pf->flags)) + /* PTP is not setup */ + return -EAGAIN; + + return ice_ptp_port_phy_restart(ptp_port); +} + +/** + * ice_ptp_reset_ts_memory - Reset timestamp memory for all quads + * @pf: The PF private data structure + */ +static void ice_ptp_reset_ts_memory(struct ice_pf *pf) +{ + int quad; + + quad = pf->hw.port_info->lport / ICE_PORTS_PER_QUAD; + ice_ptp_reset_ts_memory_quad(pf, quad); +} + +/** + * ice_ptp_tx_ena_intr - Enable or disable the Tx timestamp interrupt + * @pf: PF private structure + * @ena: bool value to enable or disable interrupt + * @threshold: Minimum number of packets at which intr is triggered + * + * Utility function to enable or disable Tx timestamp interrupt and threshold + */ +static int ice_ptp_tx_ena_intr(struct ice_pf *pf, bool ena, u32 threshold) +{ + struct ice_hw *hw = &pf->hw; + int err = 0; + int quad; + u32 val; + + ice_ptp_reset_ts_memory(pf); + + for (quad = 0; quad < ICE_MAX_QUAD; quad++) { + err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG, + &val); + if (err) + break; + + if (ena) { + val |= Q_REG_TX_MEM_GBL_CFG_INTR_ENA_M; + val &= ~Q_REG_TX_MEM_GBL_CFG_INTR_THR_M; + val |= ((threshold << Q_REG_TX_MEM_GBL_CFG_INTR_THR_S) & + Q_REG_TX_MEM_GBL_CFG_INTR_THR_M); + } else { + val &= ~Q_REG_TX_MEM_GBL_CFG_INTR_ENA_M; + } + + err = ice_write_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG, + val); + if (err) + break; + } + + if (err) + dev_err(ice_pf_to_dev(pf), "PTP failed in intr ena, err %d\n", + err); + return err; +} + +/** + * ice_ptp_reset_phy_timestamping - Reset PHY timestamping block + * @pf: Board private structure + */ +static void ice_ptp_reset_phy_timestamping(struct ice_pf *pf) +{ + ice_ptp_port_phy_restart(&pf->ptp.port); +} + +/** * ice_ptp_adjfine - Adjust clock increment rate * @info: the driver's PTP info structure * @scaled_ppm: Parts per million with 16-bit fractional field @@ -698,7 +1097,7 @@ static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm) int neg_adj = 0; int err; - incval = ICE_PTP_NOMINAL_INCVAL_E810; + incval = ice_base_incval(pf); if (scaled_ppm < 0) { neg_adj = 1; @@ -905,7 +1304,10 @@ static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan, start_time = div64_u64(current_time + NSEC_PER_SEC - 1, NSEC_PER_SEC) * NSEC_PER_SEC + phase; - start_time -= E810_OUT_PROP_DELAY_NS; + if (ice_is_e810(hw)) + start_time -= E810_OUT_PROP_DELAY_NS; + else + start_time -= ice_e822_pps_delay(ice_e822_time_ref(hw)); /* 2. Write TARGET time */ wr32(hw, GLTSYN_TGT_L(chan, tmr_idx), lower_32_bits(start_time)); @@ -1088,6 +1490,12 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts) struct ice_hw *hw = &pf->hw; int err; + /* For Vernier mode, we need to recalibrate after new settime + * Start with disabling timestamp block + */ + if (pf->ptp.port.link_up) + ice_ptp_port_phy_stop(&pf->ptp.port); + if (!ice_ptp_lock(hw)) { err = -EBUSY; goto exit; @@ -1104,6 +1512,10 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts) /* Reenable periodic outputs */ ice_ptp_enable_all_clkout(pf); + + /* Recalibrate and re-enable timestamp block */ + if (pf->ptp.port.link_up) + ice_ptp_port_phy_restart(&pf->ptp.port); exit: if (err) { dev_err(ice_pf_to_dev(pf), "PTP failed to set time %d\n", err); @@ -1177,6 +1589,101 @@ static int ice_ptp_adjtime(struct ptp_clock_info *info, s64 delta) return 0; } +#ifdef CONFIG_ICE_HWTS +/** + * ice_ptp_get_syncdevicetime - Get the cross time stamp info + * @device: Current device time + * @system: System counter value read synchronously with device time + * @ctx: Context provided by timekeeping code + * + * Read device and system (ART) clock simultaneously and return the corrected + * clock values in ns. + */ +static int +ice_ptp_get_syncdevicetime(ktime_t *device, + struct system_counterval_t *system, + void *ctx) +{ + struct ice_pf *pf = (struct ice_pf *)ctx; + struct ice_hw *hw = &pf->hw; + u32 hh_lock, hh_art_ctl; + int i; + + /* Get the HW lock */ + hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); + if (hh_lock & PFHH_SEM_BUSY_M) { + dev_err(ice_pf_to_dev(pf), "PTP failed to get hh lock\n"); + return -EFAULT; + } + + /* Start the ART and device clock sync sequence */ + hh_art_ctl = rd32(hw, GLHH_ART_CTL); + hh_art_ctl = hh_art_ctl | GLHH_ART_CTL_ACTIVE_M; + wr32(hw, GLHH_ART_CTL, hh_art_ctl); + +#define MAX_HH_LOCK_TRIES 100 + + for (i = 0; i < MAX_HH_LOCK_TRIES; i++) { + /* Wait for sync to complete */ + hh_art_ctl = rd32(hw, GLHH_ART_CTL); + if (hh_art_ctl & GLHH_ART_CTL_ACTIVE_M) { + udelay(1); + continue; + } else { + u32 hh_ts_lo, hh_ts_hi, tmr_idx; + u64 hh_ts; + + tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc; + /* Read ART time */ + hh_ts_lo = rd32(hw, GLHH_ART_TIME_L); + hh_ts_hi = rd32(hw, GLHH_ART_TIME_H); + hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo; + *system = convert_art_ns_to_tsc(hh_ts); + /* Read Device source clock time */ + hh_ts_lo = rd32(hw, GLTSYN_HHTIME_L(tmr_idx)); + hh_ts_hi = rd32(hw, GLTSYN_HHTIME_H(tmr_idx)); + hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo; + *device = ns_to_ktime(hh_ts); + break; + } + } + /* Release HW lock */ + hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); + hh_lock = hh_lock & ~PFHH_SEM_BUSY_M; + wr32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), hh_lock); + + if (i == MAX_HH_LOCK_TRIES) + return -ETIMEDOUT; + + return 0; +} + +/** + * ice_ptp_getcrosststamp_e822 - Capture a device cross timestamp + * @info: the driver's PTP info structure + * @cts: The memory to fill the cross timestamp info + * + * Capture a cross timestamp between the ART and the device PTP hardware + * clock. Fill the cross timestamp information and report it back to the + * caller. + * + * This is only valid for E822 devices which have support for generating the + * cross timestamp via PCIe PTM. + * + * In order to correctly correlate the ART timestamp back to the TSC time, the + * CPU must have X86_FEATURE_TSC_KNOWN_FREQ. + */ +static int +ice_ptp_getcrosststamp_e822(struct ptp_clock_info *info, + struct system_device_crosststamp *cts) +{ + struct ice_pf *pf = ptp_info_to_pf(info); + + return get_device_system_crosststamp(ice_ptp_get_syncdevicetime, + pf, NULL, cts); +} +#endif /* CONFIG_ICE_HWTS */ + /** * ice_ptp_get_ts_config - ioctl interface to read the timestamping config * @pf: Board private structure @@ -1234,7 +1741,6 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config) case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: case HWTSTAMP_FILTER_ALL: - config->rx_filter = HWTSTAMP_FILTER_ALL; ice_set_rx_tstamp(pf, true); break; default: @@ -1266,8 +1772,8 @@ int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) if (err) return err; - /* Save these settings for future reference */ - pf->ptp.tstamp_config = config; + /* Return the actual configuration set */ + config = pf->ptp.tstamp_config; return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; @@ -1399,6 +1905,26 @@ static void ice_ptp_setup_pins_e810(struct ptp_clock_info *info) } /** + * ice_ptp_set_funcs_e822 - Set specialized functions for E822 support + * @pf: Board private structure + * @info: PTP info to fill + * + * Assign functions to the PTP capabiltiies structure for E822 devices. + * Functions which operate across all device families should be set directly + * in ice_ptp_set_caps. Only add functions here which are distinct for E822 + * devices. + */ +static void +ice_ptp_set_funcs_e822(struct ice_pf *pf, struct ptp_clock_info *info) +{ +#ifdef CONFIG_ICE_HWTS + if (boot_cpu_has(X86_FEATURE_ART) && + boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) + info->getcrosststamp = ice_ptp_getcrosststamp_e822; +#endif /* CONFIG_ICE_HWTS */ +} + +/** * ice_ptp_set_funcs_e810 - Set specialized functions for E810 support * @pf: Board private structure * @info: PTP info to fill @@ -1437,7 +1963,10 @@ static void ice_ptp_set_caps(struct ice_pf *pf) info->gettimex64 = ice_ptp_gettimex64; info->settime64 = ice_ptp_settime64; - ice_ptp_set_funcs_e810(pf, info); + if (ice_is_e810(&pf->hw)) + ice_ptp_set_funcs_e810(pf, info); + else + ice_ptp_set_funcs_e822(pf, info); } /** @@ -1584,7 +2113,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) u8 idx; /* Check if this tracker is initialized */ - if (!tx->init) + if (!tx->init || tx->calibrating) return -1; spin_lock(&tx->lock); @@ -1707,6 +2236,27 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) } /** + * ice_ptp_init_tx_e822 - Initialize tracking for Tx timestamps + * @pf: Board private structure + * @tx: the Tx tracking structure to initialize + * @port: the port this structure tracks + * + * Initialize the Tx timestamp tracker for this port. For generic MAC devices, + * the timestamp block is shared for all ports in the same quad. To avoid + * ports using the same timestamp index, logically break the block of + * registers into chunks based on the port number. + */ +static int +ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) +{ + tx->quad = port / ICE_PORTS_PER_QUAD; + tx->quad_offset = tx->quad * INDEX_PER_PORT; + tx->len = INDEX_PER_PORT; + + return ice_ptp_alloc_tx_tracker(tx); +} + +/** * ice_ptp_init_tx_e810 - Initialize tracking for Tx timestamps * @pf: Board private structure * @tx: the Tx tracking structure to initialize @@ -1777,6 +2327,130 @@ static void ice_ptp_periodic_work(struct kthread_work *work) } /** + * ice_ptp_reset - Initialize PTP hardware clock support after reset + * @pf: Board private structure + */ +void ice_ptp_reset(struct ice_pf *pf) +{ + struct ice_ptp *ptp = &pf->ptp; + struct ice_hw *hw = &pf->hw; + struct timespec64 ts; + int err, itr = 1; + u64 time_diff; + + if (test_bit(ICE_PFR_REQ, pf->state)) + goto pfr; + + if (!hw->func_caps.ts_func_info.src_tmr_owned) + goto reset_ts; + + err = ice_ptp_init_phc(hw); + if (err) + goto err; + + /* Acquire the global hardware lock */ + if (!ice_ptp_lock(hw)) { + err = -EBUSY; + goto err; + } + + /* Write the increment time value to PHY and LAN */ + err = ice_ptp_write_incval(hw, ice_base_incval(pf)); + if (err) { + ice_ptp_unlock(hw); + goto err; + } + + /* Write the initial Time value to PHY and LAN using the cached PHC + * time before the reset and time difference between stopping and + * starting the clock. + */ + if (ptp->cached_phc_time) { + time_diff = ktime_get_real_ns() - ptp->reset_time; + ts = ns_to_timespec64(ptp->cached_phc_time + time_diff); + } else { + ts = ktime_to_timespec64(ktime_get_real()); + } + err = ice_ptp_write_init(pf, &ts); + if (err) { + ice_ptp_unlock(hw); + goto err; + } + + /* Release the global hardware lock */ + ice_ptp_unlock(hw); + + if (!ice_is_e810(hw)) { + /* Enable quad interrupts */ + err = ice_ptp_tx_ena_intr(pf, true, itr); + if (err) + goto err; + } + +reset_ts: + /* Restart the PHY timestamping block */ + ice_ptp_reset_phy_timestamping(pf); + +pfr: + /* Init Tx structures */ + if (ice_is_e810(&pf->hw)) { + err = ice_ptp_init_tx_e810(pf, &ptp->port.tx); + } else { + kthread_init_delayed_work(&ptp->port.ov_work, + ice_ptp_wait_for_offset_valid); + err = ice_ptp_init_tx_e822(pf, &ptp->port.tx, + ptp->port.port_num); + } + if (err) + goto err; + + set_bit(ICE_FLAG_PTP, pf->flags); + + /* Start periodic work going */ + kthread_queue_delayed_work(ptp->kworker, &ptp->work, 0); + + dev_info(ice_pf_to_dev(pf), "PTP reset successful\n"); + return; + +err: + dev_err(ice_pf_to_dev(pf), "PTP reset failed %d\n", err); +} + +/** + * ice_ptp_prepare_for_reset - Prepare PTP for reset + * @pf: Board private structure + */ +void ice_ptp_prepare_for_reset(struct ice_pf *pf) +{ + struct ice_ptp *ptp = &pf->ptp; + u8 src_tmr; + + clear_bit(ICE_FLAG_PTP, pf->flags); + + /* Disable timestamping for both Tx and Rx */ + ice_ptp_cfg_timestamp(pf, false); + + kthread_cancel_delayed_work_sync(&ptp->work); + kthread_cancel_work_sync(&ptp->extts_work); + + if (test_bit(ICE_PFR_REQ, pf->state)) + return; + + ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx); + + /* Disable periodic outputs */ + ice_ptp_disable_all_clkout(pf); + + src_tmr = ice_get_ptp_src_clock_index(&pf->hw); + + /* Disable source clock */ + wr32(&pf->hw, GLTSYN_ENA(src_tmr), (u32)~GLTSYN_ENA_TSYN_ENA_M); + + /* Acquire PHC and system timer to restore after reset */ + ptp->reset_time = ktime_get_real_ns(); +} + +/** * ice_ptp_init_owner - Initialize PTP_1588_CLOCK device * @pf: Board private structure * @@ -1786,27 +2460,16 @@ static void ice_ptp_periodic_work(struct kthread_work *work) */ static int ice_ptp_init_owner(struct ice_pf *pf) { - struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; struct timespec64 ts; - u8 src_idx; - int err; - - wr32(hw, GLTSYN_SYNC_DLAY, 0); - - /* Clear some HW residue and enable source clock */ - src_idx = hw->func_caps.ts_func_info.tmr_index_owned; + int err, itr = 1; - /* Enable source clocks */ - wr32(hw, GLTSYN_ENA(src_idx), GLTSYN_ENA_TSYN_ENA_M); - - /* Enable PHY time sync */ - err = ice_ptp_init_phy_e810(hw); - if (err) - goto err_exit; - - /* Clear event status indications for auxiliary pins */ - (void)rd32(hw, GLTSYN_STAT(src_idx)); + err = ice_ptp_init_phc(hw); + if (err) { + dev_err(ice_pf_to_dev(pf), "Failed to initialize PHC, err %d\n", + err); + return err; + } /* Acquire the global hardware lock */ if (!ice_ptp_lock(hw)) { @@ -1815,7 +2478,7 @@ static int ice_ptp_init_owner(struct ice_pf *pf) } /* Write the increment time value to PHY and LAN */ - err = ice_ptp_write_incval(hw, ICE_PTP_NOMINAL_INCVAL_E810); + err = ice_ptp_write_incval(hw, ice_base_incval(pf)); if (err) { ice_ptp_unlock(hw); goto err_exit; @@ -1832,6 +2495,13 @@ static int ice_ptp_init_owner(struct ice_pf *pf) /* Release the global hardware lock */ ice_ptp_unlock(hw); + if (!ice_is_e810(hw)) { + /* Enable quad interrupts */ + err = ice_ptp_tx_ena_intr(pf, true, itr); + if (err) + goto err_exit; + } + /* Ensure we have a clock device */ err = ice_ptp_create_clock(pf); if (err) @@ -1845,72 +2515,106 @@ static int ice_ptp_init_owner(struct ice_pf *pf) err_clk: pf->ptp.clock = NULL; err_exit: - dev_err(dev, "PTP failed to register clock, err %d\n", err); - return err; } /** - * ice_ptp_init - Initialize the PTP support after device probe or reset + * ice_ptp_init_work - Initialize PTP work threads + * @pf: Board private structure + * @ptp: PF PTP structure + */ +static int ice_ptp_init_work(struct ice_pf *pf, struct ice_ptp *ptp) +{ + struct kthread_worker *kworker; + + /* Initialize work functions */ + kthread_init_delayed_work(&ptp->work, ice_ptp_periodic_work); + kthread_init_work(&ptp->extts_work, ice_ptp_extts_work); + + /* Allocate a kworker for handling work required for the ports + * connected to the PTP hardware clock. + */ + kworker = kthread_create_worker(0, "ice-ptp-%s", + dev_name(ice_pf_to_dev(pf))); + if (IS_ERR(kworker)) + return PTR_ERR(kworker); + + ptp->kworker = kworker; + + /* Start periodic work going */ + kthread_queue_delayed_work(ptp->kworker, &ptp->work, 0); + + return 0; +} + +/** + * ice_ptp_init_port - Initialize PTP port structure + * @pf: Board private structure + * @ptp_port: PTP port structure + */ +static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) +{ + mutex_init(&ptp_port->ps_lock); + + if (ice_is_e810(&pf->hw)) + return ice_ptp_init_tx_e810(pf, &ptp_port->tx); + + kthread_init_delayed_work(&ptp_port->ov_work, + ice_ptp_wait_for_offset_valid); + return ice_ptp_init_tx_e822(pf, &ptp_port->tx, ptp_port->port_num); +} + +/** + * ice_ptp_init - Initialize PTP hardware clock support * @pf: Board private structure * - * This function sets device up for PTP support. The first time it is run, it - * will create a clock device. It does not create a clock device if one - * already exists. It also reconfigures the device after a reset. + * Set up the device for interacting with the PTP hardware clock for all + * functions, both the function that owns the clock hardware, and the + * functions connected to the clock hardware. + * + * The clock owner will allocate and register a ptp_clock with the + * PTP_1588_CLOCK infrastructure. All functions allocate a kthread and work + * items used for asynchronous work such as Tx timestamps and periodic work. */ void ice_ptp_init(struct ice_pf *pf) { - struct device *dev = ice_pf_to_dev(pf); - struct kthread_worker *kworker; + struct ice_ptp *ptp = &pf->ptp; struct ice_hw *hw = &pf->hw; int err; - /* PTP is currently only supported on E810 devices */ - if (!ice_is_e810(hw)) - return; - - /* Check if this PF owns the source timer */ + /* If this function owns the clock hardware, it must allocate and + * configure the PTP clock device to represent it. + */ if (hw->func_caps.ts_func_info.src_tmr_owned) { err = ice_ptp_init_owner(pf); if (err) - return; + goto err; } - /* Disable timestamping for both Tx and Rx */ - ice_ptp_cfg_timestamp(pf, false); - - /* Initialize the PTP port Tx timestamp tracker */ - ice_ptp_init_tx_e810(pf, &pf->ptp.port.tx); - - /* Initialize work functions */ - kthread_init_delayed_work(&pf->ptp.work, ice_ptp_periodic_work); - kthread_init_work(&pf->ptp.extts_work, ice_ptp_extts_work); + ptp->port.port_num = hw->pf_id; + err = ice_ptp_init_port(pf, &ptp->port); + if (err) + goto err; - /* Allocate a kworker for handling work required for the ports - * connected to the PTP hardware clock. - */ - kworker = kthread_create_worker(0, "ice-ptp-%s", dev_name(dev)); - if (IS_ERR(kworker)) { - err = PTR_ERR(kworker); - goto err_kworker; - } - pf->ptp.kworker = kworker; + /* Start the PHY timestamping block */ + ice_ptp_reset_phy_timestamping(pf); set_bit(ICE_FLAG_PTP, pf->flags); + err = ice_ptp_init_work(pf, ptp); + if (err) + goto err; - /* Start periodic work going */ - kthread_queue_delayed_work(pf->ptp.kworker, &pf->ptp.work, 0); - - dev_info(dev, "PTP init successful\n"); + dev_info(ice_pf_to_dev(pf), "PTP init successful\n"); return; -err_kworker: +err: /* If we registered a PTP clock, release it */ if (pf->ptp.clock) { - ptp_clock_unregister(pf->ptp.clock); + ptp_clock_unregister(ptp->clock); pf->ptp.clock = NULL; } - dev_err(dev, "PTP failed %d\n", err); + clear_bit(ICE_FLAG_PTP, pf->flags); + dev_err(ice_pf_to_dev(pf), "PTP failed %d\n", err); } /** @@ -1934,6 +2638,8 @@ void ice_ptp_release(struct ice_pf *pf) kthread_cancel_delayed_work_sync(&pf->ptp.work); + ice_ptp_port_phy_stop(&pf->ptp.port); + mutex_destroy(&pf->ptp.port.ps_lock); if (pf->ptp.kworker) { kthread_destroy_worker(pf->ptp.kworker); pf->ptp.kworker = NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 53c15fc9d996..afd048d69959 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -82,6 +82,8 @@ struct ice_tx_tstamp { * @quad_offset: offset into timestamp block of the quad to get the real index * @len: length of the tstamps and in_use fields. * @init: if true, the tracker is initialized; + * @calibrating: if true, the PHY is calibrating the Tx offset. During this + * window, timestamps are temporarily disabled. */ struct ice_ptp_tx { struct kthread_work work; @@ -92,6 +94,7 @@ struct ice_ptp_tx { u8 quad_offset; u8 len; u8 init; + u8 calibrating; }; /* Quad and port information for initializing timestamp blocks */ @@ -101,15 +104,24 @@ struct ice_ptp_tx { /** * struct ice_ptp_port - data used to initialize an external port for PTP * - * This structure contains PTP data related to the external ports. Currently - * it is used for tracking the Tx timestamps of a port. In the future this - * structure will also hold information for the E822 port initialization - * logic. + * This structure contains data indicating whether a single external port is + * ready for PTP functionality. It is used to track the port initialization + * and determine when the port's PHY offset is valid. * * @tx: Tx timestamp tracking for this port + * @ov_work: delayed work task for tracking when PHY offset is valid + * @ps_lock: mutex used to protect the overall PTP PHY start procedure + * @link_up: indicates whether the link is up + * @tx_fifo_busy_cnt: number of times the Tx FIFO was busy + * @port_num: the port number this structure represents */ struct ice_ptp_port { struct ice_ptp_tx tx; + struct kthread_delayed_work ov_work; + struct mutex ps_lock; /* protects overall PTP PHY start procedure */ + bool link_up; + u8 tx_fifo_busy_cnt; + u8 port_num; }; #define GLTSYN_TGT_H_IDX_MAX 4 @@ -127,6 +139,7 @@ struct ice_ptp_port { * @info: structure defining PTP hardware capabilities * @clock: pointer to registered PTP clock device * @tstamp_config: hardware timestamping configuration + * @reset_time: kernel time after clock stop on reset */ struct ice_ptp { struct ice_ptp_port port; @@ -140,6 +153,7 @@ struct ice_ptp { struct ptp_clock_info info; struct ptp_clock *clock; struct hwtstamp_config tstamp_config; + u64 reset_time; }; #define __ptp_port_to_ptp(p) \ @@ -152,9 +166,15 @@ struct ice_ptp { #define ptp_info_to_pf(i) \ container_of(__ptp_info_to_ptp((i)), struct ice_pf, ptp) +#define PFTSYN_SEM_BYTES 4 #define PTP_SHARED_CLK_IDX_VALID BIT(31) +#define TS_CMD_MASK 0xF +#define SYNC_EXEC_CMD 0x3 #define ICE_PTP_TS_VALID BIT(0) +#define FIFO_EMPTY BIT(2) +#define FIFO_OK 0xFF +#define ICE_PTP_FIFO_NUM_CHECKS 5 /* Per-channel register definitions */ #define GLTSYN_AUX_OUT(_chan, _idx) (GLTSYN_AUX_OUT_0(_idx) + ((_chan) * 8)) #define GLTSYN_AUX_IN(_chan, _idx) (GLTSYN_AUX_IN_0(_idx) + ((_chan) * 8)) @@ -175,11 +195,13 @@ struct ice_ptp { #define N_PER_OUT_E810T 3 #define N_PER_OUT_E810T_NO_SMA 2 #define N_EXT_TS_E810_NO_SMA 2 +#define ETH_GLTSYN_ENA(_i) (0x03000348 + ((_i) * 4)) #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) struct ice_pf; int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr); int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr); +void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena); int ice_get_ptp_clock_index(struct ice_pf *pf); s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); @@ -188,8 +210,11 @@ void ice_ptp_process_ts(struct ice_pf *pf); void ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb); +void ice_ptp_reset(struct ice_pf *pf); +void ice_ptp_prepare_for_reset(struct ice_pf *pf); void ice_ptp_init(struct ice_pf *pf); void ice_ptp_release(struct ice_pf *pf); +int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup); #else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) { @@ -201,6 +226,7 @@ static inline int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr) return -EOPNOTSUPP; } +static inline void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena) { } static inline int ice_get_ptp_clock_index(struct ice_pf *pf) { return -1; @@ -216,7 +242,11 @@ static inline void ice_ptp_process_ts(struct ice_pf *pf) { } static inline void ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { } +static inline void ice_ptp_reset(struct ice_pf *pf) { } +static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf) { } static inline void ice_ptp_init(struct ice_pf *pf) { } static inline void ice_ptp_release(struct ice_pf *pf) { } +static inline int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) +{ return 0; } #endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ #endif /* _ICE_PTP_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_consts.h b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h new file mode 100644 index 000000000000..4109aa3b2fcd --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h @@ -0,0 +1,374 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2021, Intel Corporation. */ + +#ifndef _ICE_PTP_CONSTS_H_ +#define _ICE_PTP_CONSTS_H_ + +/* Constant definitions related to the hardware clock used for PTP 1588 + * features and functionality. + */ +/* Constants defined for the PTP 1588 clock hardware. */ + +/* struct ice_time_ref_info_e822 + * + * E822 hardware can use different sources as the reference for the PTP + * hardware clock. Each clock has different characteristics such as a slightly + * different frequency, etc. + * + * This lookup table defines several constants that depend on the current time + * reference. See the struct ice_time_ref_info_e822 for information about the + * meaning of each constant. + */ +const struct ice_time_ref_info_e822 e822_time_ref[NUM_ICE_TIME_REF_FREQ] = { + /* ICE_TIME_REF_FREQ_25_000 -> 25 MHz */ + { + /* pll_freq */ + 823437500, /* 823.4375 MHz PLL */ + /* nominal_incval */ + 0x136e44fabULL, + /* pps_delay */ + 11, + }, + + /* ICE_TIME_REF_FREQ_122_880 -> 122.88 MHz */ + { + /* pll_freq */ + 783360000, /* 783.36 MHz */ + /* nominal_incval */ + 0x146cc2177ULL, + /* pps_delay */ + 12, + }, + + /* ICE_TIME_REF_FREQ_125_000 -> 125 MHz */ + { + /* pll_freq */ + 796875000, /* 796.875 MHz */ + /* nominal_incval */ + 0x141414141ULL, + /* pps_delay */ + 12, + }, + + /* ICE_TIME_REF_FREQ_153_600 -> 153.6 MHz */ + { + /* pll_freq */ + 816000000, /* 816 MHz */ + /* nominal_incval */ + 0x139b9b9baULL, + /* pps_delay */ + 12, + }, + + /* ICE_TIME_REF_FREQ_156_250 -> 156.25 MHz */ + { + /* pll_freq */ + 830078125, /* 830.78125 MHz */ + /* nominal_incval */ + 0x134679aceULL, + /* pps_delay */ + 11, + }, + + /* ICE_TIME_REF_FREQ_245_760 -> 245.76 MHz */ + { + /* pll_freq */ + 783360000, /* 783.36 MHz */ + /* nominal_incval */ + 0x146cc2177ULL, + /* pps_delay */ + 12, + }, +}; + +const struct ice_cgu_pll_params_e822 e822_cgu_params[NUM_ICE_TIME_REF_FREQ] = { + /* ICE_TIME_REF_FREQ_25_000 -> 25 MHz */ + { + /* refclk_pre_div */ + 1, + /* feedback_div */ + 197, + /* frac_n_div */ + 2621440, + /* post_pll_div */ + 6, + }, + + /* ICE_TIME_REF_FREQ_122_880 -> 122.88 MHz */ + { + /* refclk_pre_div */ + 5, + /* feedback_div */ + 223, + /* frac_n_div */ + 524288, + /* post_pll_div */ + 7, + }, + + /* ICE_TIME_REF_FREQ_125_000 -> 125 MHz */ + { + /* refclk_pre_div */ + 5, + /* feedback_div */ + 223, + /* frac_n_div */ + 524288, + /* post_pll_div */ + 7, + }, + + /* ICE_TIME_REF_FREQ_153_600 -> 153.6 MHz */ + { + /* refclk_pre_div */ + 5, + /* feedback_div */ + 159, + /* frac_n_div */ + 1572864, + /* post_pll_div */ + 6, + }, + + /* ICE_TIME_REF_FREQ_156_250 -> 156.25 MHz */ + { + /* refclk_pre_div */ + 5, + /* feedback_div */ + 159, + /* frac_n_div */ + 1572864, + /* post_pll_div */ + 6, + }, + + /* ICE_TIME_REF_FREQ_245_760 -> 245.76 MHz */ + { + /* refclk_pre_div */ + 10, + /* feedback_div */ + 223, + /* frac_n_div */ + 524288, + /* post_pll_div */ + 7, + }, +}; + +/* struct ice_vernier_info_e822 + * + * E822 hardware calibrates the delay of the timestamp indication from the + * actual packet transmission or reception during the initialization of the + * PHY. To do this, the hardware mechanism uses some conversions between the + * various clocks within the PHY block. This table defines constants used to + * calculate the correct conversion ratios in the PHY registers. + * + * Many of the values relate to the PAR/PCS clock conversion registers. For + * these registers, a value of 0 means that the associated register is not + * used by this link speed, and that the register should be cleared by writing + * 0. Other values specify the clock frequency in Hz. + */ +const struct ice_vernier_info_e822 e822_vernier[NUM_ICE_PTP_LNK_SPD] = { + /* ICE_PTP_LNK_SPD_1G */ + { + /* tx_par_clk */ + 31250000, /* 31.25 MHz */ + /* rx_par_clk */ + 31250000, /* 31.25 MHz */ + /* tx_pcs_clk */ + 125000000, /* 125 MHz */ + /* rx_pcs_clk */ + 125000000, /* 125 MHz */ + /* tx_desk_rsgb_par */ + 0, /* unused */ + /* rx_desk_rsgb_par */ + 0, /* unused */ + /* tx_desk_rsgb_pcs */ + 0, /* unused */ + /* rx_desk_rsgb_pcs */ + 0, /* unused */ + /* tx_fixed_delay */ + 25140, + /* pmd_adj_divisor */ + 10000000, + /* rx_fixed_delay */ + 17372, + }, + /* ICE_PTP_LNK_SPD_10G */ + { + /* tx_par_clk */ + 257812500, /* 257.8125 MHz */ + /* rx_par_clk */ + 257812500, /* 257.8125 MHz */ + /* tx_pcs_clk */ + 156250000, /* 156.25 MHz */ + /* rx_pcs_clk */ + 156250000, /* 156.25 MHz */ + /* tx_desk_rsgb_par */ + 0, /* unused */ + /* rx_desk_rsgb_par */ + 0, /* unused */ + /* tx_desk_rsgb_pcs */ + 0, /* unused */ + /* rx_desk_rsgb_pcs */ + 0, /* unused */ + /* tx_fixed_delay */ + 6938, + /* pmd_adj_divisor */ + 82500000, + /* rx_fixed_delay */ + 6212, + }, + /* ICE_PTP_LNK_SPD_25G */ + { + /* tx_par_clk */ + 644531250, /* 644.53125 MHZ */ + /* rx_par_clk */ + 644531250, /* 644.53125 MHz */ + /* tx_pcs_clk */ + 390625000, /* 390.625 MHz */ + /* rx_pcs_clk */ + 390625000, /* 390.625 MHz */ + /* tx_desk_rsgb_par */ + 0, /* unused */ + /* rx_desk_rsgb_par */ + 0, /* unused */ + /* tx_desk_rsgb_pcs */ + 0, /* unused */ + /* rx_desk_rsgb_pcs */ + 0, /* unused */ + /* tx_fixed_delay */ + 2778, + /* pmd_adj_divisor */ + 206250000, + /* rx_fixed_delay */ + 2491, + }, + /* ICE_PTP_LNK_SPD_25G_RS */ + { + /* tx_par_clk */ + 0, /* unused */ + /* rx_par_clk */ + 0, /* unused */ + /* tx_pcs_clk */ + 0, /* unused */ + /* rx_pcs_clk */ + 0, /* unused */ + /* tx_desk_rsgb_par */ + 161132812, /* 162.1328125 MHz Reed Solomon gearbox */ + /* rx_desk_rsgb_par */ + 161132812, /* 162.1328125 MHz Reed Solomon gearbox */ + /* tx_desk_rsgb_pcs */ + 97656250, /* 97.62625 MHz Reed Solomon gearbox */ + /* rx_desk_rsgb_pcs */ + 97656250, /* 97.62625 MHz Reed Solomon gearbox */ + /* tx_fixed_delay */ + 3928, + /* pmd_adj_divisor */ + 206250000, + /* rx_fixed_delay */ + 29535, + }, + /* ICE_PTP_LNK_SPD_40G */ + { + /* tx_par_clk */ + 257812500, + /* rx_par_clk */ + 257812500, + /* tx_pcs_clk */ + 156250000, /* 156.25 MHz */ + /* rx_pcs_clk */ + 156250000, /* 156.25 MHz */ + /* tx_desk_rsgb_par */ + 0, /* unused */ + /* rx_desk_rsgb_par */ + 156250000, /* 156.25 MHz deskew clock */ + /* tx_desk_rsgb_pcs */ + 0, /* unused */ + /* rx_desk_rsgb_pcs */ + 156250000, /* 156.25 MHz deskew clock */ + /* tx_fixed_delay */ + 5666, + /* pmd_adj_divisor */ + 82500000, + /* rx_fixed_delay */ + 4244, + }, + /* ICE_PTP_LNK_SPD_50G */ + { + /* tx_par_clk */ + 644531250, /* 644.53125 MHZ */ + /* rx_par_clk */ + 644531250, /* 644.53125 MHZ */ + /* tx_pcs_clk */ + 390625000, /* 390.625 MHz */ + /* rx_pcs_clk */ + 390625000, /* 390.625 MHz */ + /* tx_desk_rsgb_par */ + 0, /* unused */ + /* rx_desk_rsgb_par */ + 195312500, /* 193.3125 MHz deskew clock */ + /* tx_desk_rsgb_pcs */ + 0, /* unused */ + /* rx_desk_rsgb_pcs */ + 195312500, /* 193.3125 MHz deskew clock */ + /* tx_fixed_delay */ + 2778, + /* pmd_adj_divisor */ + 206250000, + /* rx_fixed_delay */ + 2868, + }, + /* ICE_PTP_LNK_SPD_50G_RS */ + { + /* tx_par_clk */ + 0, /* unused */ + /* rx_par_clk */ + 644531250, /* 644.53125 MHz */ + /* tx_pcs_clk */ + 0, /* unused */ + /* rx_pcs_clk */ + 644531250, /* 644.53125 MHz */ + /* tx_desk_rsgb_par */ + 322265625, /* 322.265625 MHz Reed Solomon gearbox */ + /* rx_desk_rsgb_par */ + 322265625, /* 322.265625 MHz Reed Solomon gearbox */ + /* tx_desk_rsgb_pcs */ + 644531250, /* 644.53125 MHz Reed Solomon gearbox */ + /* rx_desk_rsgb_pcs */ + 644531250, /* 644.53125 MHz Reed Solomon gearbox */ + /* tx_fixed_delay */ + 2095, + /* pmd_adj_divisor */ + 206250000, + /* rx_fixed_delay */ + 14524, + }, + /* ICE_PTP_LNK_SPD_100G_RS */ + { + /* tx_par_clk */ + 0, /* unused */ + /* rx_par_clk */ + 644531250, /* 644.53125 MHz */ + /* tx_pcs_clk */ + 0, /* unused */ + /* rx_pcs_clk */ + 644531250, /* 644.53125 MHz */ + /* tx_desk_rsgb_par */ + 644531250, /* 644.53125 MHz Reed Solomon gearbox */ + /* rx_desk_rsgb_par */ + 644531250, /* 644.53125 MHz Reed Solomon gearbox */ + /* tx_desk_rsgb_pcs */ + 644531250, /* 644.53125 MHz Reed Solomon gearbox */ + /* rx_desk_rsgb_pcs */ + 644531250, /* 644.53125 MHz Reed Solomon gearbox */ + /* tx_fixed_delay */ + 1620, + /* pmd_adj_divisor */ + 206250000, + /* rx_fixed_delay */ + 7775, + }, +}; + +#endif /* _ICE_PTP_CONSTS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 29f947c0cd2e..ec8450f034e6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -3,6 +3,8 @@ #include "ice_common.h" #include "ice_ptp_hw.h" +#include "ice_ptp_consts.h" +#include "ice_cgu_regs.h" /* Low level functions for interacting with and managing the device clock used * for the Precision Time Protocol. @@ -29,6 +31,15 @@ * * For E810 devices, the increment frequency is 812.5 MHz * + * For E822 devices the clock can be derived from different sources, and the + * increment has an effective frequency of one of the following: + * - 823.4375 MHz + * - 783.36 MHz + * - 796.875 MHz + * - 816 MHz + * - 830.078125 MHz + * - 783.36 MHz + * * The hardware captures timestamps in the PHY for incoming packets, and for * outgoing packets on request. To support this, the PHY maintains a timer * that matches the lower 64 bits of the global source timer. @@ -37,6 +48,24 @@ * shadow registers are used to prepare the desired initial values. A special * sync command is issued to trigger copying from the shadow registers into * the appropriate source and PHY registers simultaneously. + * + * The driver supports devices which have different PHYs with subtly different + * mechanisms to program and control the timers. We divide the devices into + * families named after the first major device, E810 and similar devices, and + * E822 and similar devices. + * + * - E822 based devices have additional support for fine grained Vernier + * calibration which requires significant setup + * - The layout of timestamp data in the PHY register blocks is different + * - The way timer synchronization commands are issued is different. + * + * To support this, very low level functions have an e810 or e822 suffix + * indicating what type of device they work on. Higher level abstractions for + * tasks that can be done on both devices do not have the suffix and will + * correctly look up the appropriate low level function when running. + * + * Functions which only make sense on a single device family may not have + * a suitable generic implementation */ /** @@ -51,6 +80,2447 @@ u8 ice_get_ptp_src_clock_index(struct ice_hw *hw) return hw->func_caps.ts_func_info.tmr_index_assoc; } +/** + * ice_ptp_read_src_incval - Read source timer increment value + * @hw: pointer to HW struct + * + * Read the increment value of the source timer and return it. + */ +static u64 ice_ptp_read_src_incval(struct ice_hw *hw) +{ + u32 lo, hi; + u8 tmr_idx; + + tmr_idx = ice_get_ptp_src_clock_index(hw); + + lo = rd32(hw, GLTSYN_INCVAL_L(tmr_idx)); + hi = rd32(hw, GLTSYN_INCVAL_H(tmr_idx)); + + return ((u64)(hi & INCVAL_HIGH_M) << 32) | lo; +} + +/** + * ice_ptp_src_cmd - Prepare source timer for a timer command + * @hw: pointer to HW structure + * @cmd: Timer command + * + * Prepare the source timer for an upcoming timer sync command. + */ +static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) +{ + u32 cmd_val; + u8 tmr_idx; + + tmr_idx = ice_get_ptp_src_clock_index(hw); + cmd_val = tmr_idx << SEL_CPK_SRC; + + switch (cmd) { + case INIT_TIME: + cmd_val |= GLTSYN_CMD_INIT_TIME; + break; + case INIT_INCVAL: + cmd_val |= GLTSYN_CMD_INIT_INCVAL; + break; + case ADJ_TIME: + cmd_val |= GLTSYN_CMD_ADJ_TIME; + break; + case ADJ_TIME_AT_TIME: + cmd_val |= GLTSYN_CMD_ADJ_INIT_TIME; + break; + case READ_TIME: + cmd_val |= GLTSYN_CMD_READ_TIME; + break; + } + + wr32(hw, GLTSYN_CMD, cmd_val); +} + +/** + * ice_ptp_exec_tmr_cmd - Execute all prepared timer commands + * @hw: pointer to HW struct + * + * Write the SYNC_EXEC_CMD bit to the GLTSYN_CMD_SYNC register, and flush the + * write immediately. This triggers the hardware to begin executing all of the + * source and PHY timer commands synchronously. + */ +static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw) +{ + wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD); + ice_flush(hw); +} + +/* E822 family functions + * + * The following functions operate on the E822 family of devices. + */ + +/** + * ice_fill_phy_msg_e822 - Fill message data for a PHY register access + * @msg: the PHY message buffer to fill in + * @port: the port to access + * @offset: the register offset + */ +static void +ice_fill_phy_msg_e822(struct ice_sbq_msg_input *msg, u8 port, u16 offset) +{ + int phy_port, phy, quadtype; + + phy_port = port % ICE_PORTS_PER_PHY; + phy = port / ICE_PORTS_PER_PHY; + quadtype = (port / ICE_PORTS_PER_QUAD) % ICE_NUM_QUAD_TYPE; + + if (quadtype == 0) { + msg->msg_addr_low = P_Q0_L(P_0_BASE + offset, phy_port); + msg->msg_addr_high = P_Q0_H(P_0_BASE + offset, phy_port); + } else { + msg->msg_addr_low = P_Q1_L(P_4_BASE + offset, phy_port); + msg->msg_addr_high = P_Q1_H(P_4_BASE + offset, phy_port); + } + + if (phy == 0) + msg->dest_dev = rmn_0; + else if (phy == 1) + msg->dest_dev = rmn_1; + else + msg->dest_dev = rmn_2; +} + +/** + * ice_is_64b_phy_reg_e822 - Check if this is a 64bit PHY register + * @low_addr: the low address to check + * @high_addr: on return, contains the high address of the 64bit register + * + * Checks if the provided low address is one of the known 64bit PHY values + * represented as two 32bit registers. If it is, return the appropriate high + * register offset to use. + */ +static bool ice_is_64b_phy_reg_e822(u16 low_addr, u16 *high_addr) +{ + switch (low_addr) { + case P_REG_PAR_PCS_TX_OFFSET_L: + *high_addr = P_REG_PAR_PCS_TX_OFFSET_U; + return true; + case P_REG_PAR_PCS_RX_OFFSET_L: + *high_addr = P_REG_PAR_PCS_RX_OFFSET_U; + return true; + case P_REG_PAR_TX_TIME_L: + *high_addr = P_REG_PAR_TX_TIME_U; + return true; + case P_REG_PAR_RX_TIME_L: + *high_addr = P_REG_PAR_RX_TIME_U; + return true; + case P_REG_TOTAL_TX_OFFSET_L: + *high_addr = P_REG_TOTAL_TX_OFFSET_U; + return true; + case P_REG_TOTAL_RX_OFFSET_L: + *high_addr = P_REG_TOTAL_RX_OFFSET_U; + return true; + case P_REG_UIX66_10G_40G_L: + *high_addr = P_REG_UIX66_10G_40G_U; + return true; + case P_REG_UIX66_25G_100G_L: + *high_addr = P_REG_UIX66_25G_100G_U; + return true; + case P_REG_TX_CAPTURE_L: + *high_addr = P_REG_TX_CAPTURE_U; + return true; + case P_REG_RX_CAPTURE_L: + *high_addr = P_REG_RX_CAPTURE_U; + return true; + case P_REG_TX_TIMER_INC_PRE_L: + *high_addr = P_REG_TX_TIMER_INC_PRE_U; + return true; + case P_REG_RX_TIMER_INC_PRE_L: + *high_addr = P_REG_RX_TIMER_INC_PRE_U; + return true; + default: + return false; + } +} + +/** + * ice_is_40b_phy_reg_e822 - Check if this is a 40bit PHY register + * @low_addr: the low address to check + * @high_addr: on return, contains the high address of the 40bit value + * + * Checks if the provided low address is one of the known 40bit PHY values + * split into two registers with the lower 8 bits in the low register and the + * upper 32 bits in the high register. If it is, return the appropriate high + * register offset to use. + */ +static bool ice_is_40b_phy_reg_e822(u16 low_addr, u16 *high_addr) +{ + switch (low_addr) { + case P_REG_TIMETUS_L: + *high_addr = P_REG_TIMETUS_U; + return true; + case P_REG_PAR_RX_TUS_L: + *high_addr = P_REG_PAR_RX_TUS_U; + return true; + case P_REG_PAR_TX_TUS_L: + *high_addr = P_REG_PAR_TX_TUS_U; + return true; + case P_REG_PCS_RX_TUS_L: + *high_addr = P_REG_PCS_RX_TUS_U; + return true; + case P_REG_PCS_TX_TUS_L: + *high_addr = P_REG_PCS_TX_TUS_U; + return true; + case P_REG_DESK_PAR_RX_TUS_L: + *high_addr = P_REG_DESK_PAR_RX_TUS_U; + return true; + case P_REG_DESK_PAR_TX_TUS_L: + *high_addr = P_REG_DESK_PAR_TX_TUS_U; + return true; + case P_REG_DESK_PCS_RX_TUS_L: + *high_addr = P_REG_DESK_PCS_RX_TUS_U; + return true; + case P_REG_DESK_PCS_TX_TUS_L: + *high_addr = P_REG_DESK_PCS_TX_TUS_U; + return true; + default: + return false; + } +} + +/** + * ice_read_phy_reg_e822 - Read a PHY register + * @hw: pointer to the HW struct + * @port: PHY port to read from + * @offset: PHY register offset to read + * @val: on return, the contents read from the PHY + * + * Read a PHY register for the given port over the device sideband queue. + */ +int +ice_read_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 *val) +{ + struct ice_sbq_msg_input msg = {0}; + int err; + + ice_fill_phy_msg_e822(&msg, port, offset); + msg.opcode = ice_sbq_msg_rd; + + err = ice_sbq_rw_reg(hw, &msg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n", + err); + return err; + } + + *val = msg.data; + + return 0; +} + +/** + * ice_read_64b_phy_reg_e822 - Read a 64bit value from PHY registers + * @hw: pointer to the HW struct + * @port: PHY port to read from + * @low_addr: offset of the lower register to read from + * @val: on return, the contents of the 64bit value from the PHY registers + * + * Reads the two registers associated with a 64bit value and returns it in the + * val pointer. The offset always specifies the lower register offset to use. + * The high offset is looked up. This function only operates on registers + * known to be two parts of a 64bit value. + */ +static int +ice_read_64b_phy_reg_e822(struct ice_hw *hw, u8 port, u16 low_addr, u64 *val) +{ + u32 low, high; + u16 high_addr; + int err; + + /* Only operate on registers known to be split into two 32bit + * registers. + */ + if (!ice_is_64b_phy_reg_e822(low_addr, &high_addr)) { + ice_debug(hw, ICE_DBG_PTP, "Invalid 64b register addr 0x%08x\n", + low_addr); + return -EINVAL; + } + + err = ice_read_phy_reg_e822(hw, port, low_addr, &low); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read from low register 0x%08x\n, err %d", + low_addr, err); + return err; + } + + err = ice_read_phy_reg_e822(hw, port, high_addr, &high); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read from high register 0x%08x\n, err %d", + high_addr, err); + return err; + } + + *val = (u64)high << 32 | low; + + return 0; +} + +/** + * ice_write_phy_reg_e822 - Write a PHY register + * @hw: pointer to the HW struct + * @port: PHY port to write to + * @offset: PHY register offset to write + * @val: The value to write to the register + * + * Write a PHY register for the given port over the device sideband queue. + */ +int +ice_write_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 val) +{ + struct ice_sbq_msg_input msg = {0}; + int err; + + ice_fill_phy_msg_e822(&msg, port, offset); + msg.opcode = ice_sbq_msg_wr; + msg.data = val; + + err = ice_sbq_rw_reg(hw, &msg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n", + err); + return err; + } + + return 0; +} + +/** + * ice_write_40b_phy_reg_e822 - Write a 40b value to the PHY + * @hw: pointer to the HW struct + * @port: port to write to + * @low_addr: offset of the low register + * @val: 40b value to write + * + * Write the provided 40b value to the two associated registers by splitting + * it up into two chunks, the lower 8 bits and the upper 32 bits. + */ +static int +ice_write_40b_phy_reg_e822(struct ice_hw *hw, u8 port, u16 low_addr, u64 val) +{ + u32 low, high; + u16 high_addr; + int err; + + /* Only operate on registers known to be split into a lower 8 bit + * register and an upper 32 bit register. + */ + if (!ice_is_40b_phy_reg_e822(low_addr, &high_addr)) { + ice_debug(hw, ICE_DBG_PTP, "Invalid 40b register addr 0x%08x\n", + low_addr); + return -EINVAL; + } + + low = (u32)(val & P_REG_40B_LOW_M); + high = (u32)(val >> P_REG_40B_HIGH_S); + + err = ice_write_phy_reg_e822(hw, port, low_addr, low); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write to low register 0x%08x\n, err %d", + low_addr, err); + return err; + } + + err = ice_write_phy_reg_e822(hw, port, high_addr, high); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write to high register 0x%08x\n, err %d", + high_addr, err); + return err; + } + + return 0; +} + +/** + * ice_write_64b_phy_reg_e822 - Write a 64bit value to PHY registers + * @hw: pointer to the HW struct + * @port: PHY port to read from + * @low_addr: offset of the lower register to read from + * @val: the contents of the 64bit value to write to PHY + * + * Write the 64bit value to the two associated 32bit PHY registers. The offset + * is always specified as the lower register, and the high address is looked + * up. This function only operates on registers known to be two parts of + * a 64bit value. + */ +static int +ice_write_64b_phy_reg_e822(struct ice_hw *hw, u8 port, u16 low_addr, u64 val) +{ + u32 low, high; + u16 high_addr; + int err; + + /* Only operate on registers known to be split into two 32bit + * registers. + */ + if (!ice_is_64b_phy_reg_e822(low_addr, &high_addr)) { + ice_debug(hw, ICE_DBG_PTP, "Invalid 64b register addr 0x%08x\n", + low_addr); + return -EINVAL; + } + + low = lower_32_bits(val); + high = upper_32_bits(val); + + err = ice_write_phy_reg_e822(hw, port, low_addr, low); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write to low register 0x%08x\n, err %d", + low_addr, err); + return err; + } + + err = ice_write_phy_reg_e822(hw, port, high_addr, high); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write to high register 0x%08x\n, err %d", + high_addr, err); + return err; + } + + return 0; +} + +/** + * ice_fill_quad_msg_e822 - Fill message data for quad register access + * @msg: the PHY message buffer to fill in + * @quad: the quad to access + * @offset: the register offset + * + * Fill a message buffer for accessing a register in a quad shared between + * multiple PHYs. + */ +static void +ice_fill_quad_msg_e822(struct ice_sbq_msg_input *msg, u8 quad, u16 offset) +{ + u32 addr; + + msg->dest_dev = rmn_0; + + if ((quad % ICE_NUM_QUAD_TYPE) == 0) + addr = Q_0_BASE + offset; + else + addr = Q_1_BASE + offset; + + msg->msg_addr_low = lower_16_bits(addr); + msg->msg_addr_high = upper_16_bits(addr); +} + +/** + * ice_read_quad_reg_e822 - Read a PHY quad register + * @hw: pointer to the HW struct + * @quad: quad to read from + * @offset: quad register offset to read + * @val: on return, the contents read from the quad + * + * Read a quad register over the device sideband queue. Quad registers are + * shared between multiple PHYs. + */ +int +ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val) +{ + struct ice_sbq_msg_input msg = {0}; + int err; + + if (quad >= ICE_MAX_QUAD) + return -EINVAL; + + ice_fill_quad_msg_e822(&msg, quad, offset); + msg.opcode = ice_sbq_msg_rd; + + err = ice_sbq_rw_reg(hw, &msg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n", + err); + return err; + } + + *val = msg.data; + + return 0; +} + +/** + * ice_write_quad_reg_e822 - Write a PHY quad register + * @hw: pointer to the HW struct + * @quad: quad to write to + * @offset: quad register offset to write + * @val: The value to write to the register + * + * Write a quad register over the device sideband queue. Quad registers are + * shared between multiple PHYs. + */ +int +ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val) +{ + struct ice_sbq_msg_input msg = {0}; + int err; + + if (quad >= ICE_MAX_QUAD) + return -EINVAL; + + ice_fill_quad_msg_e822(&msg, quad, offset); + msg.opcode = ice_sbq_msg_wr; + msg.data = val; + + err = ice_sbq_rw_reg(hw, &msg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n", + err); + return err; + } + + return 0; +} + +/** + * ice_read_phy_tstamp_e822 - Read a PHY timestamp out of the quad block + * @hw: pointer to the HW struct + * @quad: the quad to read from + * @idx: the timestamp index to read + * @tstamp: on return, the 40bit timestamp value + * + * Read a 40bit timestamp value out of the two associated registers in the + * quad memory block that is shared between the internal PHYs of the E822 + * family of devices. + */ +static int +ice_read_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx, u64 *tstamp) +{ + u16 lo_addr, hi_addr; + u32 lo, hi; + int err; + + lo_addr = (u16)TS_L(Q_REG_TX_MEMORY_BANK_START, idx); + hi_addr = (u16)TS_H(Q_REG_TX_MEMORY_BANK_START, idx); + + err = ice_read_quad_reg_e822(hw, quad, lo_addr, &lo); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read low PTP timestamp register, err %d\n", + err); + return err; + } + + err = ice_read_quad_reg_e822(hw, quad, hi_addr, &hi); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read high PTP timestamp register, err %d\n", + err); + return err; + } + + /* For E822 based internal PHYs, the timestamp is reported with the + * lower 8 bits in the low register, and the upper 32 bits in the high + * register. + */ + *tstamp = ((u64)hi) << TS_PHY_HIGH_S | ((u64)lo & TS_PHY_LOW_M); + + return 0; +} + +/** + * ice_clear_phy_tstamp_e822 - Clear a timestamp from the quad block + * @hw: pointer to the HW struct + * @quad: the quad to read from + * @idx: the timestamp index to reset + * + * Clear a timestamp, resetting its valid bit, from the PHY quad block that is + * shared between the internal PHYs on the E822 devices. + */ +static int +ice_clear_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx) +{ + u16 lo_addr, hi_addr; + int err; + + lo_addr = (u16)TS_L(Q_REG_TX_MEMORY_BANK_START, idx); + hi_addr = (u16)TS_H(Q_REG_TX_MEMORY_BANK_START, idx); + + err = ice_write_quad_reg_e822(hw, quad, lo_addr, 0); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, err %d\n", + err); + return err; + } + + err = ice_write_quad_reg_e822(hw, quad, hi_addr, 0); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, err %d\n", + err); + return err; + } + + return 0; +} + +/** + * ice_read_cgu_reg_e822 - Read a CGU register + * @hw: pointer to the HW struct + * @addr: Register address to read + * @val: storage for register value read + * + * Read the contents of a register of the Clock Generation Unit. Only + * applicable to E822 devices. + */ +static int +ice_read_cgu_reg_e822(struct ice_hw *hw, u32 addr, u32 *val) +{ + struct ice_sbq_msg_input cgu_msg; + int err; + + cgu_msg.opcode = ice_sbq_msg_rd; + cgu_msg.dest_dev = cgu; + cgu_msg.msg_addr_low = addr; + cgu_msg.msg_addr_high = 0x0; + + err = ice_sbq_rw_reg(hw, &cgu_msg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read CGU register 0x%04x, err %d\n", + addr, err); + return err; + } + + *val = cgu_msg.data; + + return err; +} + +/** + * ice_write_cgu_reg_e822 - Write a CGU register + * @hw: pointer to the HW struct + * @addr: Register address to write + * @val: value to write into the register + * + * Write the specified value to a register of the Clock Generation Unit. Only + * applicable to E822 devices. + */ +static int +ice_write_cgu_reg_e822(struct ice_hw *hw, u32 addr, u32 val) +{ + struct ice_sbq_msg_input cgu_msg; + int err; + + cgu_msg.opcode = ice_sbq_msg_wr; + cgu_msg.dest_dev = cgu; + cgu_msg.msg_addr_low = addr; + cgu_msg.msg_addr_high = 0x0; + cgu_msg.data = val; + + err = ice_sbq_rw_reg(hw, &cgu_msg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write CGU register 0x%04x, err %d\n", + addr, err); + return err; + } + + return err; +} + +/** + * ice_clk_freq_str - Convert time_ref_freq to string + * @clk_freq: Clock frequency + * + * Convert the specified TIME_REF clock frequency to a string. + */ +static const char *ice_clk_freq_str(u8 clk_freq) +{ + switch ((enum ice_time_ref_freq)clk_freq) { + case ICE_TIME_REF_FREQ_25_000: + return "25 MHz"; + case ICE_TIME_REF_FREQ_122_880: + return "122.88 MHz"; + case ICE_TIME_REF_FREQ_125_000: + return "125 MHz"; + case ICE_TIME_REF_FREQ_153_600: + return "153.6 MHz"; + case ICE_TIME_REF_FREQ_156_250: + return "156.25 MHz"; + case ICE_TIME_REF_FREQ_245_760: + return "245.76 MHz"; + default: + return "Unknown"; + } +} + +/** + * ice_clk_src_str - Convert time_ref_src to string + * @clk_src: Clock source + * + * Convert the specified clock source to its string name. + */ +static const char *ice_clk_src_str(u8 clk_src) +{ + switch ((enum ice_clk_src)clk_src) { + case ICE_CLK_SRC_TCX0: + return "TCX0"; + case ICE_CLK_SRC_TIME_REF: + return "TIME_REF"; + default: + return "Unknown"; + } +} + +/** + * ice_cfg_cgu_pll_e822 - Configure the Clock Generation Unit + * @hw: pointer to the HW struct + * @clk_freq: Clock frequency to program + * @clk_src: Clock source to select (TIME_REF, or TCX0) + * + * Configure the Clock Generation Unit with the desired clock frequency and + * time reference, enabling the PLL which drives the PTP hardware clock. + */ +static int +ice_cfg_cgu_pll_e822(struct ice_hw *hw, enum ice_time_ref_freq clk_freq, + enum ice_clk_src clk_src) +{ + union tspll_ro_bwm_lf bwm_lf; + union nac_cgu_dword19 dw19; + union nac_cgu_dword22 dw22; + union nac_cgu_dword24 dw24; + union nac_cgu_dword9 dw9; + int err; + + if (clk_freq >= NUM_ICE_TIME_REF_FREQ) { + dev_warn(ice_hw_to_dev(hw), "Invalid TIME_REF frequency %u\n", + clk_freq); + return -EINVAL; + } + + if (clk_src >= NUM_ICE_CLK_SRC) { + dev_warn(ice_hw_to_dev(hw), "Invalid clock source %u\n", + clk_src); + return -EINVAL; + } + + if (clk_src == ICE_CLK_SRC_TCX0 && + clk_freq != ICE_TIME_REF_FREQ_25_000) { + dev_warn(ice_hw_to_dev(hw), + "TCX0 only supports 25 MHz frequency\n"); + return -EINVAL; + } + + err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD9, &dw9.val); + if (err) + return err; + + err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD24, &dw24.val); + if (err) + return err; + + err = ice_read_cgu_reg_e822(hw, TSPLL_RO_BWM_LF, &bwm_lf.val); + if (err) + return err; + + /* Log the current clock configuration */ + ice_debug(hw, ICE_DBG_PTP, "Current CGU configuration -- %s, clk_src %s, clk_freq %s, PLL %s\n", + dw24.field.ts_pll_enable ? "enabled" : "disabled", + ice_clk_src_str(dw24.field.time_ref_sel), + ice_clk_freq_str(dw9.field.time_ref_freq_sel), + bwm_lf.field.plllock_true_lock_cri ? "locked" : "unlocked"); + + /* Disable the PLL before changing the clock source or frequency */ + if (dw24.field.ts_pll_enable) { + dw24.field.ts_pll_enable = 0; + + err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD24, dw24.val); + if (err) + return err; + } + + /* Set the frequency */ + dw9.field.time_ref_freq_sel = clk_freq; + err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD9, dw9.val); + if (err) + return err; + + /* Configure the TS PLL feedback divisor */ + err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD19, &dw19.val); + if (err) + return err; + + dw19.field.tspll_fbdiv_intgr = e822_cgu_params[clk_freq].feedback_div; + dw19.field.tspll_ndivratio = 1; + + err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD19, dw19.val); + if (err) + return err; + + /* Configure the TS PLL post divisor */ + err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD22, &dw22.val); + if (err) + return err; + + dw22.field.time1588clk_div = e822_cgu_params[clk_freq].post_pll_div; + dw22.field.time1588clk_sel_div2 = 0; + + err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD22, dw22.val); + if (err) + return err; + + /* Configure the TS PLL pre divisor and clock source */ + err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD24, &dw24.val); + if (err) + return err; + + dw24.field.ref1588_ck_div = e822_cgu_params[clk_freq].refclk_pre_div; + dw24.field.tspll_fbdiv_frac = e822_cgu_params[clk_freq].frac_n_div; + dw24.field.time_ref_sel = clk_src; + + err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD24, dw24.val); + if (err) + return err; + + /* Finally, enable the PLL */ + dw24.field.ts_pll_enable = 1; + + err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD24, dw24.val); + if (err) + return err; + + /* Wait to verify if the PLL locks */ + usleep_range(1000, 5000); + + err = ice_read_cgu_reg_e822(hw, TSPLL_RO_BWM_LF, &bwm_lf.val); + if (err) + return err; + + if (!bwm_lf.field.plllock_true_lock_cri) { + dev_warn(ice_hw_to_dev(hw), "CGU PLL failed to lock\n"); + return -EBUSY; + } + + /* Log the current clock configuration */ + ice_debug(hw, ICE_DBG_PTP, "New CGU configuration -- %s, clk_src %s, clk_freq %s, PLL %s\n", + dw24.field.ts_pll_enable ? "enabled" : "disabled", + ice_clk_src_str(dw24.field.time_ref_sel), + ice_clk_freq_str(dw9.field.time_ref_freq_sel), + bwm_lf.field.plllock_true_lock_cri ? "locked" : "unlocked"); + + return 0; +} + +/** + * ice_init_cgu_e822 - Initialize CGU with settings from firmware + * @hw: pointer to the HW structure + * + * Initialize the Clock Generation Unit of the E822 device. + */ +static int ice_init_cgu_e822(struct ice_hw *hw) +{ + struct ice_ts_func_info *ts_info = &hw->func_caps.ts_func_info; + union tspll_cntr_bist_settings cntr_bist; + int err; + + err = ice_read_cgu_reg_e822(hw, TSPLL_CNTR_BIST_SETTINGS, + &cntr_bist.val); + if (err) + return err; + + /* Disable sticky lock detection so lock err reported is accurate */ + cntr_bist.field.i_plllock_sel_0 = 0; + cntr_bist.field.i_plllock_sel_1 = 0; + + err = ice_write_cgu_reg_e822(hw, TSPLL_CNTR_BIST_SETTINGS, + cntr_bist.val); + if (err) + return err; + + /* Configure the CGU PLL using the parameters from the function + * capabilities. + */ + err = ice_cfg_cgu_pll_e822(hw, ts_info->time_ref, + (enum ice_clk_src)ts_info->clk_src); + if (err) + return err; + + return 0; +} + +/** + * ice_ptp_set_vernier_wl - Set the window length for vernier calibration + * @hw: pointer to the HW struct + * + * Set the window length used for the vernier port calibration process. + */ +static int ice_ptp_set_vernier_wl(struct ice_hw *hw) +{ + u8 port; + + for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) { + int err; + + err = ice_write_phy_reg_e822(hw, port, P_REG_WL, + PTP_VERNIER_WL); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to set vernier window length for port %u, err %d\n", + port, err); + return err; + } + } + + return 0; +} + +/** + * ice_ptp_init_phc_e822 - Perform E822 specific PHC initialization + * @hw: pointer to HW struct + * + * Perform PHC initialization steps specific to E822 devices. + */ +static int ice_ptp_init_phc_e822(struct ice_hw *hw) +{ + int err; + u32 regval; + + /* Enable reading switch and PHY registers over the sideband queue */ +#define PF_SB_REM_DEV_CTL_SWITCH_READ BIT(1) +#define PF_SB_REM_DEV_CTL_PHY0 BIT(2) + regval = rd32(hw, PF_SB_REM_DEV_CTL); + regval |= (PF_SB_REM_DEV_CTL_SWITCH_READ | + PF_SB_REM_DEV_CTL_PHY0); + wr32(hw, PF_SB_REM_DEV_CTL, regval); + + /* Initialize the Clock Generation Unit */ + err = ice_init_cgu_e822(hw); + if (err) + return err; + + /* Set window length for all the ports */ + return ice_ptp_set_vernier_wl(hw); +} + +/** + * ice_ptp_prep_phy_time_e822 - Prepare PHY port with initial time + * @hw: pointer to the HW struct + * @time: Time to initialize the PHY port clocks to + * + * Program the PHY port registers with a new initial time value. The port + * clock will be initialized once the driver issues an INIT_TIME sync + * command. The time value is the upper 32 bits of the PHY timer, usually in + * units of nominal nanoseconds. + */ +static int +ice_ptp_prep_phy_time_e822(struct ice_hw *hw, u32 time) +{ + u64 phy_time; + u8 port; + int err; + + /* The time represents the upper 32 bits of the PHY timer, so we need + * to shift to account for this when programming. + */ + phy_time = (u64)time << 32; + + for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) { + /* Tx case */ + err = ice_write_64b_phy_reg_e822(hw, port, + P_REG_TX_TIMER_INC_PRE_L, + phy_time); + if (err) + goto exit_err; + + /* Rx case */ + err = ice_write_64b_phy_reg_e822(hw, port, + P_REG_RX_TIMER_INC_PRE_L, + phy_time); + if (err) + goto exit_err; + } + + return 0; + +exit_err: + ice_debug(hw, ICE_DBG_PTP, "Failed to write init time for port %u, err %d\n", + port, err); + + return err; +} + +/** + * ice_ptp_prep_port_adj_e822 - Prepare a single port for time adjust + * @hw: pointer to HW struct + * @port: Port number to be programmed + * @time: time in cycles to adjust the port Tx and Rx clocks + * + * Program the port for an atomic adjustment by writing the Tx and Rx timer + * registers. The atomic adjustment won't be completed until the driver issues + * an ADJ_TIME command. + * + * Note that time is not in units of nanoseconds. It is in clock time + * including the lower sub-nanosecond portion of the port timer. + * + * Negative adjustments are supported using 2s complement arithmetic. + */ +int +ice_ptp_prep_port_adj_e822(struct ice_hw *hw, u8 port, s64 time) +{ + u32 l_time, u_time; + int err; + + l_time = lower_32_bits(time); + u_time = upper_32_bits(time); + + /* Tx case */ + err = ice_write_phy_reg_e822(hw, port, P_REG_TX_TIMER_INC_PRE_L, + l_time); + if (err) + goto exit_err; + + err = ice_write_phy_reg_e822(hw, port, P_REG_TX_TIMER_INC_PRE_U, + u_time); + if (err) + goto exit_err; + + /* Rx case */ + err = ice_write_phy_reg_e822(hw, port, P_REG_RX_TIMER_INC_PRE_L, + l_time); + if (err) + goto exit_err; + + err = ice_write_phy_reg_e822(hw, port, P_REG_RX_TIMER_INC_PRE_U, + u_time); + if (err) + goto exit_err; + + return 0; + +exit_err: + ice_debug(hw, ICE_DBG_PTP, "Failed to write time adjust for port %u, err %d\n", + port, err); + return err; +} + +/** + * ice_ptp_prep_phy_adj_e822 - Prep PHY ports for a time adjustment + * @hw: pointer to HW struct + * @adj: adjustment in nanoseconds + * + * Prepare the PHY ports for an atomic time adjustment by programming the PHY + * Tx and Rx port registers. The actual adjustment is completed by issuing an + * ADJ_TIME or ADJ_TIME_AT_TIME sync command. + */ +static int +ice_ptp_prep_phy_adj_e822(struct ice_hw *hw, s32 adj) +{ + s64 cycles; + u8 port; + + /* The port clock supports adjustment of the sub-nanosecond portion of + * the clock. We shift the provided adjustment in nanoseconds to + * calculate the appropriate adjustment to program into the PHY ports. + */ + if (adj > 0) + cycles = (s64)adj << 32; + else + cycles = -(((s64)-adj) << 32); + + for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) { + int err; + + err = ice_ptp_prep_port_adj_e822(hw, port, cycles); + if (err) + return err; + } + + return 0; +} + +/** + * ice_ptp_prep_phy_incval_e822 - Prepare PHY ports for time adjustment + * @hw: pointer to HW struct + * @incval: new increment value to prepare + * + * Prepare each of the PHY ports for a new increment value by programming the + * port's TIMETUS registers. The new increment value will be updated after + * issuing an INIT_INCVAL command. + */ +static int +ice_ptp_prep_phy_incval_e822(struct ice_hw *hw, u64 incval) +{ + int err; + u8 port; + + for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) { + err = ice_write_40b_phy_reg_e822(hw, port, P_REG_TIMETUS_L, + incval); + if (err) + goto exit_err; + } + + return 0; + +exit_err: + ice_debug(hw, ICE_DBG_PTP, "Failed to write incval for port %u, err %d\n", + port, err); + + return err; +} + +/** + * ice_ptp_read_port_capture - Read a port's local time capture + * @hw: pointer to HW struct + * @port: Port number to read + * @tx_ts: on return, the Tx port time capture + * @rx_ts: on return, the Rx port time capture + * + * Read the port's Tx and Rx local time capture values. + * + * Note this has no equivalent for the E810 devices. + */ +static int +ice_ptp_read_port_capture(struct ice_hw *hw, u8 port, u64 *tx_ts, u64 *rx_ts) +{ + int err; + + /* Tx case */ + err = ice_read_64b_phy_reg_e822(hw, port, P_REG_TX_CAPTURE_L, tx_ts); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read REG_TX_CAPTURE, err %d\n", + err); + return err; + } + + ice_debug(hw, ICE_DBG_PTP, "tx_init = 0x%016llx\n", + (unsigned long long)*tx_ts); + + /* Rx case */ + err = ice_read_64b_phy_reg_e822(hw, port, P_REG_RX_CAPTURE_L, rx_ts); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_CAPTURE, err %d\n", + err); + return err; + } + + ice_debug(hw, ICE_DBG_PTP, "rx_init = 0x%016llx\n", + (unsigned long long)*rx_ts); + + return 0; +} + +/** + * ice_ptp_one_port_cmd - Prepare a single PHY port for a timer command + * @hw: pointer to HW struct + * @port: Port to which cmd has to be sent + * @cmd: Command to be sent to the port + * + * Prepare the requested port for an upcoming timer sync command. + * + * Note there is no equivalent of this operation on E810, as that device + * always handles all external PHYs internally. + */ +static int +ice_ptp_one_port_cmd(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd) +{ + u32 cmd_val, val; + u8 tmr_idx; + int err; + + tmr_idx = ice_get_ptp_src_clock_index(hw); + cmd_val = tmr_idx << SEL_PHY_SRC; + switch (cmd) { + case INIT_TIME: + cmd_val |= PHY_CMD_INIT_TIME; + break; + case INIT_INCVAL: + cmd_val |= PHY_CMD_INIT_INCVAL; + break; + case ADJ_TIME: + cmd_val |= PHY_CMD_ADJ_TIME; + break; + case READ_TIME: + cmd_val |= PHY_CMD_READ_TIME; + break; + case ADJ_TIME_AT_TIME: + cmd_val |= PHY_CMD_ADJ_TIME_AT_TIME; + break; + } + + /* Tx case */ + /* Read, modify, write */ + err = ice_read_phy_reg_e822(hw, port, P_REG_TX_TMR_CMD, &val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_TMR_CMD, err %d\n", + err); + return err; + } + + /* Modify necessary bits only and perform write */ + val &= ~TS_CMD_MASK; + val |= cmd_val; + + err = ice_write_phy_reg_e822(hw, port, P_REG_TX_TMR_CMD, val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write back TX_TMR_CMD, err %d\n", + err); + return err; + } + + /* Rx case */ + /* Read, modify, write */ + err = ice_read_phy_reg_e822(hw, port, P_REG_RX_TMR_CMD, &val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_TMR_CMD, err %d\n", + err); + return err; + } + + /* Modify necessary bits only and perform write */ + val &= ~TS_CMD_MASK; + val |= cmd_val; + + err = ice_write_phy_reg_e822(hw, port, P_REG_RX_TMR_CMD, val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write back RX_TMR_CMD, err %d\n", + err); + return err; + } + + return 0; +} + +/** + * ice_ptp_port_cmd_e822 - Prepare all ports for a timer command + * @hw: pointer to the HW struct + * @cmd: timer command to prepare + * + * Prepare all ports connected to this device for an upcoming timer sync + * command. + */ +static int +ice_ptp_port_cmd_e822(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) +{ + u8 port; + + for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) { + int err; + + err = ice_ptp_one_port_cmd(hw, port, cmd); + if (err) + return err; + } + + return 0; +} + +/* E822 Vernier calibration functions + * + * The following functions are used as part of the vernier calibration of + * a port. This calibration increases the precision of the timestamps on the + * port. + */ + +/** + * ice_phy_get_speed_and_fec_e822 - Get link speed and FEC based on serdes mode + * @hw: pointer to HW struct + * @port: the port to read from + * @link_out: if non-NULL, holds link speed on success + * @fec_out: if non-NULL, holds FEC algorithm on success + * + * Read the serdes data for the PHY port and extract the link speed and FEC + * algorithm. + */ +static int +ice_phy_get_speed_and_fec_e822(struct ice_hw *hw, u8 port, + enum ice_ptp_link_spd *link_out, + enum ice_ptp_fec_mode *fec_out) +{ + enum ice_ptp_link_spd link; + enum ice_ptp_fec_mode fec; + u32 serdes; + int err; + + err = ice_read_phy_reg_e822(hw, port, P_REG_LINK_SPEED, &serdes); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read serdes info\n"); + return err; + } + + /* Determine the FEC algorithm */ + fec = (enum ice_ptp_fec_mode)P_REG_LINK_SPEED_FEC_MODE(serdes); + + serdes &= P_REG_LINK_SPEED_SERDES_M; + + /* Determine the link speed */ + if (fec == ICE_PTP_FEC_MODE_RS_FEC) { + switch (serdes) { + case ICE_PTP_SERDES_25G: + link = ICE_PTP_LNK_SPD_25G_RS; + break; + case ICE_PTP_SERDES_50G: + link = ICE_PTP_LNK_SPD_50G_RS; + break; + case ICE_PTP_SERDES_100G: + link = ICE_PTP_LNK_SPD_100G_RS; + break; + default: + return -EIO; + } + } else { + switch (serdes) { + case ICE_PTP_SERDES_1G: + link = ICE_PTP_LNK_SPD_1G; + break; + case ICE_PTP_SERDES_10G: + link = ICE_PTP_LNK_SPD_10G; + break; + case ICE_PTP_SERDES_25G: + link = ICE_PTP_LNK_SPD_25G; + break; + case ICE_PTP_SERDES_40G: + link = ICE_PTP_LNK_SPD_40G; + break; + case ICE_PTP_SERDES_50G: + link = ICE_PTP_LNK_SPD_50G; + break; + default: + return -EIO; + } + } + + if (link_out) + *link_out = link; + if (fec_out) + *fec_out = fec; + + return 0; +} + +/** + * ice_phy_cfg_lane_e822 - Configure PHY quad for single/multi-lane timestamp + * @hw: pointer to HW struct + * @port: to configure the quad for + */ +static void ice_phy_cfg_lane_e822(struct ice_hw *hw, u8 port) +{ + enum ice_ptp_link_spd link_spd; + int err; + u32 val; + u8 quad; + + err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, NULL); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to get PHY link speed, err %d\n", + err); + return; + } + + quad = port / ICE_PORTS_PER_QUAD; + + err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG, &val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEM_GLB_CFG, err %d\n", + err); + return; + } + + if (link_spd >= ICE_PTP_LNK_SPD_40G) + val &= ~Q_REG_TX_MEM_GBL_CFG_LANE_TYPE_M; + else + val |= Q_REG_TX_MEM_GBL_CFG_LANE_TYPE_M; + + err = ice_write_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG, val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write back TX_MEM_GBL_CFG, err %d\n", + err); + return; + } +} + +/** + * ice_phy_cfg_uix_e822 - Configure Serdes UI to TU conversion for E822 + * @hw: pointer to the HW structure + * @port: the port to configure + * + * Program the conversion ration of Serdes clock "unit intervals" (UIs) to PHC + * hardware clock time units (TUs). That is, determine the number of TUs per + * serdes unit interval, and program the UIX registers with this conversion. + * + * This conversion is used as part of the calibration process when determining + * the additional error of a timestamp vs the real time of transmission or + * receipt of the packet. + * + * Hardware uses the number of TUs per 66 UIs, written to the UIX registers + * for the two main serdes clock rates, 10G/40G and 25G/100G serdes clocks. + * + * To calculate the conversion ratio, we use the following facts: + * + * a) the clock frequency in Hz (cycles per second) + * b) the number of TUs per cycle (the increment value of the clock) + * c) 1 second per 1 billion nanoseconds + * d) the duration of 66 UIs in nanoseconds + * + * Given these facts, we can use the following table to work out what ratios + * to multiply in order to get the number of TUs per 66 UIs: + * + * cycles | 1 second | incval (TUs) | nanoseconds + * -------+--------------+--------------+------------- + * second | 1 billion ns | cycle | 66 UIs + * + * To perform the multiplication using integers without too much loss of + * precision, we can take use the following equation: + * + * (freq * incval * 6600 LINE_UI ) / ( 100 * 1 billion) + * + * We scale up to using 6600 UI instead of 66 in order to avoid fractional + * nanosecond UIs (66 UI at 10G/40G is 6.4 ns) + * + * The increment value has a maximum expected range of about 34 bits, while + * the frequency value is about 29 bits. Multiplying these values shouldn't + * overflow the 64 bits. However, we must then further multiply them again by + * the Serdes unit interval duration. To avoid overflow here, we split the + * overall divide by 1e11 into a divide by 256 (shift down by 8 bits) and + * a divide by 390,625,000. This does lose some precision, but avoids + * miscalculation due to arithmetic overflow. + */ +static int ice_phy_cfg_uix_e822(struct ice_hw *hw, u8 port) +{ + u64 cur_freq, clk_incval, tu_per_sec, uix; + int err; + + cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw)); + clk_incval = ice_ptp_read_src_incval(hw); + + /* Calculate TUs per second divided by 256 */ + tu_per_sec = (cur_freq * clk_incval) >> 8; + +#define LINE_UI_10G_40G 640 /* 6600 UIs is 640 nanoseconds at 10Gb/40Gb */ +#define LINE_UI_25G_100G 256 /* 6600 UIs is 256 nanoseconds at 25Gb/100Gb */ + + /* Program the 10Gb/40Gb conversion ratio */ + uix = div_u64(tu_per_sec * LINE_UI_10G_40G, 390625000); + + err = ice_write_64b_phy_reg_e822(hw, port, P_REG_UIX66_10G_40G_L, + uix); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write UIX66_10G_40G, err %d\n", + err); + return err; + } + + /* Program the 25Gb/100Gb conversion ratio */ + uix = div_u64(tu_per_sec * LINE_UI_25G_100G, 390625000); + + err = ice_write_64b_phy_reg_e822(hw, port, P_REG_UIX66_25G_100G_L, + uix); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write UIX66_25G_100G, err %d\n", + err); + return err; + } + + return 0; +} + +/** + * ice_phy_cfg_parpcs_e822 - Configure TUs per PAR/PCS clock cycle + * @hw: pointer to the HW struct + * @port: port to configure + * + * Configure the number of TUs for the PAR and PCS clocks used as part of the + * timestamp calibration process. This depends on the link speed, as the PHY + * uses different markers depending on the speed. + * + * 1Gb/10Gb/25Gb: + * - Tx/Rx PAR/PCS markers + * + * 25Gb RS: + * - Tx/Rx Reed Solomon gearbox PAR/PCS markers + * + * 40Gb/50Gb: + * - Tx/Rx PAR/PCS markers + * - Rx Deskew PAR/PCS markers + * + * 50G RS and 100GB RS: + * - Tx/Rx Reed Solomon gearbox PAR/PCS markers + * - Rx Deskew PAR/PCS markers + * - Tx PAR/PCS markers + * + * To calculate the conversion, we use the PHC clock frequency (cycles per + * second), the increment value (TUs per cycle), and the related PHY clock + * frequency to calculate the TUs per unit of the PHY link clock. The + * following table shows how the units convert: + * + * cycles | TUs | second + * -------+-------+-------- + * second | cycle | cycles + * + * For each conversion register, look up the appropriate frequency from the + * e822 PAR/PCS table and calculate the TUs per unit of that clock. Program + * this to the appropriate register, preparing hardware to perform timestamp + * calibration to calculate the total Tx or Rx offset to adjust the timestamp + * in order to calibrate for the internal PHY delays. + * + * Note that the increment value ranges up to ~34 bits, and the clock + * frequency is ~29 bits, so multiplying them together should fit within the + * 64 bit arithmetic. + */ +static int ice_phy_cfg_parpcs_e822(struct ice_hw *hw, u8 port) +{ + u64 cur_freq, clk_incval, tu_per_sec, phy_tus; + enum ice_ptp_link_spd link_spd; + enum ice_ptp_fec_mode fec_mode; + int err; + + err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); + if (err) + return err; + + cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw)); + clk_incval = ice_ptp_read_src_incval(hw); + + /* Calculate TUs per cycle of the PHC clock */ + tu_per_sec = cur_freq * clk_incval; + + /* For each PHY conversion register, look up the appropriate link + * speed frequency and determine the TUs per that clock's cycle time. + * Split this into a high and low value and then program the + * appropriate register. If that link speed does not use the + * associated register, write zeros to clear it instead. + */ + + /* P_REG_PAR_TX_TUS */ + if (e822_vernier[link_spd].tx_par_clk) + phy_tus = div_u64(tu_per_sec, + e822_vernier[link_spd].tx_par_clk); + else + phy_tus = 0; + + err = ice_write_40b_phy_reg_e822(hw, port, P_REG_PAR_TX_TUS_L, + phy_tus); + if (err) + return err; + + /* P_REG_PAR_RX_TUS */ + if (e822_vernier[link_spd].rx_par_clk) + phy_tus = div_u64(tu_per_sec, + e822_vernier[link_spd].rx_par_clk); + else + phy_tus = 0; + + err = ice_write_40b_phy_reg_e822(hw, port, P_REG_PAR_RX_TUS_L, + phy_tus); + if (err) + return err; + + /* P_REG_PCS_TX_TUS */ + if (e822_vernier[link_spd].tx_pcs_clk) + phy_tus = div_u64(tu_per_sec, + e822_vernier[link_spd].tx_pcs_clk); + else + phy_tus = 0; + + err = ice_write_40b_phy_reg_e822(hw, port, P_REG_PCS_TX_TUS_L, + phy_tus); + if (err) + return err; + + /* P_REG_PCS_RX_TUS */ + if (e822_vernier[link_spd].rx_pcs_clk) + phy_tus = div_u64(tu_per_sec, + e822_vernier[link_spd].rx_pcs_clk); + else + phy_tus = 0; + + err = ice_write_40b_phy_reg_e822(hw, port, P_REG_PCS_RX_TUS_L, + phy_tus); + if (err) + return err; + + /* P_REG_DESK_PAR_TX_TUS */ + if (e822_vernier[link_spd].tx_desk_rsgb_par) + phy_tus = div_u64(tu_per_sec, + e822_vernier[link_spd].tx_desk_rsgb_par); + else + phy_tus = 0; + + err = ice_write_40b_phy_reg_e822(hw, port, P_REG_DESK_PAR_TX_TUS_L, + phy_tus); + if (err) + return err; + + /* P_REG_DESK_PAR_RX_TUS */ + if (e822_vernier[link_spd].rx_desk_rsgb_par) + phy_tus = div_u64(tu_per_sec, + e822_vernier[link_spd].rx_desk_rsgb_par); + else + phy_tus = 0; + + err = ice_write_40b_phy_reg_e822(hw, port, P_REG_DESK_PAR_RX_TUS_L, + phy_tus); + if (err) + return err; + + /* P_REG_DESK_PCS_TX_TUS */ + if (e822_vernier[link_spd].tx_desk_rsgb_pcs) + phy_tus = div_u64(tu_per_sec, + e822_vernier[link_spd].tx_desk_rsgb_pcs); + else + phy_tus = 0; + + err = ice_write_40b_phy_reg_e822(hw, port, P_REG_DESK_PCS_TX_TUS_L, + phy_tus); + if (err) + return err; + + /* P_REG_DESK_PCS_RX_TUS */ + if (e822_vernier[link_spd].rx_desk_rsgb_pcs) + phy_tus = div_u64(tu_per_sec, + e822_vernier[link_spd].rx_desk_rsgb_pcs); + else + phy_tus = 0; + + return ice_write_40b_phy_reg_e822(hw, port, P_REG_DESK_PCS_RX_TUS_L, + phy_tus); +} + +/** + * ice_calc_fixed_tx_offset_e822 - Calculated Fixed Tx offset for a port + * @hw: pointer to the HW struct + * @link_spd: the Link speed to calculate for + * + * Calculate the fixed offset due to known static latency data. + */ +static u64 +ice_calc_fixed_tx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd) +{ + u64 cur_freq, clk_incval, tu_per_sec, fixed_offset; + + cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw)); + clk_incval = ice_ptp_read_src_incval(hw); + + /* Calculate TUs per second */ + tu_per_sec = cur_freq * clk_incval; + + /* Calculate number of TUs to add for the fixed Tx latency. Since the + * latency measurement is in 1/100th of a nanosecond, we need to + * multiply by tu_per_sec and then divide by 1e11. This calculation + * overflows 64 bit integer arithmetic, so break it up into two + * divisions by 1e4 first then by 1e7. + */ + fixed_offset = div_u64(tu_per_sec, 10000); + fixed_offset *= e822_vernier[link_spd].tx_fixed_delay; + fixed_offset = div_u64(fixed_offset, 10000000); + + return fixed_offset; +} + +/** + * ice_phy_cfg_tx_offset_e822 - Configure total Tx timestamp offset + * @hw: pointer to the HW struct + * @port: the PHY port to configure + * + * Program the P_REG_TOTAL_TX_OFFSET register with the total number of TUs to + * adjust Tx timestamps by. This is calculated by combining some known static + * latency along with the Vernier offset computations done by hardware. + * + * This function must be called only after the offset registers are valid, + * i.e. after the Vernier calibration wait has passed, to ensure that the PHY + * has measured the offset. + * + * To avoid overflow, when calculating the offset based on the known static + * latency values, we use measurements in 1/100th of a nanosecond, and divide + * the TUs per second up front. This avoids overflow while allowing + * calculation of the adjustment using integer arithmetic. + */ +static int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port) +{ + enum ice_ptp_link_spd link_spd; + enum ice_ptp_fec_mode fec_mode; + u64 total_offset, val; + int err; + + err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); + if (err) + return err; + + total_offset = ice_calc_fixed_tx_offset_e822(hw, link_spd); + + /* Read the first Vernier offset from the PHY register and add it to + * the total offset. + */ + if (link_spd == ICE_PTP_LNK_SPD_1G || + link_spd == ICE_PTP_LNK_SPD_10G || + link_spd == ICE_PTP_LNK_SPD_25G || + link_spd == ICE_PTP_LNK_SPD_25G_RS || + link_spd == ICE_PTP_LNK_SPD_40G || + link_spd == ICE_PTP_LNK_SPD_50G) { + err = ice_read_64b_phy_reg_e822(hw, port, + P_REG_PAR_PCS_TX_OFFSET_L, + &val); + if (err) + return err; + + total_offset += val; + } + + /* For Tx, we only need to use the second Vernier offset for + * multi-lane link speeds with RS-FEC. The lanes will always be + * aligned. + */ + if (link_spd == ICE_PTP_LNK_SPD_50G_RS || + link_spd == ICE_PTP_LNK_SPD_100G_RS) { + err = ice_read_64b_phy_reg_e822(hw, port, + P_REG_PAR_TX_TIME_L, + &val); + if (err) + return err; + + total_offset += val; + } + + /* Now that the total offset has been calculated, program it to the + * PHY and indicate that the Tx offset is ready. After this, + * timestamps will be enabled. + */ + err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_TX_OFFSET_L, + total_offset); + if (err) + return err; + + err = ice_write_phy_reg_e822(hw, port, P_REG_TX_OR, 1); + if (err) + return err; + + return 0; +} + +/** + * ice_phy_cfg_fixed_tx_offset_e822 - Configure Tx offset for bypass mode + * @hw: pointer to the HW struct + * @port: the PHY port to configure + * + * Calculate and program the fixed Tx offset, and indicate that the offset is + * ready. This can be used when operating in bypass mode. + */ +static int +ice_phy_cfg_fixed_tx_offset_e822(struct ice_hw *hw, u8 port) +{ + enum ice_ptp_link_spd link_spd; + enum ice_ptp_fec_mode fec_mode; + u64 total_offset; + int err; + + err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); + if (err) + return err; + + total_offset = ice_calc_fixed_tx_offset_e822(hw, link_spd); + + /* Program the fixed Tx offset into the P_REG_TOTAL_TX_OFFSET_L + * register, then indicate that the Tx offset is ready. After this, + * timestamps will be enabled. + * + * Note that this skips including the more precise offsets generated + * by the Vernier calibration. + */ + err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_TX_OFFSET_L, + total_offset); + if (err) + return err; + + err = ice_write_phy_reg_e822(hw, port, P_REG_TX_OR, 1); + if (err) + return err; + + return 0; +} + +/** + * ice_phy_calc_pmd_adj_e822 - Calculate PMD adjustment for Rx + * @hw: pointer to the HW struct + * @port: the PHY port to adjust for + * @link_spd: the current link speed of the PHY + * @fec_mode: the current FEC mode of the PHY + * @pmd_adj: on return, the amount to adjust the Rx total offset by + * + * Calculates the adjustment to Rx timestamps due to PMD alignment in the PHY. + * This varies by link speed and FEC mode. The value calculated accounts for + * various delays caused when receiving a packet. + */ +static int +ice_phy_calc_pmd_adj_e822(struct ice_hw *hw, u8 port, + enum ice_ptp_link_spd link_spd, + enum ice_ptp_fec_mode fec_mode, u64 *pmd_adj) +{ + u64 cur_freq, clk_incval, tu_per_sec, mult, adj; + u8 pmd_align; + u32 val; + int err; + + err = ice_read_phy_reg_e822(hw, port, P_REG_PMD_ALIGNMENT, &val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read PMD alignment, err %d\n", + err); + return err; + } + + pmd_align = (u8)val; + + cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw)); + clk_incval = ice_ptp_read_src_incval(hw); + + /* Calculate TUs per second */ + tu_per_sec = cur_freq * clk_incval; + + /* The PMD alignment adjustment measurement depends on the link speed, + * and whether FEC is enabled. For each link speed, the alignment + * adjustment is calculated by dividing a value by the length of + * a Time Unit in nanoseconds. + * + * 1G: align == 4 ? 10 * 0.8 : (align + 6 % 10) * 0.8 + * 10G: align == 65 ? 0 : (align * 0.1 * 32/33) + * 10G w/FEC: align * 0.1 * 32/33 + * 25G: align == 65 ? 0 : (align * 0.4 * 32/33) + * 25G w/FEC: align * 0.4 * 32/33 + * 40G: align == 65 ? 0 : (align * 0.1 * 32/33) + * 40G w/FEC: align * 0.1 * 32/33 + * 50G: align == 65 ? 0 : (align * 0.4 * 32/33) + * 50G w/FEC: align * 0.8 * 32/33 + * + * For RS-FEC, if align is < 17 then we must also add 1.6 * 32/33. + * + * To allow for calculating this value using integer arithmetic, we + * instead start with the number of TUs per second, (inverse of the + * length of a Time Unit in nanoseconds), multiply by a value based + * on the PMD alignment register, and then divide by the right value + * calculated based on the table above. To avoid integer overflow this + * division is broken up into a step of dividing by 125 first. + */ + if (link_spd == ICE_PTP_LNK_SPD_1G) { + if (pmd_align == 4) + mult = 10; + else + mult = (pmd_align + 6) % 10; + } else if (link_spd == ICE_PTP_LNK_SPD_10G || + link_spd == ICE_PTP_LNK_SPD_25G || + link_spd == ICE_PTP_LNK_SPD_40G || + link_spd == ICE_PTP_LNK_SPD_50G) { + /* If Clause 74 FEC, always calculate PMD adjust */ + if (pmd_align != 65 || fec_mode == ICE_PTP_FEC_MODE_CLAUSE74) + mult = pmd_align; + else + mult = 0; + } else if (link_spd == ICE_PTP_LNK_SPD_25G_RS || + link_spd == ICE_PTP_LNK_SPD_50G_RS || + link_spd == ICE_PTP_LNK_SPD_100G_RS) { + if (pmd_align < 17) + mult = pmd_align + 40; + else + mult = pmd_align; + } else { + ice_debug(hw, ICE_DBG_PTP, "Unknown link speed %d, skipping PMD adjustment\n", + link_spd); + mult = 0; + } + + /* In some cases, there's no need to adjust for the PMD alignment */ + if (!mult) { + *pmd_adj = 0; + return 0; + } + + /* Calculate the adjustment by multiplying TUs per second by the + * appropriate multiplier and divisor. To avoid overflow, we first + * divide by 125, and then handle remaining divisor based on the link + * speed pmd_adj_divisor value. + */ + adj = div_u64(tu_per_sec, 125); + adj *= mult; + adj = div_u64(adj, e822_vernier[link_spd].pmd_adj_divisor); + + /* Finally, for 25G-RS and 50G-RS, a further adjustment for the Rx + * cycle count is necessary. + */ + if (link_spd == ICE_PTP_LNK_SPD_25G_RS) { + u64 cycle_adj; + u8 rx_cycle; + + err = ice_read_phy_reg_e822(hw, port, P_REG_RX_40_TO_160_CNT, + &val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read 25G-RS Rx cycle count, err %d\n", + err); + return err; + } + + rx_cycle = val & P_REG_RX_40_TO_160_CNT_RXCYC_M; + if (rx_cycle) { + mult = (4 - rx_cycle) * 40; + + cycle_adj = div_u64(tu_per_sec, 125); + cycle_adj *= mult; + cycle_adj = div_u64(cycle_adj, e822_vernier[link_spd].pmd_adj_divisor); + + adj += cycle_adj; + } + } else if (link_spd == ICE_PTP_LNK_SPD_50G_RS) { + u64 cycle_adj; + u8 rx_cycle; + + err = ice_read_phy_reg_e822(hw, port, P_REG_RX_80_TO_160_CNT, + &val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read 50G-RS Rx cycle count, err %d\n", + err); + return err; + } + + rx_cycle = val & P_REG_RX_80_TO_160_CNT_RXCYC_M; + if (rx_cycle) { + mult = rx_cycle * 40; + + cycle_adj = div_u64(tu_per_sec, 125); + cycle_adj *= mult; + cycle_adj = div_u64(cycle_adj, e822_vernier[link_spd].pmd_adj_divisor); + + adj += cycle_adj; + } + } + + /* Return the calculated adjustment */ + *pmd_adj = adj; + + return 0; +} + +/** + * ice_calc_fixed_rx_offset_e822 - Calculated the fixed Rx offset for a port + * @hw: pointer to HW struct + * @link_spd: The Link speed to calculate for + * + * Determine the fixed Rx latency for a given link speed. + */ +static u64 +ice_calc_fixed_rx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd) +{ + u64 cur_freq, clk_incval, tu_per_sec, fixed_offset; + + cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw)); + clk_incval = ice_ptp_read_src_incval(hw); + + /* Calculate TUs per second */ + tu_per_sec = cur_freq * clk_incval; + + /* Calculate number of TUs to add for the fixed Rx latency. Since the + * latency measurement is in 1/100th of a nanosecond, we need to + * multiply by tu_per_sec and then divide by 1e11. This calculation + * overflows 64 bit integer arithmetic, so break it up into two + * divisions by 1e4 first then by 1e7. + */ + fixed_offset = div_u64(tu_per_sec, 10000); + fixed_offset *= e822_vernier[link_spd].rx_fixed_delay; + fixed_offset = div_u64(fixed_offset, 10000000); + + return fixed_offset; +} + +/** + * ice_phy_cfg_rx_offset_e822 - Configure total Rx timestamp offset + * @hw: pointer to the HW struct + * @port: the PHY port to configure + * + * Program the P_REG_TOTAL_RX_OFFSET register with the number of Time Units to + * adjust Rx timestamps by. This combines calculations from the Vernier offset + * measurements taken in hardware with some data about known fixed delay as + * well as adjusting for multi-lane alignment delay. + * + * This function must be called only after the offset registers are valid, + * i.e. after the Vernier calibration wait has passed, to ensure that the PHY + * has measured the offset. + * + * To avoid overflow, when calculating the offset based on the known static + * latency values, we use measurements in 1/100th of a nanosecond, and divide + * the TUs per second up front. This avoids overflow while allowing + * calculation of the adjustment using integer arithmetic. + */ +static int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port) +{ + enum ice_ptp_link_spd link_spd; + enum ice_ptp_fec_mode fec_mode; + u64 total_offset, pmd, val; + int err; + + err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); + if (err) + return err; + + total_offset = ice_calc_fixed_rx_offset_e822(hw, link_spd); + + /* Read the first Vernier offset from the PHY register and add it to + * the total offset. + */ + err = ice_read_64b_phy_reg_e822(hw, port, + P_REG_PAR_PCS_RX_OFFSET_L, + &val); + if (err) + return err; + + total_offset += val; + + /* For Rx, all multi-lane link speeds include a second Vernier + * calibration, because the lanes might not be aligned. + */ + if (link_spd == ICE_PTP_LNK_SPD_40G || + link_spd == ICE_PTP_LNK_SPD_50G || + link_spd == ICE_PTP_LNK_SPD_50G_RS || + link_spd == ICE_PTP_LNK_SPD_100G_RS) { + err = ice_read_64b_phy_reg_e822(hw, port, + P_REG_PAR_RX_TIME_L, + &val); + if (err) + return err; + + total_offset += val; + } + + /* In addition, Rx must account for the PMD alignment */ + err = ice_phy_calc_pmd_adj_e822(hw, port, link_spd, fec_mode, &pmd); + if (err) + return err; + + /* For RS-FEC, this adjustment adds delay, but for other modes, it + * subtracts delay. + */ + if (fec_mode == ICE_PTP_FEC_MODE_RS_FEC) + total_offset += pmd; + else + total_offset -= pmd; + + /* Now that the total offset has been calculated, program it to the + * PHY and indicate that the Rx offset is ready. After this, + * timestamps will be enabled. + */ + err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_RX_OFFSET_L, + total_offset); + if (err) + return err; + + err = ice_write_phy_reg_e822(hw, port, P_REG_RX_OR, 1); + if (err) + return err; + + return 0; +} + +/** + * ice_phy_cfg_fixed_rx_offset_e822 - Configure fixed Rx offset for bypass mode + * @hw: pointer to the HW struct + * @port: the PHY port to configure + * + * Calculate and program the fixed Rx offset, and indicate that the offset is + * ready. This can be used when operating in bypass mode. + */ +static int +ice_phy_cfg_fixed_rx_offset_e822(struct ice_hw *hw, u8 port) +{ + enum ice_ptp_link_spd link_spd; + enum ice_ptp_fec_mode fec_mode; + u64 total_offset; + int err; + + err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); + if (err) + return err; + + total_offset = ice_calc_fixed_rx_offset_e822(hw, link_spd); + + /* Program the fixed Rx offset into the P_REG_TOTAL_RX_OFFSET_L + * register, then indicate that the Rx offset is ready. After this, + * timestamps will be enabled. + * + * Note that this skips including the more precise offsets generated + * by Vernier calibration. + */ + err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_RX_OFFSET_L, + total_offset); + if (err) + return err; + + err = ice_write_phy_reg_e822(hw, port, P_REG_RX_OR, 1); + if (err) + return err; + + return 0; +} + +/** + * ice_read_phy_and_phc_time_e822 - Simultaneously capture PHC and PHY time + * @hw: pointer to the HW struct + * @port: the PHY port to read + * @phy_time: on return, the 64bit PHY timer value + * @phc_time: on return, the lower 64bits of PHC time + * + * Issue a READ_TIME timer command to simultaneously capture the PHY and PHC + * timer values. + */ +static int +ice_read_phy_and_phc_time_e822(struct ice_hw *hw, u8 port, u64 *phy_time, + u64 *phc_time) +{ + u64 tx_time, rx_time; + u32 zo, lo; + u8 tmr_idx; + int err; + + tmr_idx = ice_get_ptp_src_clock_index(hw); + + /* Prepare the PHC timer for a READ_TIME capture command */ + ice_ptp_src_cmd(hw, READ_TIME); + + /* Prepare the PHY timer for a READ_TIME capture command */ + err = ice_ptp_one_port_cmd(hw, port, READ_TIME); + if (err) + return err; + + /* Issue the sync to start the READ_TIME capture */ + ice_ptp_exec_tmr_cmd(hw); + + /* Read the captured PHC time from the shadow time registers */ + zo = rd32(hw, GLTSYN_SHTIME_0(tmr_idx)); + lo = rd32(hw, GLTSYN_SHTIME_L(tmr_idx)); + *phc_time = (u64)lo << 32 | zo; + + /* Read the captured PHY time from the PHY shadow registers */ + err = ice_ptp_read_port_capture(hw, port, &tx_time, &rx_time); + if (err) + return err; + + /* If the PHY Tx and Rx timers don't match, log a warning message. + * Note that this should not happen in normal circumstances since the + * driver always programs them together. + */ + if (tx_time != rx_time) + dev_warn(ice_hw_to_dev(hw), + "PHY port %u Tx and Rx timers do not match, tx_time 0x%016llX, rx_time 0x%016llX\n", + port, (unsigned long long)tx_time, + (unsigned long long)rx_time); + + *phy_time = tx_time; + + return 0; +} + +/** + * ice_sync_phy_timer_e822 - Synchronize the PHY timer with PHC timer + * @hw: pointer to the HW struct + * @port: the PHY port to synchronize + * + * Perform an adjustment to ensure that the PHY and PHC timers are in sync. + * This is done by issuing a READ_TIME command which triggers a simultaneous + * read of the PHY timer and PHC timer. Then we use the difference to + * calculate an appropriate 2s complement addition to add to the PHY timer in + * order to ensure it reads the same value as the primary PHC timer. + */ +static int ice_sync_phy_timer_e822(struct ice_hw *hw, u8 port) +{ + u64 phc_time, phy_time, difference; + int err; + + if (!ice_ptp_lock(hw)) { + ice_debug(hw, ICE_DBG_PTP, "Failed to acquire PTP semaphore\n"); + return -EBUSY; + } + + err = ice_read_phy_and_phc_time_e822(hw, port, &phy_time, &phc_time); + if (err) + goto err_unlock; + + /* Calculate the amount required to add to the port time in order for + * it to match the PHC time. + * + * Note that the port adjustment is done using 2s complement + * arithmetic. This is convenient since it means that we can simply + * calculate the difference between the PHC time and the port time, + * and it will be interpreted correctly. + */ + difference = phc_time - phy_time; + + err = ice_ptp_prep_port_adj_e822(hw, port, (s64)difference); + if (err) + goto err_unlock; + + err = ice_ptp_one_port_cmd(hw, port, ADJ_TIME); + if (err) + goto err_unlock; + + /* Issue the sync to activate the time adjustment */ + ice_ptp_exec_tmr_cmd(hw); + + /* Re-capture the timer values to flush the command registers and + * verify that the time was properly adjusted. + */ + err = ice_read_phy_and_phc_time_e822(hw, port, &phy_time, &phc_time); + if (err) + goto err_unlock; + + dev_info(ice_hw_to_dev(hw), + "Port %u PHY time synced to PHC: 0x%016llX, 0x%016llX\n", + port, (unsigned long long)phy_time, + (unsigned long long)phc_time); + + ice_ptp_unlock(hw); + + return 0; + +err_unlock: + ice_ptp_unlock(hw); + return err; +} + +/** + * ice_stop_phy_timer_e822 - Stop the PHY clock timer + * @hw: pointer to the HW struct + * @port: the PHY port to stop + * @soft_reset: if true, hold the SOFT_RESET bit of P_REG_PS + * + * Stop the clock of a PHY port. This must be done as part of the flow to + * re-calibrate Tx and Rx timestamping offsets whenever the clock time is + * initialized or when link speed changes. + */ +int +ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset) +{ + int err; + u32 val; + + err = ice_write_phy_reg_e822(hw, port, P_REG_TX_OR, 0); + if (err) + return err; + + err = ice_write_phy_reg_e822(hw, port, P_REG_RX_OR, 0); + if (err) + return err; + + err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val); + if (err) + return err; + + val &= ~P_REG_PS_START_M; + err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); + if (err) + return err; + + val &= ~P_REG_PS_ENA_CLK_M; + err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); + if (err) + return err; + + if (soft_reset) { + val |= P_REG_PS_SFT_RESET_M; + err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); + if (err) + return err; + } + + ice_debug(hw, ICE_DBG_PTP, "Disabled clock on PHY port %u\n", port); + + return 0; +} + +/** + * ice_start_phy_timer_e822 - Start the PHY clock timer + * @hw: pointer to the HW struct + * @port: the PHY port to start + * @bypass: if true, start the PHY in bypass mode + * + * Start the clock of a PHY port. This must be done as part of the flow to + * re-calibrate Tx and Rx timestamping offsets whenever the clock time is + * initialized or when link speed changes. + * + * Bypass mode enables timestamps immediately without waiting for Vernier + * calibration to complete. Hardware will still continue taking Vernier + * measurements on Tx or Rx of packets, but they will not be applied to + * timestamps. Use ice_phy_exit_bypass_e822 to exit bypass mode once hardware + * has completed offset calculation. + */ +int +ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass) +{ + u32 lo, hi, val; + u64 incval; + u8 tmr_idx; + int err; + + tmr_idx = ice_get_ptp_src_clock_index(hw); + + err = ice_stop_phy_timer_e822(hw, port, false); + if (err) + return err; + + ice_phy_cfg_lane_e822(hw, port); + + err = ice_phy_cfg_uix_e822(hw, port); + if (err) + return err; + + err = ice_phy_cfg_parpcs_e822(hw, port); + if (err) + return err; + + lo = rd32(hw, GLTSYN_INCVAL_L(tmr_idx)); + hi = rd32(hw, GLTSYN_INCVAL_H(tmr_idx)); + incval = (u64)hi << 32 | lo; + + err = ice_write_40b_phy_reg_e822(hw, port, P_REG_TIMETUS_L, incval); + if (err) + return err; + + err = ice_ptp_one_port_cmd(hw, port, INIT_INCVAL); + if (err) + return err; + + ice_ptp_exec_tmr_cmd(hw); + + err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val); + if (err) + return err; + + val |= P_REG_PS_SFT_RESET_M; + err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); + if (err) + return err; + + val |= P_REG_PS_START_M; + err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); + if (err) + return err; + + val &= ~P_REG_PS_SFT_RESET_M; + err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); + if (err) + return err; + + err = ice_ptp_one_port_cmd(hw, port, INIT_INCVAL); + if (err) + return err; + + ice_ptp_exec_tmr_cmd(hw); + + val |= P_REG_PS_ENA_CLK_M; + err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); + if (err) + return err; + + val |= P_REG_PS_LOAD_OFFSET_M; + err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); + if (err) + return err; + + ice_ptp_exec_tmr_cmd(hw); + + err = ice_sync_phy_timer_e822(hw, port); + if (err) + return err; + + if (bypass) { + val |= P_REG_PS_BYPASS_MODE_M; + /* Enter BYPASS mode, enabling timestamps immediately. */ + err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); + if (err) + return err; + + /* Program the fixed Tx offset */ + err = ice_phy_cfg_fixed_tx_offset_e822(hw, port); + if (err) + return err; + + /* Program the fixed Rx offset */ + err = ice_phy_cfg_fixed_rx_offset_e822(hw, port); + if (err) + return err; + } + + ice_debug(hw, ICE_DBG_PTP, "Enabled clock on PHY port %u\n", port); + + return 0; +} + +/** + * ice_phy_exit_bypass_e822 - Exit bypass mode, after vernier calculations + * @hw: pointer to the HW struct + * @port: the PHY port to configure + * + * After hardware finishes vernier calculations for the Tx and Rx offset, this + * function can be used to exit bypass mode by updating the total Tx and Rx + * offsets, and then disabling bypass. This will enable hardware to include + * the more precise offset calibrations, increasing precision of the generated + * timestamps. + * + * This cannot be done until hardware has measured the offsets, which requires + * waiting until at least one packet has been sent and received by the device. + */ +int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port) +{ + int err; + u32 val; + + err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OV_STATUS, &val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OV_STATUS for port %u, err %d\n", + port, err); + return err; + } + + if (!(val & P_REG_TX_OV_STATUS_OV_M)) { + ice_debug(hw, ICE_DBG_PTP, "Tx offset is not yet valid for port %u\n", + port); + return -EBUSY; + } + + err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OV_STATUS, &val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OV_STATUS for port %u, err %d\n", + port, err); + return err; + } + + if (!(val & P_REG_TX_OV_STATUS_OV_M)) { + ice_debug(hw, ICE_DBG_PTP, "Rx offset is not yet valid for port %u\n", + port); + return -EBUSY; + } + + err = ice_phy_cfg_tx_offset_e822(hw, port); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to program total Tx offset for port %u, err %d\n", + port, err); + return err; + } + + err = ice_phy_cfg_rx_offset_e822(hw, port); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to program total Rx offset for port %u, err %d\n", + port, err); + return err; + } + + /* Exit bypass mode now that the offset has been updated */ + err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read P_REG_PS for port %u, err %d\n", + port, err); + return err; + } + + if (!(val & P_REG_PS_BYPASS_MODE_M)) + ice_debug(hw, ICE_DBG_PTP, "Port %u not in bypass mode\n", + port); + + val &= ~P_REG_PS_BYPASS_MODE_M; + err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to disable bypass for port %u, err %d\n", + port, err); + return err; + } + + dev_info(ice_hw_to_dev(hw), "Exiting bypass mode on PHY port %u\n", + port); + + return 0; +} + /* E810 functions * * The following functions operate on the E810 series devices which use @@ -68,18 +2538,18 @@ u8 ice_get_ptp_src_clock_index(struct ice_hw *hw) static int ice_read_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 *val) { struct ice_sbq_msg_input msg = {0}; - int status; + int err; msg.msg_addr_low = lower_16_bits(addr); msg.msg_addr_high = upper_16_bits(addr); msg.opcode = ice_sbq_msg_rd; msg.dest_dev = rmn_0; - status = ice_sbq_rw_reg(hw, &msg); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, status %d\n", - status); - return status; + err = ice_sbq_rw_reg(hw, &msg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n", + err); + return err; } *val = msg.data; @@ -98,7 +2568,7 @@ static int ice_read_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 *val) static int ice_write_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 val) { struct ice_sbq_msg_input msg = {0}; - int status; + int err; msg.msg_addr_low = lower_16_bits(addr); msg.msg_addr_high = upper_16_bits(addr); @@ -106,11 +2576,11 @@ static int ice_write_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 val) msg.dest_dev = rmn_0; msg.data = val; - status = ice_sbq_rw_reg(hw, &msg); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, status %d\n", - status); - return status; + err = ice_sbq_rw_reg(hw, &msg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n", + err); + return err; } return 0; @@ -130,23 +2600,23 @@ static int ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp) { u32 lo_addr, hi_addr, lo, hi; - int status; + int err; lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx); hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx); - status = ice_read_phy_reg_e810(hw, lo_addr, &lo); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read low PTP timestamp register, status %d\n", - status); - return status; + err = ice_read_phy_reg_e810(hw, lo_addr, &lo); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read low PTP timestamp register, err %d\n", + err); + return err; } - status = ice_read_phy_reg_e810(hw, hi_addr, &hi); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read high PTP timestamp register, status %d\n", - status); - return status; + err = ice_read_phy_reg_e810(hw, hi_addr, &hi); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read high PTP timestamp register, err %d\n", + err); + return err; } /* For E810 devices, the timestamp is reported with the lower 32 bits @@ -169,23 +2639,23 @@ ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp) static int ice_clear_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx) { u32 lo_addr, hi_addr; - int status; + int err; lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx); hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx); - status = ice_write_phy_reg_e810(hw, lo_addr, 0); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, status %d\n", - status); - return status; + err = ice_write_phy_reg_e810(hw, lo_addr, 0); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, err %d\n", + err); + return err; } - status = ice_write_phy_reg_e810(hw, hi_addr, 0); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, status %d\n", - status); - return status; + err = ice_write_phy_reg_e810(hw, hi_addr, 0); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, err %d\n", + err); + return err; } return 0; @@ -200,17 +2670,32 @@ static int ice_clear_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx) */ int ice_ptp_init_phy_e810(struct ice_hw *hw) { - int status; u8 tmr_idx; + int err; tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; - status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_ENA(tmr_idx), - GLTSYN_ENA_TSYN_ENA_M); - if (status) + err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_ENA(tmr_idx), + GLTSYN_ENA_TSYN_ENA_M); + if (err) ice_debug(hw, ICE_DBG_PTP, "PTP failed in ena_phy_time_syn %d\n", - status); + err); - return status; + return err; +} + +/** + * ice_ptp_init_phc_e810 - Perform E810 specific PHC initialization + * @hw: pointer to HW struct + * + * Perform E810-specific PTP hardware clock initialization steps. + */ +static int ice_ptp_init_phc_e810(struct ice_hw *hw) +{ + /* Ensure synchronization delay is zero */ + wr32(hw, GLTSYN_SYNC_DLAY, 0); + + /* Initialize the PHY */ + return ice_ptp_init_phy_e810(hw); } /** @@ -227,22 +2712,22 @@ int ice_ptp_init_phy_e810(struct ice_hw *hw) */ static int ice_ptp_prep_phy_time_e810(struct ice_hw *hw, u32 time) { - int status; u8 tmr_idx; + int err; tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; - status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHTIME_0(tmr_idx), 0); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to write SHTIME_0, status %d\n", - status); - return status; + err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHTIME_0(tmr_idx), 0); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write SHTIME_0, err %d\n", + err); + return err; } - status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHTIME_L(tmr_idx), time); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to write SHTIME_L, status %d\n", - status); - return status; + err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHTIME_L(tmr_idx), time); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write SHTIME_L, err %d\n", + err); + return err; } return 0; @@ -263,26 +2748,26 @@ static int ice_ptp_prep_phy_time_e810(struct ice_hw *hw, u32 time) */ static int ice_ptp_prep_phy_adj_e810(struct ice_hw *hw, s32 adj) { - int status; u8 tmr_idx; + int err; tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; /* Adjustments are represented as signed 2's complement values in * nanoseconds. Sub-nanosecond adjustment is not supported. */ - status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_L(tmr_idx), 0); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to write adj to PHY SHADJ_L, status %d\n", - status); - return status; + err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_L(tmr_idx), 0); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write adj to PHY SHADJ_L, err %d\n", + err); + return err; } - status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_H(tmr_idx), adj); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to write adj to PHY SHADJ_H, status %d\n", - status); - return status; + err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_H(tmr_idx), adj); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write adj to PHY SHADJ_H, err %d\n", + err); + return err; } return 0; @@ -300,25 +2785,25 @@ static int ice_ptp_prep_phy_adj_e810(struct ice_hw *hw, s32 adj) static int ice_ptp_prep_phy_incval_e810(struct ice_hw *hw, u64 incval) { u32 high, low; - int status; u8 tmr_idx; + int err; tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; low = lower_32_bits(incval); high = upper_32_bits(incval); - status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_L(tmr_idx), low); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to write incval to PHY SHADJ_L, status %d\n", - status); - return status; + err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_L(tmr_idx), low); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write incval to PHY SHADJ_L, err %d\n", + err); + return err; } - status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_H(tmr_idx), high); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to write incval PHY SHADJ_H, status %d\n", - status); - return status; + err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_H(tmr_idx), high); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write incval PHY SHADJ_H, err %d\n", + err); + return err; } return 0; @@ -335,7 +2820,7 @@ static int ice_ptp_prep_phy_incval_e810(struct ice_hw *hw, u64 incval) static int ice_ptp_port_cmd_e810(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) { u32 cmd_val, val; - int status; + int err; switch (cmd) { case INIT_TIME: @@ -356,20 +2841,20 @@ static int ice_ptp_port_cmd_e810(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) } /* Read, modify, write */ - status = ice_read_phy_reg_e810(hw, ETH_GLTSYN_CMD, &val); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read GLTSYN_CMD, status %d\n", status); - return status; + err = ice_read_phy_reg_e810(hw, ETH_GLTSYN_CMD, &val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read GLTSYN_CMD, err %d\n", err); + return err; } /* Modify necessary bits only and perform write */ val &= ~TS_CMD_MASK_E810; val |= cmd_val; - status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_CMD, val); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to write back GLTSYN_CMD, status %d\n", status); - return status; + err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_CMD, val); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to write back GLTSYN_CMD, err %d\n", err); + return err; } return 0; @@ -377,12 +2862,9 @@ static int ice_ptp_port_cmd_e810(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) /* Device agnostic functions * - * The following functions implement useful behavior to hide the differences - * between E810 and other devices. They call the device-specific - * implementations where necessary. - * - * Currently, the driver only supports E810, but future work will enable - * support for E822-based devices. + * The following functions implement shared behavior common to both E822 and + * E810 devices, possibly calling a device specific implementation where + * necessary. */ /** @@ -433,42 +2915,6 @@ void ice_ptp_unlock(struct ice_hw *hw) } /** - * ice_ptp_src_cmd - Prepare source timer for a timer command - * @hw: pointer to HW structure - * @cmd: Timer command - * - * Prepare the source timer for an upcoming timer sync command. - */ -static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) -{ - u32 cmd_val; - u8 tmr_idx; - - tmr_idx = ice_get_ptp_src_clock_index(hw); - cmd_val = tmr_idx << SEL_CPK_SRC; - - switch (cmd) { - case INIT_TIME: - cmd_val |= GLTSYN_CMD_INIT_TIME; - break; - case INIT_INCVAL: - cmd_val |= GLTSYN_CMD_INIT_INCVAL; - break; - case ADJ_TIME: - cmd_val |= GLTSYN_CMD_ADJ_TIME; - break; - case ADJ_TIME_AT_TIME: - cmd_val |= GLTSYN_CMD_ADJ_INIT_TIME; - break; - case READ_TIME: - cmd_val |= GLTSYN_CMD_READ_TIME; - break; - } - - wr32(hw, GLTSYN_CMD, cmd_val); -} - -/** * ice_ptp_tmr_cmd - Prepare and trigger a timer sync command * @hw: pointer to HW struct * @cmd: the command to issue @@ -480,23 +2926,26 @@ static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) */ static int ice_ptp_tmr_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) { - int status; + int err; /* First, prepare the source timer */ ice_ptp_src_cmd(hw, cmd); /* Next, prepare the ports */ - status = ice_ptp_port_cmd_e810(hw, cmd); - if (status) { - ice_debug(hw, ICE_DBG_PTP, "Failed to prepare PHY ports for timer command %u, status %d\n", - cmd, status); - return status; + if (ice_is_e810(hw)) + err = ice_ptp_port_cmd_e810(hw, cmd); + else + err = ice_ptp_port_cmd_e822(hw, cmd); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to prepare PHY ports for timer command %u, err %d\n", + cmd, err); + return err; } - /* Write the sync command register to drive both source and PHY timer commands - * synchronously + /* Write the sync command register to drive both source and PHY timer + * commands synchronously */ - wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD); + ice_ptp_exec_tmr_cmd(hw); return 0; } @@ -516,8 +2965,8 @@ static int ice_ptp_tmr_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) */ int ice_ptp_init_time(struct ice_hw *hw, u64 time) { - int status; u8 tmr_idx; + int err; tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; @@ -528,9 +2977,12 @@ int ice_ptp_init_time(struct ice_hw *hw, u64 time) /* PHY timers */ /* Fill Rx and Tx ports and send msg to PHY */ - status = ice_ptp_prep_phy_time_e810(hw, time & 0xFFFFFFFF); - if (status) - return status; + if (ice_is_e810(hw)) + err = ice_ptp_prep_phy_time_e810(hw, time & 0xFFFFFFFF); + else + err = ice_ptp_prep_phy_time_e822(hw, time & 0xFFFFFFFF); + if (err) + return err; return ice_ptp_tmr_cmd(hw, INIT_TIME); } @@ -551,8 +3003,8 @@ int ice_ptp_init_time(struct ice_hw *hw, u64 time) */ int ice_ptp_write_incval(struct ice_hw *hw, u64 incval) { - int status; u8 tmr_idx; + int err; tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; @@ -560,9 +3012,12 @@ int ice_ptp_write_incval(struct ice_hw *hw, u64 incval) wr32(hw, GLTSYN_SHADJ_L(tmr_idx), lower_32_bits(incval)); wr32(hw, GLTSYN_SHADJ_H(tmr_idx), upper_32_bits(incval)); - status = ice_ptp_prep_phy_incval_e810(hw, incval); - if (status) - return status; + if (ice_is_e810(hw)) + err = ice_ptp_prep_phy_incval_e810(hw, incval); + else + err = ice_ptp_prep_phy_incval_e822(hw, incval); + if (err) + return err; return ice_ptp_tmr_cmd(hw, INIT_INCVAL); } @@ -576,16 +3031,16 @@ int ice_ptp_write_incval(struct ice_hw *hw, u64 incval) */ int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval) { - int status; + int err; if (!ice_ptp_lock(hw)) return -EBUSY; - status = ice_ptp_write_incval(hw, incval); + err = ice_ptp_write_incval(hw, incval); ice_ptp_unlock(hw); - return status; + return err; } /** @@ -603,8 +3058,8 @@ int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval) */ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) { - int status; u8 tmr_idx; + int err; tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; @@ -616,9 +3071,12 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) wr32(hw, GLTSYN_SHADJ_L(tmr_idx), 0); wr32(hw, GLTSYN_SHADJ_H(tmr_idx), adj); - status = ice_ptp_prep_phy_adj_e810(hw, adj); - if (status) - return status; + if (ice_is_e810(hw)) + err = ice_ptp_prep_phy_adj_e810(hw, adj); + else + err = ice_ptp_prep_phy_adj_e822(hw, adj); + if (err) + return err; return ice_ptp_tmr_cmd(hw, ADJ_TIME); } @@ -630,11 +3088,16 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) * @idx: the timestamp index to read * @tstamp: on return, the 40bit timestamp value * - * Read a 40bit timestamp value out of the timestamp block. + * Read a 40bit timestamp value out of the timestamp block. For E822 devices, + * the block is the quad to read from. For E810 devices, the block is the + * logical port to read from. */ int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp) { - return ice_read_phy_tstamp_e810(hw, block, idx, tstamp); + if (ice_is_e810(hw)) + return ice_read_phy_tstamp_e810(hw, block, idx, tstamp); + else + return ice_read_phy_tstamp_e822(hw, block, idx, tstamp); } /** @@ -643,11 +3106,16 @@ int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp) * @block: the block to read from * @idx: the timestamp index to reset * - * Clear a timestamp, resetting its valid bit, from the timestamp block. + * Clear a timestamp, resetting its valid bit, from the timestamp block. For + * E822 devices, the block is the quad to clear from. For E810 devices, the + * block is the logical port to clear from. */ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx) { - return ice_clear_phy_tstamp_e810(hw, block, idx); + if (ice_is_e810(hw)) + return ice_clear_phy_tstamp_e810(hw, block, idx); + else + return ice_clear_phy_tstamp_e822(hw, block, idx); } /* E810T SMA functions @@ -800,3 +3268,25 @@ bool ice_is_pca9575_present(struct ice_hw *hw) return !status && handle; } + +/** + * ice_ptp_init_phc - Initialize PTP hardware clock + * @hw: pointer to the HW struct + * + * Perform the steps required to initialize the PTP hardware clock. + */ +int ice_ptp_init_phc(struct ice_hw *hw) +{ + u8 src_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + /* Enable source clocks */ + wr32(hw, GLTSYN_ENA(src_idx), GLTSYN_ENA_TSYN_ENA_M); + + /* Clear event err indications for auxiliary pins */ + (void)rd32(hw, GLTSYN_STAT(src_idx)); + + if (ice_is_e810(hw)) + return ice_ptp_init_phc_e810(hw); + else + return ice_ptp_init_phc_e822(hw); +} diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index b2984b5c22c1..519e75462e67 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -12,6 +12,112 @@ enum ice_ptp_tmr_cmd { READ_TIME }; +enum ice_ptp_serdes { + ICE_PTP_SERDES_1G, + ICE_PTP_SERDES_10G, + ICE_PTP_SERDES_25G, + ICE_PTP_SERDES_40G, + ICE_PTP_SERDES_50G, + ICE_PTP_SERDES_100G +}; + +enum ice_ptp_link_spd { + ICE_PTP_LNK_SPD_1G, + ICE_PTP_LNK_SPD_10G, + ICE_PTP_LNK_SPD_25G, + ICE_PTP_LNK_SPD_25G_RS, + ICE_PTP_LNK_SPD_40G, + ICE_PTP_LNK_SPD_50G, + ICE_PTP_LNK_SPD_50G_RS, + ICE_PTP_LNK_SPD_100G_RS, + NUM_ICE_PTP_LNK_SPD /* Must be last */ +}; + +enum ice_ptp_fec_mode { + ICE_PTP_FEC_MODE_NONE, + ICE_PTP_FEC_MODE_CLAUSE74, + ICE_PTP_FEC_MODE_RS_FEC +}; + +/** + * struct ice_time_ref_info_e822 + * @pll_freq: Frequency of PLL that drives timer ticks in Hz + * @nominal_incval: increment to generate nanoseconds in GLTSYN_TIME_L + * @pps_delay: propagation delay of the PPS output signal + * + * Characteristic information for the various TIME_REF sources possible in the + * E822 devices + */ +struct ice_time_ref_info_e822 { + u64 pll_freq; + u64 nominal_incval; + u8 pps_delay; +}; + +/** + * struct ice_vernier_info_e822 + * @tx_par_clk: Frequency used to calculate P_REG_PAR_TX_TUS + * @rx_par_clk: Frequency used to calculate P_REG_PAR_RX_TUS + * @tx_pcs_clk: Frequency used to calculate P_REG_PCS_TX_TUS + * @rx_pcs_clk: Frequency used to calculate P_REG_PCS_RX_TUS + * @tx_desk_rsgb_par: Frequency used to calculate P_REG_DESK_PAR_TX_TUS + * @rx_desk_rsgb_par: Frequency used to calculate P_REG_DESK_PAR_RX_TUS + * @tx_desk_rsgb_pcs: Frequency used to calculate P_REG_DESK_PCS_TX_TUS + * @rx_desk_rsgb_pcs: Frequency used to calculate P_REG_DESK_PCS_RX_TUS + * @tx_fixed_delay: Fixed Tx latency measured in 1/100th nanoseconds + * @pmd_adj_divisor: Divisor used to calculate PDM alignment adjustment + * @rx_fixed_delay: Fixed Rx latency measured in 1/100th nanoseconds + * + * Table of constants used during as part of the Vernier calibration of the Tx + * and Rx timestamps. This includes frequency values used to compute TUs per + * PAR/PCS clock cycle, and static delay values measured during hardware + * design. + * + * Note that some values are not used for all link speeds, and the + * P_REG_DESK_PAR* registers may represent different clock markers at + * different link speeds, either the deskew marker for multi-lane link speeds + * or the Reed Solomon gearbox marker for RS-FEC. + */ +struct ice_vernier_info_e822 { + u32 tx_par_clk; + u32 rx_par_clk; + u32 tx_pcs_clk; + u32 rx_pcs_clk; + u32 tx_desk_rsgb_par; + u32 rx_desk_rsgb_par; + u32 tx_desk_rsgb_pcs; + u32 rx_desk_rsgb_pcs; + u32 tx_fixed_delay; + u32 pmd_adj_divisor; + u32 rx_fixed_delay; +}; + +/** + * struct ice_cgu_pll_params_e822 + * @refclk_pre_div: Reference clock pre-divisor + * @feedback_div: Feedback divisor + * @frac_n_div: Fractional divisor + * @post_pll_div: Post PLL divisor + * + * Clock Generation Unit parameters used to program the PLL based on the + * selected TIME_REF frequency. + */ +struct ice_cgu_pll_params_e822 { + u32 refclk_pre_div; + u32 feedback_div; + u32 frac_n_div; + u32 post_pll_div; +}; + +extern const struct +ice_cgu_pll_params_e822 e822_cgu_params[NUM_ICE_TIME_REF_FREQ]; + +/* Table of constants related to possible TIME_REF sources */ +extern const struct ice_time_ref_info_e822 e822_time_ref[NUM_ICE_TIME_REF_FREQ]; + +/* Table of constants for Vernier calibration on E822 */ +extern const struct ice_vernier_info_e822 e822_vernier[NUM_ICE_PTP_LNK_SPD]; + /* Increment value to generate nanoseconds in the GLTSYN_TIME_L register for * the E810 devices. Based off of a PLL with an 812.5 MHz frequency. */ @@ -27,6 +133,59 @@ int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval); int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj); int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp); int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx); +int ice_ptp_init_phc(struct ice_hw *hw); + +/* E822 family functions */ +int ice_read_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 *val); +int ice_write_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 val); +int ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val); +int ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val); +int ice_ptp_prep_port_adj_e822(struct ice_hw *hw, u8 port, s64 time); + +/** + * ice_e822_time_ref - Get the current TIME_REF from capabilities + * @hw: pointer to the HW structure + * + * Returns the current TIME_REF from the capabilities structure. + */ +static inline enum ice_time_ref_freq ice_e822_time_ref(struct ice_hw *hw) +{ + return hw->func_caps.ts_func_info.time_ref; +} + +/** + * ice_set_e822_time_ref - Set new TIME_REF + * @hw: pointer to the HW structure + * @time_ref: new TIME_REF to set + * + * Update the TIME_REF in the capabilities structure in response to some + * change, such as an update to the CGU registers. + */ +static inline void +ice_set_e822_time_ref(struct ice_hw *hw, enum ice_time_ref_freq time_ref) +{ + hw->func_caps.ts_func_info.time_ref = time_ref; +} + +static inline u64 ice_e822_pll_freq(enum ice_time_ref_freq time_ref) +{ + return e822_time_ref[time_ref].pll_freq; +} + +static inline u64 ice_e822_nominal_incval(enum ice_time_ref_freq time_ref) +{ + return e822_time_ref[time_ref].nominal_incval; +} + +static inline u64 ice_e822_pps_delay(enum ice_time_ref_freq time_ref) +{ + return e822_time_ref[time_ref].pps_delay; +} + +/* E822 Vernier calibration functions */ +int ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset); +int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass); +int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port); /* E810 family functions */ int ice_ptp_init_phy_e810(struct ice_hw *hw); @@ -36,19 +195,194 @@ bool ice_is_pca9575_present(struct ice_hw *hw); #define PFTSYN_SEM_BYTES 4 +#define ICE_PTP_CLOCK_INDEX_0 0x00 +#define ICE_PTP_CLOCK_INDEX_1 0x01 + /* PHY timer commands */ #define SEL_CPK_SRC 8 +#define SEL_PHY_SRC 3 /* Time Sync command Definitions */ #define GLTSYN_CMD_INIT_TIME BIT(0) #define GLTSYN_CMD_INIT_INCVAL BIT(1) +#define GLTSYN_CMD_INIT_TIME_INCVAL (BIT(0) | BIT(1)) #define GLTSYN_CMD_ADJ_TIME BIT(2) #define GLTSYN_CMD_ADJ_INIT_TIME (BIT(2) | BIT(3)) #define GLTSYN_CMD_READ_TIME BIT(7) +/* PHY port Time Sync command definitions */ +#define PHY_CMD_INIT_TIME BIT(0) +#define PHY_CMD_INIT_INCVAL BIT(1) +#define PHY_CMD_ADJ_TIME (BIT(0) | BIT(1)) +#define PHY_CMD_ADJ_TIME_AT_TIME (BIT(0) | BIT(2)) +#define PHY_CMD_READ_TIME (BIT(0) | BIT(1) | BIT(2)) + #define TS_CMD_MASK_E810 0xFF +#define TS_CMD_MASK 0xF #define SYNC_EXEC_CMD 0x3 +/* Macros to derive port low and high addresses on both quads */ +#define P_Q0_L(a, p) ((((a) + (0x2000 * (p)))) & 0xFFFF) +#define P_Q0_H(a, p) ((((a) + (0x2000 * (p)))) >> 16) +#define P_Q1_L(a, p) ((((a) - (0x2000 * ((p) - ICE_PORTS_PER_QUAD)))) & 0xFFFF) +#define P_Q1_H(a, p) ((((a) - (0x2000 * ((p) - ICE_PORTS_PER_QUAD)))) >> 16) + +/* PHY QUAD register base addresses */ +#define Q_0_BASE 0x94000 +#define Q_1_BASE 0x114000 + +/* Timestamp memory reset registers */ +#define Q_REG_TS_CTRL 0x618 +#define Q_REG_TS_CTRL_S 0 +#define Q_REG_TS_CTRL_M BIT(0) + +/* Timestamp availability status registers */ +#define Q_REG_TX_MEMORY_STATUS_L 0xCF0 +#define Q_REG_TX_MEMORY_STATUS_U 0xCF4 + +/* Tx FIFO status registers */ +#define Q_REG_FIFO23_STATUS 0xCF8 +#define Q_REG_FIFO01_STATUS 0xCFC +#define Q_REG_FIFO02_S 0 +#define Q_REG_FIFO02_M ICE_M(0x3FF, 0) +#define Q_REG_FIFO13_S 10 +#define Q_REG_FIFO13_M ICE_M(0x3FF, 10) + +/* Interrupt control Config registers */ +#define Q_REG_TX_MEM_GBL_CFG 0xC08 +#define Q_REG_TX_MEM_GBL_CFG_LANE_TYPE_S 0 +#define Q_REG_TX_MEM_GBL_CFG_LANE_TYPE_M BIT(0) +#define Q_REG_TX_MEM_GBL_CFG_TX_TYPE_S 1 +#define Q_REG_TX_MEM_GBL_CFG_TX_TYPE_M ICE_M(0xFF, 1) +#define Q_REG_TX_MEM_GBL_CFG_INTR_THR_S 9 +#define Q_REG_TX_MEM_GBL_CFG_INTR_THR_M ICE_M(0x3F, 9) +#define Q_REG_TX_MEM_GBL_CFG_INTR_ENA_S 15 +#define Q_REG_TX_MEM_GBL_CFG_INTR_ENA_M BIT(15) + +/* Tx Timestamp data registers */ +#define Q_REG_TX_MEMORY_BANK_START 0xA00 + +/* PHY port register base addresses */ +#define P_0_BASE 0x80000 +#define P_4_BASE 0x106000 + +/* Timestamp init registers */ +#define P_REG_RX_TIMER_INC_PRE_L 0x46C +#define P_REG_RX_TIMER_INC_PRE_U 0x470 +#define P_REG_TX_TIMER_INC_PRE_L 0x44C +#define P_REG_TX_TIMER_INC_PRE_U 0x450 + +/* Timestamp match and adjust target registers */ +#define P_REG_RX_TIMER_CNT_ADJ_L 0x474 +#define P_REG_RX_TIMER_CNT_ADJ_U 0x478 +#define P_REG_TX_TIMER_CNT_ADJ_L 0x454 +#define P_REG_TX_TIMER_CNT_ADJ_U 0x458 + +/* Timestamp capture registers */ +#define P_REG_RX_CAPTURE_L 0x4D8 +#define P_REG_RX_CAPTURE_U 0x4DC +#define P_REG_TX_CAPTURE_L 0x4B4 +#define P_REG_TX_CAPTURE_U 0x4B8 + +/* Timestamp PHY incval registers */ +#define P_REG_TIMETUS_L 0x410 +#define P_REG_TIMETUS_U 0x414 + +#define P_REG_40B_LOW_M 0xFF +#define P_REG_40B_HIGH_S 8 + +/* PHY window length registers */ +#define P_REG_WL 0x40C + +#define PTP_VERNIER_WL 0x111ed + +/* PHY start registers */ +#define P_REG_PS 0x408 +#define P_REG_PS_START_S 0 +#define P_REG_PS_START_M BIT(0) +#define P_REG_PS_BYPASS_MODE_S 1 +#define P_REG_PS_BYPASS_MODE_M BIT(1) +#define P_REG_PS_ENA_CLK_S 2 +#define P_REG_PS_ENA_CLK_M BIT(2) +#define P_REG_PS_LOAD_OFFSET_S 3 +#define P_REG_PS_LOAD_OFFSET_M BIT(3) +#define P_REG_PS_SFT_RESET_S 11 +#define P_REG_PS_SFT_RESET_M BIT(11) + +/* PHY offset valid registers */ +#define P_REG_TX_OV_STATUS 0x4D4 +#define P_REG_TX_OV_STATUS_OV_S 0 +#define P_REG_TX_OV_STATUS_OV_M BIT(0) +#define P_REG_RX_OV_STATUS 0x4F8 +#define P_REG_RX_OV_STATUS_OV_S 0 +#define P_REG_RX_OV_STATUS_OV_M BIT(0) + +/* PHY offset ready registers */ +#define P_REG_TX_OR 0x45C +#define P_REG_RX_OR 0x47C + +/* PHY total offset registers */ +#define P_REG_TOTAL_RX_OFFSET_L 0x460 +#define P_REG_TOTAL_RX_OFFSET_U 0x464 +#define P_REG_TOTAL_TX_OFFSET_L 0x440 +#define P_REG_TOTAL_TX_OFFSET_U 0x444 + +/* Timestamp PAR/PCS registers */ +#define P_REG_UIX66_10G_40G_L 0x480 +#define P_REG_UIX66_10G_40G_U 0x484 +#define P_REG_UIX66_25G_100G_L 0x488 +#define P_REG_UIX66_25G_100G_U 0x48C +#define P_REG_DESK_PAR_RX_TUS_L 0x490 +#define P_REG_DESK_PAR_RX_TUS_U 0x494 +#define P_REG_DESK_PAR_TX_TUS_L 0x498 +#define P_REG_DESK_PAR_TX_TUS_U 0x49C +#define P_REG_DESK_PCS_RX_TUS_L 0x4A0 +#define P_REG_DESK_PCS_RX_TUS_U 0x4A4 +#define P_REG_DESK_PCS_TX_TUS_L 0x4A8 +#define P_REG_DESK_PCS_TX_TUS_U 0x4AC +#define P_REG_PAR_RX_TUS_L 0x420 +#define P_REG_PAR_RX_TUS_U 0x424 +#define P_REG_PAR_TX_TUS_L 0x428 +#define P_REG_PAR_TX_TUS_U 0x42C +#define P_REG_PCS_RX_TUS_L 0x430 +#define P_REG_PCS_RX_TUS_U 0x434 +#define P_REG_PCS_TX_TUS_L 0x438 +#define P_REG_PCS_TX_TUS_U 0x43C +#define P_REG_PAR_RX_TIME_L 0x4F0 +#define P_REG_PAR_RX_TIME_U 0x4F4 +#define P_REG_PAR_TX_TIME_L 0x4CC +#define P_REG_PAR_TX_TIME_U 0x4D0 +#define P_REG_PAR_PCS_RX_OFFSET_L 0x4E8 +#define P_REG_PAR_PCS_RX_OFFSET_U 0x4EC +#define P_REG_PAR_PCS_TX_OFFSET_L 0x4C4 +#define P_REG_PAR_PCS_TX_OFFSET_U 0x4C8 +#define P_REG_LINK_SPEED 0x4FC +#define P_REG_LINK_SPEED_SERDES_S 0 +#define P_REG_LINK_SPEED_SERDES_M ICE_M(0x7, 0) +#define P_REG_LINK_SPEED_FEC_MODE_S 3 +#define P_REG_LINK_SPEED_FEC_MODE_M ICE_M(0x3, 3) +#define P_REG_LINK_SPEED_FEC_MODE(reg) \ + (((reg) & P_REG_LINK_SPEED_FEC_MODE_M) >> \ + P_REG_LINK_SPEED_FEC_MODE_S) + +/* PHY timestamp related registers */ +#define P_REG_PMD_ALIGNMENT 0x0FC +#define P_REG_RX_80_TO_160_CNT 0x6FC +#define P_REG_RX_80_TO_160_CNT_RXCYC_S 0 +#define P_REG_RX_80_TO_160_CNT_RXCYC_M BIT(0) +#define P_REG_RX_40_TO_160_CNT 0x8FC +#define P_REG_RX_40_TO_160_CNT_RXCYC_S 0 +#define P_REG_RX_40_TO_160_CNT_RXCYC_M ICE_M(0x3, 0) + +/* Rx FIFO status registers */ +#define P_REG_RX_OV_FS 0x4F8 +#define P_REG_RX_OV_FS_FIFO_STATUS_S 2 +#define P_REG_RX_OV_FS_FIFO_STATUS_M ICE_M(0x3FF, 2) + +/* Timestamp command registers */ +#define P_REG_TX_TMR_CMD 0x448 +#define P_REG_RX_TMR_CMD 0x468 + /* E810 timesync enable register */ #define ETH_GLTSYN_ENA(_i) (0x03000348 + ((_i) * 4)) @@ -68,9 +402,20 @@ bool ice_is_pca9575_present(struct ice_hw *hw); /* Timestamp block macros */ #define TS_LOW_M 0xFFFFFFFF +#define TS_HIGH_M 0xFF #define TS_HIGH_S 32 +#define TS_PHY_LOW_M 0xFF +#define TS_PHY_HIGH_M 0xFFFFFFFF +#define TS_PHY_HIGH_S 8 + #define BYTES_PER_IDX_ADDR_L_U 8 +#define BYTES_PER_IDX_ADDR_L 4 + +/* Internal PHY timestamp address */ +#define TS_L(a, idx) ((a) + ((idx) * BYTES_PER_IDX_ADDR_L_U)) +#define TS_H(a, idx) ((a) + ((idx) * BYTES_PER_IDX_ADDR_L_U + \ + BYTES_PER_IDX_ADDR_L)) /* External PHY timestamp address */ #define TS_EXT(a, port, idx) ((a) + (0x1000 * (port)) + \ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 58b1907e3ff1..caf0a02b25f5 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -298,9 +298,30 @@ struct ice_hw_common_caps { #define ICE_TS_TMR_IDX_ASSOC_S 24 #define ICE_TS_TMR_IDX_ASSOC_M BIT(24) +/* TIME_REF clock rate specification */ +enum ice_time_ref_freq { + ICE_TIME_REF_FREQ_25_000 = 0, + ICE_TIME_REF_FREQ_122_880 = 1, + ICE_TIME_REF_FREQ_125_000 = 2, + ICE_TIME_REF_FREQ_153_600 = 3, + ICE_TIME_REF_FREQ_156_250 = 4, + ICE_TIME_REF_FREQ_245_760 = 5, + + NUM_ICE_TIME_REF_FREQ +}; + +/* Clock source specification */ +enum ice_clk_src { + ICE_CLK_SRC_TCX0 = 0, /* Temperature compensated oscillator */ + ICE_CLK_SRC_TIME_REF = 1, /* Use TIME_REF reference clock */ + + NUM_ICE_CLK_SRC +}; + struct ice_ts_func_info { /* Function specific info */ - u32 clk_freq; + enum ice_time_ref_freq time_ref; + u8 clk_freq; u8 clk_src; u8 tmr_index_assoc; u8 ena; diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index 9265901455cd..b9b9d35494d2 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -792,7 +792,6 @@ s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data) **/ s32 igb_init_nvm_params_i210(struct e1000_hw *hw) { - s32 ret_val = 0; struct e1000_nvm_info *nvm = &hw->nvm; nvm->ops.acquire = igb_acquire_nvm_i210; @@ -813,7 +812,7 @@ s32 igb_init_nvm_params_i210(struct e1000_hw *hw) nvm->ops.validate = NULL; nvm->ops.update = NULL; } - return ret_val; + return 0; } /** diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 4d988da68394..b78407289741 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -1520,7 +1520,7 @@ static void igbvf_reset(struct igbvf_adapter *adapter) /* Allow time for pending master requests to run */ if (mac->ops.reset_hw(hw)) - dev_warn(&adapter->pdev->dev, "PF still resetting\n"); + dev_info(&adapter->pdev->dev, "PF still resetting\n"); mac->ops.init_hw(hw); diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index c7fe61509d5b..5c66b97c0cfa 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -85,9 +85,6 @@ #define IGC_WUFC_EXT_FILTER_MASK GENMASK(31, 8) -/* Physical Func Reset Done Indication */ -#define IGC_CTRL_EXT_LINK_MODE_MASK 0x00C00000 - /* Loop limit on how long we wait for auto-negotiation to complete */ #define COPPER_LINK_UP_LIMIT 10 #define PHY_AUTO_NEG_LIMIT 45 @@ -584,7 +581,6 @@ #define IGC_GEN_POLL_TIMEOUT 1920 /* PHY Control Register */ -#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ #define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ #define MII_CR_POWER_DOWN 0x0800 /* Power down */ #define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ @@ -605,9 +601,6 @@ #define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ #define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ -/* Bit definitions for valid PHY IDs. I = Integrated E = External */ -#define I225_I_PHY_ID 0x67C9DC00 - /* MDI Control */ #define IGC_MDIC_DATA_MASK 0x0000FFFF #define IGC_MDIC_REG_MASK 0x001F0000 diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index 587db7483f25..b1e72ec5f131 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -55,7 +55,6 @@ enum igc_mac_type { enum igc_phy_type { igc_phy_unknown = 0, - igc_phy_none, igc_phy_i225, }; @@ -68,8 +67,6 @@ enum igc_media_type { enum igc_nvm_type { igc_nvm_unknown = 0, igc_nvm_eeprom_spi, - igc_nvm_flash_hw, - igc_nvm_invm, }; struct igc_info { diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index b6807e16eea9..66ea566488d1 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -473,13 +473,11 @@ s32 igc_init_nvm_params_i225(struct igc_hw *hw) /* NVM Function Pointers */ if (igc_get_flash_presence_i225(hw)) { - hw->nvm.type = igc_nvm_flash_hw; nvm->ops.read = igc_read_nvm_srrd_i225; nvm->ops.write = igc_write_nvm_srwr_i225; nvm->ops.validate = igc_validate_nvm_checksum_i225; nvm->ops.update = igc_update_nvm_checksum_i225; } else { - hw->nvm.type = igc_nvm_invm; nvm->ops.read = igc_read_nvm_eerd; nvm->ops.write = NULL; nvm->ops.validate = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 1c98652b244a..d1093bb2d436 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -546,6 +546,13 @@ static int mlx5_devlink_enable_remote_dev_reset_get(struct devlink *devlink, u32 return 0; } +static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + return (val.vu16 >= 64 && val.vu16 <= 4096) ? 0 : -EINVAL; +} + static const struct devlink_param mlx5_devlink_params[] = { DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, @@ -570,6 +577,10 @@ static const struct devlink_param mlx5_devlink_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME), mlx5_devlink_enable_remote_dev_reset_get, mlx5_devlink_enable_remote_dev_reset_set, NULL), + DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_eq_depth_validate), + DEVLINK_PARAM_GENERIC(EVENT_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_eq_depth_validate), }; static void mlx5_devlink_set_params_init_values(struct devlink *devlink) @@ -608,6 +619,16 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) value); } #endif + + value.vu32 = MLX5_COMP_EQ_SIZE; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + value); + + value.vu32 = MLX5_NUM_ASYNC_EQE; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + value); } static const struct devlink_param enable_eth_param = @@ -752,6 +773,66 @@ static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink) mlx5_devlink_eth_param_unregister(devlink); } +static int mlx5_devlink_max_uc_list_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (val.vu32 == 0) { + NL_SET_ERR_MSG_MOD(extack, "max_macs value must be greater than 0"); + return -EINVAL; + } + + if (!is_power_of_2(val.vu32)) { + NL_SET_ERR_MSG_MOD(extack, "Only power of 2 values are supported for max_macs"); + return -EINVAL; + } + + if (ilog2(val.vu32) > + MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list)) { + NL_SET_ERR_MSG_MOD(extack, "max_macs value is out of the supported range"); + return -EINVAL; + } + + return 0; +} + +static const struct devlink_param max_uc_list_param = + DEVLINK_PARAM_GENERIC(MAX_MACS, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_max_uc_list_validate); + +static int mlx5_devlink_max_uc_list_param_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + int err; + + if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported)) + return 0; + + err = devlink_param_register(devlink, &max_uc_list_param); + if (err) + return err; + + value.vu32 = 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); + return 0; +} + +static void +mlx5_devlink_max_uc_list_param_unregister(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported)) + return; + + devlink_param_unregister(devlink, &max_uc_list_param); +} + #define MLX5_TRAP_DROP(_id, _group_id) \ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ @@ -811,6 +892,10 @@ int mlx5_devlink_register(struct devlink *devlink) if (err) goto auxdev_reg_err; + err = mlx5_devlink_max_uc_list_param_register(devlink); + if (err) + goto max_uc_list_err; + err = mlx5_devlink_traps_register(devlink); if (err) goto traps_reg_err; @@ -821,6 +906,8 @@ int mlx5_devlink_register(struct devlink *devlink) return 0; traps_reg_err: + mlx5_devlink_max_uc_list_param_unregister(devlink); +max_uc_list_err: mlx5_devlink_auxdev_params_unregister(devlink); auxdev_reg_err: devlink_params_unregister(devlink, mlx5_devlink_params, @@ -831,6 +918,7 @@ auxdev_reg_err: void mlx5_devlink_unregister(struct devlink *devlink) { mlx5_devlink_traps_unregister(devlink); + mlx5_devlink_max_uc_list_param_unregister(devlink); mlx5_devlink_auxdev_params_unregister(devlink); devlink_params_unregister(devlink, mlx5_devlink_params, ARRAY_SIZE(mlx5_devlink_params)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e77c4159713f..33679467c63a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -145,7 +145,6 @@ struct page_pool; #define MLX5E_MIN_NUM_CHANNELS 0x1 #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2) -#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_TX_XSK_POLL_BUDGET 64 #define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ @@ -875,10 +874,8 @@ struct mlx5e_trap; struct mlx5e_priv { /* priv data path fields - start */ - /* +1 for port ptp ts */ - struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC + - MLX5E_QOS_MAX_LEAF_NODES]; - int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; + struct mlx5e_txqsq **txq2sq; + int **channel_tc2realtxq; int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC]; #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx_dp dcbx_dp; @@ -893,7 +890,7 @@ struct mlx5e_priv { struct mlx5e_channels channels; u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC]; struct mlx5e_rx_res *rx_res; - u32 tx_rates[MLX5E_MAX_NUM_SQS]; + u32 *tx_rates; struct mlx5e_flow_steering fs; @@ -909,7 +906,7 @@ struct mlx5e_priv { struct net_device *netdev; struct mlx5e_trap *en_trap; struct mlx5e_stats stats; - struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_channel_stats **channel_stats; struct mlx5e_channel_stats trap_stats; struct mlx5e_ptp_stats ptp_stats; u16 stats_nch; @@ -956,6 +953,12 @@ struct mlx5e_rx_handlers { extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic; +enum mlx5e_profile_feature { + MLX5E_PROFILE_FEATURE_PTP_RX, + MLX5E_PROFILE_FEATURE_PTP_TX, + MLX5E_PROFILE_FEATURE_QOS_HTB, +}; + struct mlx5e_profile { int (*init)(struct mlx5_core_dev *mdev, struct net_device *netdev); @@ -969,14 +972,18 @@ struct mlx5e_profile { int (*update_rx)(struct mlx5e_priv *priv); void (*update_stats)(struct mlx5e_priv *priv); void (*update_carrier)(struct mlx5e_priv *priv); + int (*max_nch_limit)(struct mlx5_core_dev *mdev); unsigned int (*stats_grps_num)(struct mlx5e_priv *priv); mlx5e_stats_grp_t *stats_grps; const struct mlx5e_rx_handlers *rx_handlers; int max_tc; u8 rq_groups; - bool rx_ptp_support; + u32 features; }; +#define mlx5e_profile_feature_cap(profile, feature) \ + ((profile)->features & (MLX5E_PROFILE_FEATURE_## feature)) + void mlx5e_build_ptys2ethtool_map(void); bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); @@ -1188,8 +1195,7 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, struct mlx5_core_dev *mdev); void mlx5e_priv_cleanup(struct mlx5e_priv *priv); struct net_device * -mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, - unsigned int txqs, unsigned int rxqs); +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile); int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c index d290d7276b8d..074ffa4fa5af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c @@ -20,7 +20,7 @@ mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch, struct mlx5e_channel_stats *stats; int tc; - stats = &priv->channel_stats[ch]; + stats = priv->channel_stats[ch]; data->rx_packets = stats->rq.packets; data->rx_bytes = stats->rq.bytes; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 18d542b1c5cb..82baafd3c00c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -768,7 +768,7 @@ int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv) { struct mlx5e_ptp_fs *ptp_fs; - if (!priv->profile->rx_ptp_support) + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) return 0; ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL); @@ -783,7 +783,7 @@ void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv) { struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; - if (!priv->profile->rx_ptp_support) + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) return; mlx5e_ptp_rx_unset_fs(priv); @@ -794,7 +794,7 @@ int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set) { struct mlx5e_ptp *c = priv->channels.ptp; - if (!priv->profile->rx_ptp_support) + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) return 0; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index fcb0892c08a9..0991345c4ae5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -517,6 +517,9 @@ int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void * void *data, void (*cleanup)(struct flow_block_cb *block_cb)) { + if (!netdev) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_BLOCK: return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index 0015a81eb9a1..24c32f73040a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -37,7 +37,6 @@ struct mlx5e_rx_res { /* API for rx_res_rss_* */ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, - const struct mlx5e_packet_merge_param *init_pkt_merge_param, unsigned int init_nch) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; @@ -52,7 +51,7 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, return -ENOMEM; err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn, - init_pkt_merge_param); + &res->pkt_merge_param); if (err) goto err_rss_free; @@ -277,8 +276,7 @@ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void) return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL); } -static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, - const struct mlx5e_packet_merge_param *init_pkt_merge_param) +static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; struct mlx5e_tir_builder *builder; @@ -309,7 +307,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), inner_ft_support); - mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param); + mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); mlx5e_tir_builder_build_direct(builder); err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true); @@ -339,7 +337,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), inner_ft_support); - mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param); + mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); mlx5e_tir_builder_build_direct(builder); err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true); @@ -454,11 +452,11 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, res->pkt_merge_param = *init_pkt_merge_param; init_rwsem(&res->pkt_merge_param_sem); - err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch); + err = mlx5e_rx_res_rss_init_def(res, init_nch); if (err) goto err_out; - err = mlx5e_rx_res_channels_init(res, init_pkt_merge_param); + err = mlx5e_rx_res_channels_init(res); if (err) goto err_rss_destroy; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 538bc2419bd8..5f2b67b9c189 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -67,7 +67,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->xdpsq = &c->rq_xdpsq; rq->xsk_pool = pool; - rq->stats = &c->priv->channel_stats[c->ix].xskrq; + rq->stats = &c->priv->channel_stats[c->ix]->xskrq; rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); rq_xdp_ix = c->ix + params->num_channels * MLX5E_RQ_GROUP_XSK; err = mlx5e_rq_set_handlers(rq, params, xsk); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 15711814d2d2..96064a2033f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -611,7 +611,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, priv_rx->rxq = rxq; priv_rx->sk = sk; - priv_rx->rq_stats = &priv->channel_stats[rxq].rq; + priv_rx->rq_stats = &priv->channel_stats[rxq]->rq; priv_rx->sw_stats = &priv->tls->sw_stats; mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index fe5d82fa6e92..49cca6bd49a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -556,7 +556,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); - priv->channel_stats[arfs_rule->rxq].rq.arfs_err++; + priv->channel_stats[arfs_rule->rxq]->rq.arfs_err++; mlx5e_dbg(HW, priv, "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n", __func__, arfs_rule->filter_id, arfs_rule->rxq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c8757c5a812b..536fcb2c5e90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1934,7 +1934,7 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val if (curr_val == new_val) return 0; - if (new_val && !priv->profile->rx_ptp_support && rx_filter) { + if (new_val && !mlx5e_profile_feature_cap(priv->profile, PTP_RX) && rx_filter) { netdev_err(priv->netdev, "Profile doesn't support enabling of CQE compression while hardware time-stamping is enabled.\n"); return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 496977e7406e..1a47108805fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -479,7 +479,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->xdpsq = &c->rq_xdpsq; - rq->stats = &c->priv->channel_stats[c->ix].rq; + rq->stats = &c->priv->channel_stats[c->ix]->rq; rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); err = mlx5e_rq_set_handlers(rq, params, NULL); if (err) @@ -1161,10 +1161,10 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->xsk_pool = xsk_pool; sq->stats = sq->xsk_pool ? - &c->priv->channel_stats[c->ix].xsksq : + &c->priv->channel_stats[c->ix]->xsksq : is_redirect ? - &c->priv->channel_stats[c->ix].xdpsq : - &c->priv->channel_stats[c->ix].rq_xdpsq; + &c->priv->channel_stats[c->ix]->xdpsq : + &c->priv->channel_stats[c->ix]->rq_xdpsq; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1928,7 +1928,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, params, &cparam->txq_sq, &c->sq[tc], tc, qos_queue_group_id, - &c->priv->channel_stats[c->ix].sq[tc]); + &c->priv->channel_stats[c->ix]->sq[tc]); if (err) goto err_close_sqs; } @@ -2176,6 +2176,30 @@ static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix) return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev); } +static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu) +{ + if (ix > priv->stats_nch) { + netdev_warn(priv->netdev, "Unexpected channel stats index %d > %d\n", ix, + priv->stats_nch); + return -EINVAL; + } + + if (priv->channel_stats[ix]) + return 0; + + /* Asymmetric dynamic memory allocation. + * Freed in mlx5e_priv_arrays_free, not on channel closure. + */ + mlx5e_dbg(DRV, priv, "Creating channel stats %d\n", ix); + priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats), + GFP_KERNEL, cpu_to_node(cpu)); + if (!priv->channel_stats[ix]) + return -ENOMEM; + priv->stats_nch++; + + return 0; +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, struct mlx5e_channel_param *cparam, @@ -2193,6 +2217,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (err) return err; + err = mlx5e_channel_stats_alloc(priv, ix, cpu); + if (err) + return err; + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); if (!c) return -ENOMEM; @@ -2207,7 +2235,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); c->num_tc = mlx5e_get_dcb_num_tc(params); c->xdp = !!params->xdp_prog; - c->stats = &priv->channel_stats[ix].ch; + c->stats = &priv->channel_stats[ix]->ch; c->aff_mask = irq_get_effective_affinity_mask(irq); c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); @@ -3371,7 +3399,7 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) int i; for (i = 0; i < priv->stats_nch; i++) { - struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; + struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i]; struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; int j; @@ -4038,7 +4066,7 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) goto err_unlock; } - if (!priv->profile->rx_ptp_support) + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) err = mlx5e_hwstamp_config_no_ptp_rx(priv, config.rx_filter != HWTSTAMP_FILTER_NONE); else @@ -5093,9 +5121,23 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_nic_stats_grps, .stats_grps_num = mlx5e_nic_stats_grps_num, - .rx_ptp_support = true, + .features = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) | + BIT(MLX5E_PROFILE_FEATURE_PTP_TX) | + BIT(MLX5E_PROFILE_FEATURE_QOS_HTB), }; +static int mlx5e_profile_max_num_channels(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + int nch; + + nch = mlx5e_get_max_num_channels(mdev); + + if (profile->max_nch_limit) + nch = min_t(int, nch, profile->max_nch_limit(mdev)); + return nch; +} + static unsigned int mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev, const struct mlx5e_profile *profile) @@ -5104,7 +5146,7 @@ mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev, unsigned int max_nch, tmp; /* core resources */ - max_nch = mlx5e_get_max_num_channels(mdev); + max_nch = mlx5e_profile_max_num_channels(mdev, profile); /* netdev rx queues */ tmp = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1); @@ -5128,12 +5170,17 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, struct net_device *netdev, struct mlx5_core_dev *mdev) { + int nch, num_txqs, node, i; + + num_txqs = netdev->num_tx_queues; + nch = mlx5e_calc_max_nch(mdev, netdev, profile); + node = dev_to_node(mlx5_core_dma_dev(mdev)); + /* priv init */ priv->mdev = mdev; priv->netdev = netdev; priv->msglevel = MLX5E_MSG_LEVEL; - priv->max_nch = mlx5e_calc_max_nch(mdev, netdev, profile); - priv->stats_nch = priv->max_nch; + priv->max_nch = nch; priv->max_opened_tc = 1; if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL)) @@ -5150,11 +5197,46 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, if (!priv->wq) goto err_free_cpumask; + priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node); + if (!priv->txq2sq) + goto err_destroy_workqueue; + + priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node); + if (!priv->tx_rates) + goto err_free_txq2sq; + + priv->channel_tc2realtxq = + kcalloc_node(nch, sizeof(*priv->channel_tc2realtxq), GFP_KERNEL, node); + if (!priv->channel_tc2realtxq) + goto err_free_tx_rates; + + for (i = 0; i < nch; i++) { + priv->channel_tc2realtxq[i] = + kcalloc_node(profile->max_tc, sizeof(**priv->channel_tc2realtxq), + GFP_KERNEL, node); + if (!priv->channel_tc2realtxq[i]) + goto err_free_channel_tc2realtxq; + } + + priv->channel_stats = + kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node); + if (!priv->channel_stats) + goto err_free_channel_tc2realtxq; + return 0; +err_free_channel_tc2realtxq: + while (--i >= 0) + kfree(priv->channel_tc2realtxq[i]); + kfree(priv->channel_tc2realtxq); +err_free_tx_rates: + kfree(priv->tx_rates); +err_free_txq2sq: + kfree(priv->txq2sq); +err_destroy_workqueue: + destroy_workqueue(priv->wq); err_free_cpumask: free_cpumask_var(priv->scratchpad.cpumask); - return -ENOMEM; } @@ -5166,6 +5248,14 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) if (!priv->mdev) return; + for (i = 0; i < priv->stats_nch; i++) + kvfree(priv->channel_stats[i]); + kfree(priv->channel_stats); + for (i = 0; i < priv->max_nch; i++) + kfree(priv->channel_tc2realtxq[i]); + kfree(priv->channel_tc2realtxq); + kfree(priv->tx_rates); + kfree(priv->txq2sq); destroy_workqueue(priv->wq); free_cpumask_var(priv->scratchpad.cpumask); @@ -5181,13 +5271,44 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) memset(priv, 0, sizeof(*priv)); } +static unsigned int mlx5e_get_max_num_txqs(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + unsigned int nch, ptp_txqs, qos_txqs; + + nch = mlx5e_profile_max_num_channels(mdev, profile); + + ptp_txqs = MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) && + mlx5e_profile_feature_cap(profile, PTP_TX) ? + profile->max_tc : 0; + + qos_txqs = mlx5_qos_is_supported(mdev) && + mlx5e_profile_feature_cap(profile, QOS_HTB) ? + mlx5e_qos_max_leaf_nodes(mdev) : 0; + + return nch * profile->max_tc + ptp_txqs + qos_txqs; +} + +static unsigned int mlx5e_get_max_num_rxqs(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + unsigned int nch; + + nch = mlx5e_profile_max_num_channels(mdev, profile); + + return nch * profile->rq_groups; +} + struct net_device * -mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, - unsigned int txqs, unsigned int rxqs) +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile) { struct net_device *netdev; + unsigned int txqs, rxqs; int err; + txqs = mlx5e_get_max_num_txqs(mdev, profile); + rxqs = mlx5e_get_max_num_rxqs(mdev, profile); + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); @@ -5432,22 +5553,10 @@ static int mlx5e_probe(struct auxiliary_device *adev, struct mlx5_core_dev *mdev = edev->mdev; struct net_device *netdev; pm_message_t state = {}; - unsigned int txqs, rxqs, ptp_txqs = 0; struct mlx5e_priv *priv; - int qos_sqs = 0; int err; - int nch; - - if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) - ptp_txqs = profile->max_tc; - if (mlx5_qos_is_supported(mdev)) - qos_sqs = mlx5e_qos_max_leaf_nodes(mdev); - - nch = mlx5e_get_max_num_channels(mdev); - txqs = nch * profile->max_tc + ptp_txqs + qos_sqs; - rxqs = nch * profile->rq_groups; - netdev = mlx5e_create_netdev(mdev, profile, txqs, rxqs); + netdev = mlx5e_create_netdev(mdev, profile); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 6e0f88ea3701..8c0f4cfbe471 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -591,6 +591,12 @@ bool mlx5e_eswitch_vf_rep(const struct net_device *netdev) return netdev->netdev_ops == &mlx5e_netdev_ops_rep; } +static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev) +{ + return (1 << MLX5_CAP_GEN(mdev, log_max_tir)) / + mlx5_eswitch_get_total_vports(mdev); +} + static void mlx5e_build_rep_params(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -1113,7 +1119,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_rep_stats_grps, .stats_grps_num = mlx5e_rep_stats_grps_num, - .rx_ptp_support = false, + .max_nch_limit = mlx5e_rep_max_nch_limit, }; static const struct mlx5e_profile mlx5e_uplink_rep_profile = { @@ -1134,7 +1140,6 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_ul_rep_stats_grps, .stats_grps_num = mlx5e_ul_rep_stats_grps_num, - .rx_ptp_support = false, }; /* e-Switch vport representors */ @@ -1185,14 +1190,10 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) struct devlink_port *dl_port; struct net_device *netdev; struct mlx5e_priv *priv; - unsigned int txqs, rxqs; - int nch, err; + int err; profile = &mlx5e_rep_profile; - nch = mlx5e_get_max_num_channels(dev); - txqs = nch * profile->max_tc; - rxqs = nch * profile->rq_groups; - netdev = mlx5e_create_netdev(dev, profile, txqs, rxqs); + netdev = mlx5e_create_netdev(dev, profile); if (!netdev) { mlx5_core_warn(dev, "Failed to create representor netdev for vport %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7e05d7592bce..f09b57c31ed7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -2189,7 +2189,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, priv = mlx5i_epriv(netdev); tstamp = &priv->tstamp; - stats = &priv->channel_stats[rq->ix].rq; + stats = rq->stats; flags_rqpn = be32_to_cpu(cqe->flags_rqpn); g = (flags_rqpn >> 28) & 3; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 3c91a11e27ad..73fcd9fb17dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -463,7 +463,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) for (i = 0; i < priv->stats_nch; i++) { struct mlx5e_channel_stats *channel_stats = - &priv->channel_stats[i]; + priv->channel_stats[i]; int j; mlx5e_stats_grp_sw_update_stats_rq_stats(s, &channel_stats->rq); @@ -2197,21 +2197,21 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels) for (i = 0; i < max_nch; i++) for (j = 0; j < NUM_CH_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].ch, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->ch, ch_stats_desc, j); for (i = 0; i < max_nch; i++) { for (j = 0; j < NUM_RQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq, rq_stats_desc, j); for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xskrq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xskrq, xskrq_stats_desc, j); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq_xdpsq, rq_xdpsq_stats_desc, j); } @@ -2219,17 +2219,17 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels) for (i = 0; i < max_nch; i++) for (j = 0; j < NUM_SQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc], + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->sq[tc], sq_stats_desc, j); for (i = 0; i < max_nch; i++) { for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xsksq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xsksq, xsksq_stats_desc, j); for (j = 0; j < NUM_XDPSQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xdpsq, xdpsq_stats_desc, j); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 792e0d6aa861..b695aad71ee1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -19,6 +19,7 @@ #include "lib/clock.h" #include "diag/fw_tracer.h" #include "mlx5_irq.h" +#include "devlink.h" enum { MLX5_EQE_OWNER_INIT_VAL = 0x1, @@ -622,6 +623,20 @@ static void cleanup_async_eq(struct mlx5_core_dev *dev, name, err); } +static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); + return MLX5_NUM_ASYNC_EQE; +} static int create_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -646,7 +661,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) param = (struct mlx5_eq_param) { .irq_index = MLX5_IRQ_EQ_CTRL, - .nent = MLX5_NUM_ASYNC_EQE, + .nent = async_eq_depth_devlink_param_get(dev), }; gather_async_events_mask(dev, param.mask); @@ -796,6 +811,21 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) } } +static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); + return MLX5_COMP_EQ_SIZE; +} + static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -807,7 +837,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) INIT_LIST_HEAD(&table->comp_eqs_list); ncomp_eqs = table->num_comp_eqs; - nent = MLX5_COMP_EQ_SIZE; + nent = comp_eq_depth_devlink_param_get(dev); for (i = 0; i < ncomp_eqs; i++) { struct mlx5_eq_param param = {}; int vecidx = i; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 513f741d16c7..ead5e8acc8be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -343,9 +343,6 @@ void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps); -bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw); -int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); - /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 051b20ec7bdb..0a99a020a3b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -117,7 +117,7 @@ static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv) struct mlx5e_channel_stats *channel_stats; struct mlx5e_rq_stats *rq_stats; - channel_stats = &priv->channel_stats[i]; + channel_stats = priv->channel_stats[i]; rq_stats = &channel_stats->rq; s.rx_packets += rq_stats->packets; @@ -449,7 +449,6 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5i_stats_grps, .stats_grps_num = mlx5i_stats_grps_num, - .rx_ptp_support = false, }; /* mlx5i netdev NDos */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 5308f23702bc..0b86e78dbc0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -350,7 +350,6 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = { .rx_handlers = &mlx5i_rx_handlers, .max_tc = MLX5I_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), - .rx_ptp_support = false, }; const struct mlx5e_profile *mlx5i_pkey_get_profile(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d97c9e86d7b3..b1a82226623c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -484,10 +484,26 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP); } +static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); + return err; +} + static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) { struct mlx5_profile *prof = &dev->profile; void *set_hca_cap; + int max_uc_list; int err; err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); @@ -561,6 +577,11 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) if (MLX5_CAP_GEN(dev, roce_rw_supported)) MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev)); + max_uc_list = max_uc_list_get_devlink_param(dev); + if (max_uc_list > 0) + MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list, + ilog2(max_uc_list)); + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 7e89d5980d5e..866b9357939b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1708,6 +1708,124 @@ static void mlxsw_core_health_listener_func(const struct mlxsw_reg_info *reg, static const struct mlxsw_listener mlxsw_core_health_listener = MLXSW_EVENTL(mlxsw_core_health_listener_func, MFDE, MFDE); +static int +mlxsw_core_health_fw_fatal_dump_fatal_cause(const char *mfde_pl, + struct devlink_fmsg *fmsg) +{ + u32 val, tile_v; + int err; + + val = mlxsw_reg_mfde_fatal_cause_id_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "cause_id", val); + if (err) + return err; + tile_v = mlxsw_reg_mfde_fatal_cause_tile_v_get(mfde_pl); + if (tile_v) { + val = mlxsw_reg_mfde_fatal_cause_tile_index_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "tile_index", val); + if (err) + return err; + } + + return 0; +} + +static int +mlxsw_core_health_fw_fatal_dump_fw_assert(const char *mfde_pl, + struct devlink_fmsg *fmsg) +{ + u32 val, tile_v; + int err; + + val = mlxsw_reg_mfde_fw_assert_var0_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var0", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_var1_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var1", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_var2_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var2", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_var3_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var3", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_var4_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "var4", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_existptr_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "existptr", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_callra_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "callra", val); + if (err) + return err; + val = mlxsw_reg_mfde_fw_assert_oe_get(mfde_pl); + err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val); + if (err) + return err; + tile_v = mlxsw_reg_mfde_fw_assert_tile_v_get(mfde_pl); + if (tile_v) { + val = mlxsw_reg_mfde_fw_assert_tile_index_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "tile_index", val); + if (err) + return err; + } + val = mlxsw_reg_mfde_fw_assert_ext_synd_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd", val); + if (err) + return err; + + return 0; +} + +static int +mlxsw_core_health_fw_fatal_dump_kvd_im_stop(const char *mfde_pl, + struct devlink_fmsg *fmsg) +{ + u32 val; + int err; + + val = mlxsw_reg_mfde_kvd_im_stop_oe_get(mfde_pl); + err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val); + if (err) + return err; + val = mlxsw_reg_mfde_kvd_im_stop_pipes_mask_get(mfde_pl); + return devlink_fmsg_u32_pair_put(fmsg, "pipes_mask", val); +} + +static int +mlxsw_core_health_fw_fatal_dump_crspace_to(const char *mfde_pl, + struct devlink_fmsg *fmsg) +{ + u32 val; + int err; + + val = mlxsw_reg_mfde_crspace_to_log_address_get(mfde_pl); + err = devlink_fmsg_u32_pair_put(fmsg, "log_address", val); + if (err) + return err; + val = mlxsw_reg_mfde_crspace_to_oe_get(mfde_pl); + err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val); + if (err) + return err; + val = mlxsw_reg_mfde_crspace_to_log_id_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "log_irisc_id", val); + if (err) + return err; + val = mlxsw_reg_mfde_crspace_to_log_ip_get(mfde_pl); + err = devlink_fmsg_u64_pair_put(fmsg, "log_ip", val); + if (err) + return err; + + return 0; +} + static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, void *priv_ctx, struct netlink_ext_ack *extack) @@ -1741,6 +1859,46 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP: val_str = "KVD insertion machine stopped"; break; + case MLXSW_REG_MFDE_EVENT_ID_TEST: + val_str = "Test"; + break; + case MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT: + val_str = "FW assert"; + break; + case MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE: + val_str = "Fatal cause"; + break; + default: + val_str = NULL; + } + if (val_str) { + err = devlink_fmsg_string_pair_put(fmsg, "desc", val_str); + if (err) + return err; + } + + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "severity"); + if (err) + return err; + + val = mlxsw_reg_mfde_severity_get(mfde_pl); + err = devlink_fmsg_u8_pair_put(fmsg, "id", val); + if (err) + return err; + switch (val) { + case MLXSW_REG_MFDE_SEVERITY_FATL: + val_str = "Fatal"; + break; + case MLXSW_REG_MFDE_SEVERITY_NRML: + val_str = "Normal"; + break; + case MLXSW_REG_MFDE_SEVERITY_INTR: + val_str = "Debug"; + break; default: val_str = NULL; } @@ -1749,6 +1907,7 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor if (err) return err; } + err = devlink_fmsg_arr_pair_nest_end(fmsg); if (err) return err; @@ -1800,24 +1959,18 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor if (err) return err; - if (event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO) { - val = mlxsw_reg_mfde_log_address_get(mfde_pl); - err = devlink_fmsg_u32_pair_put(fmsg, "log_address", val); - if (err) - return err; - val = mlxsw_reg_mfde_log_id_get(mfde_pl); - err = devlink_fmsg_u8_pair_put(fmsg, "log_irisc_id", val); - if (err) - return err; - val = mlxsw_reg_mfde_log_ip_get(mfde_pl); - err = devlink_fmsg_u64_pair_put(fmsg, "log_ip", val); - if (err) - return err; - } else if (event_id == MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP) { - val = mlxsw_reg_mfde_pipes_mask_get(mfde_pl); - err = devlink_fmsg_u32_pair_put(fmsg, "pipes_mask", val); - if (err) - return err; + switch (event_id) { + case MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO: + return mlxsw_core_health_fw_fatal_dump_crspace_to(mfde_pl, + fmsg); + case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP: + return mlxsw_core_health_fw_fatal_dump_kvd_im_stop(mfde_pl, + fmsg); + case MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT: + return mlxsw_core_health_fw_fatal_dump_fw_assert(mfde_pl, fmsg); + case MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE: + return mlxsw_core_health_fw_fatal_dump_fatal_cause(mfde_pl, + fmsg); } return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index f748b537bdab..c97d2c744725 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -11318,7 +11318,7 @@ mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices, * ----------------------------------- */ #define MLXSW_REG_MFDE_ID 0x9200 -#define MLXSW_REG_MFDE_LEN 0x18 +#define MLXSW_REG_MFDE_LEN 0x30 MLXSW_REG_DEFINE(mfde, MLXSW_REG_MFDE_ID, MLXSW_REG_MFDE_LEN); @@ -11328,10 +11328,32 @@ MLXSW_REG_DEFINE(mfde, MLXSW_REG_MFDE_ID, MLXSW_REG_MFDE_LEN); */ MLXSW_ITEM32(reg, mfde, irisc_id, 0x00, 24, 8); +enum mlxsw_reg_mfde_severity { + /* Unrecoverable switch behavior */ + MLXSW_REG_MFDE_SEVERITY_FATL = 2, + /* Unexpected state with possible systemic failure */ + MLXSW_REG_MFDE_SEVERITY_NRML = 3, + /* Unexpected state without systemic failure */ + MLXSW_REG_MFDE_SEVERITY_INTR = 5, +}; + +/* reg_mfde_severity + * The severity of the event. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, severity, 0x00, 16, 8); + enum mlxsw_reg_mfde_event_id { + /* CRspace timeout */ MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO = 1, /* KVD insertion machine stopped */ MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP, + /* Triggered by MFGD.trigger_test */ + MLXSW_REG_MFDE_EVENT_ID_TEST, + /* Triggered when firmware hits an assert */ + MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT, + /* Fatal error interrupt from hardware */ + MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE, }; /* reg_mfde_event_id @@ -11372,32 +11394,110 @@ MLXSW_ITEM32(reg, mfde, command_type, 0x04, 24, 2); */ MLXSW_ITEM32(reg, mfde, reg_attr_id, 0x04, 0, 16); -/* reg_mfde_log_address +/* reg_mfde_crspace_to_log_address * crspace address accessed, which resulted in timeout. - * Valid in case event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO * Access: RO */ -MLXSW_ITEM32(reg, mfde, log_address, 0x10, 0, 32); +MLXSW_ITEM32(reg, mfde, crspace_to_log_address, 0x10, 0, 32); -/* reg_mfde_log_id +/* reg_mfde_crspace_to_oe + * 0 - New event + * 1 - Old event, occurred before MFGD activation. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, crspace_to_oe, 0x14, 24, 1); + +/* reg_mfde_crspace_to_log_id * Which irisc triggered the timeout. - * Valid in case event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO * Access: RO */ -MLXSW_ITEM32(reg, mfde, log_id, 0x14, 0, 4); +MLXSW_ITEM32(reg, mfde, crspace_to_log_id, 0x14, 0, 4); -/* reg_mfde_log_ip +/* reg_mfde_crspace_to_log_ip * IP (instruction pointer) that triggered the timeout. - * Valid in case event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO * Access: RO */ -MLXSW_ITEM64(reg, mfde, log_ip, 0x18, 0, 64); +MLXSW_ITEM64(reg, mfde, crspace_to_log_ip, 0x18, 0, 64); + +/* reg_mfde_kvd_im_stop_oe + * 0 - New event + * 1 - Old event, occurred before MFGD activation. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, kvd_im_stop_oe, 0x10, 24, 1); -/* reg_mfde_pipes_mask +/* reg_mfde_kvd_im_stop_pipes_mask * Bit per kvh pipe. * Access: RO */ -MLXSW_ITEM32(reg, mfde, pipes_mask, 0x10, 0, 16); +MLXSW_ITEM32(reg, mfde, kvd_im_stop_pipes_mask, 0x10, 0, 16); + +/* reg_mfde_fw_assert_var0-4 + * Variables passed to assert. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_var0, 0x10, 0, 32); +MLXSW_ITEM32(reg, mfde, fw_assert_var1, 0x14, 0, 32); +MLXSW_ITEM32(reg, mfde, fw_assert_var2, 0x18, 0, 32); +MLXSW_ITEM32(reg, mfde, fw_assert_var3, 0x1C, 0, 32); +MLXSW_ITEM32(reg, mfde, fw_assert_var4, 0x20, 0, 32); + +/* reg_mfde_fw_assert_existptr + * The instruction pointer when assert was triggered. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_existptr, 0x24, 0, 32); + +/* reg_mfde_fw_assert_callra + * The return address after triggering assert. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_callra, 0x28, 0, 32); + +/* reg_mfde_fw_assert_oe + * 0 - New event + * 1 - Old event, occurred before MFGD activation. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_oe, 0x2C, 24, 1); + +/* reg_mfde_fw_assert_tile_v + * 0: The assert was from main + * 1: The assert was from a tile + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_tile_v, 0x2C, 23, 1); + +/* reg_mfde_fw_assert_tile_index + * When tile_v=1, the tile_index that caused the assert. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_tile_index, 0x2C, 16, 6); + +/* reg_mfde_fw_assert_ext_synd + * A generated one-to-one identifier which is specific per-assert. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fw_assert_ext_synd, 0x2C, 0, 16); + +/* reg_mfde_fatal_cause_id + * HW interrupt cause id. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fatal_cause_id, 0x10, 0, 18); + +/* reg_mfde_fatal_cause_tile_v + * 0: The assert was from main + * 1: The assert was from a tile + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fatal_cause_tile_v, 0x14, 23, 1); + +/* reg_mfde_fatal_cause_tile_index + * When tile_v=1, the tile_index that caused the assert. + * Access: RO + */ +MLXSW_ITEM32(reg, mfde, fatal_cause_tile_index, 0x14, 16, 6); /* TNGCR - Tunneling NVE General Configuration Register * ---------------------------------------------------- diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index be3791ca6069..bb417db773b9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -203,7 +203,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, */ burst = roundup_pow_of_two(act->police.burst); err = mlxsw_sp_acl_rulei_act_police(mlxsw_sp, rulei, - act->police.index, + act->hw_index, act->police.rate_bytes_ps, burst, extack); if (err) @@ -508,7 +508,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, struct flow_match_vlan match; flow_rule_match_vlan(rule, &match); - if (mlxsw_sp_flow_block_is_egress_bound(block)) { + if (mlxsw_sp_flow_block_is_egress_bound(block) && + match.mask->vlan_id) { NL_SET_ERR_MSG_MOD(f->common.extack, "vlan_id key is not supported on egress"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/microchip/lan966x/Kconfig b/drivers/net/ethernet/microchip/lan966x/Kconfig index 2860a8c9923d..ac273f84b69e 100644 --- a/drivers/net/ethernet/microchip/lan966x/Kconfig +++ b/drivers/net/ethernet/microchip/lan966x/Kconfig @@ -2,6 +2,7 @@ config LAN966X_SWITCH tristate "Lan966x switch driver" depends on HAS_IOMEM depends on OF + depends on NET_SWITCHDEV select PHYLINK select PACKING help diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile index 2989ba528236..ec1a1fa8b0d5 100644 --- a/drivers/net/ethernet/microchip/lan966x/Makefile +++ b/drivers/net/ethernet/microchip/lan966x/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_LAN966X_SWITCH) += lan966x-switch.o lan966x-switch-objs := lan966x_main.o lan966x_phylink.o lan966x_port.o \ - lan966x_mac.o lan966x_ethtool.o + lan966x_mac.o lan966x_ethtool.o lan966x_switchdev.o \ + lan966x_vlan.o lan966x_fdb.o diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c new file mode 100644 index 000000000000..da5ca7188679 --- /dev/null +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdb.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <net/switchdev.h> + +#include "lan966x_main.h" + +struct lan966x_fdb_event_work { + struct work_struct work; + struct switchdev_notifier_fdb_info fdb_info; + struct net_device *dev; + struct lan966x *lan966x; + unsigned long event; +}; + +struct lan966x_fdb_entry { + struct list_head list; + unsigned char mac[ETH_ALEN] __aligned(2); + u16 vid; + u32 references; +}; + +static struct lan966x_fdb_entry * +lan966x_fdb_find_entry(struct lan966x *lan966x, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct lan966x_fdb_entry *fdb_entry; + + list_for_each_entry(fdb_entry, &lan966x->fdb_entries, list) { + if (fdb_entry->vid == fdb_info->vid && + ether_addr_equal(fdb_entry->mac, fdb_info->addr)) + return fdb_entry; + } + + return NULL; +} + +static void lan966x_fdb_add_entry(struct lan966x *lan966x, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct lan966x_fdb_entry *fdb_entry; + + fdb_entry = lan966x_fdb_find_entry(lan966x, fdb_info); + if (fdb_entry) { + fdb_entry->references++; + return; + } + + fdb_entry = kzalloc(sizeof(*fdb_entry), GFP_KERNEL); + if (!fdb_entry) + return; + + ether_addr_copy(fdb_entry->mac, fdb_info->addr); + fdb_entry->vid = fdb_info->vid; + fdb_entry->references = 1; + list_add_tail(&fdb_entry->list, &lan966x->fdb_entries); +} + +static bool lan966x_fdb_del_entry(struct lan966x *lan966x, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct lan966x_fdb_entry *fdb_entry, *tmp; + + list_for_each_entry_safe(fdb_entry, tmp, &lan966x->fdb_entries, + list) { + if (fdb_entry->vid == fdb_info->vid && + ether_addr_equal(fdb_entry->mac, fdb_info->addr)) { + fdb_entry->references--; + if (!fdb_entry->references) { + list_del(&fdb_entry->list); + kfree(fdb_entry); + return true; + } + break; + } + } + + return false; +} + +void lan966x_fdb_write_entries(struct lan966x *lan966x, u16 vid) +{ + struct lan966x_fdb_entry *fdb_entry; + + list_for_each_entry(fdb_entry, &lan966x->fdb_entries, list) { + if (fdb_entry->vid != vid) + continue; + + lan966x_mac_cpu_learn(lan966x, fdb_entry->mac, fdb_entry->vid); + } +} + +void lan966x_fdb_erase_entries(struct lan966x *lan966x, u16 vid) +{ + struct lan966x_fdb_entry *fdb_entry; + + list_for_each_entry(fdb_entry, &lan966x->fdb_entries, list) { + if (fdb_entry->vid != vid) + continue; + + lan966x_mac_cpu_forget(lan966x, fdb_entry->mac, fdb_entry->vid); + } +} + +static void lan966x_fdb_purge_entries(struct lan966x *lan966x) +{ + struct lan966x_fdb_entry *fdb_entry, *tmp; + + list_for_each_entry_safe(fdb_entry, tmp, &lan966x->fdb_entries, list) { + list_del(&fdb_entry->list); + kfree(fdb_entry); + } +} + +int lan966x_fdb_init(struct lan966x *lan966x) +{ + INIT_LIST_HEAD(&lan966x->fdb_entries); + lan966x->fdb_work = alloc_ordered_workqueue("lan966x_order", 0); + if (!lan966x->fdb_work) + return -ENOMEM; + + return 0; +} + +void lan966x_fdb_deinit(struct lan966x *lan966x) +{ + destroy_workqueue(lan966x->fdb_work); + lan966x_fdb_purge_entries(lan966x); +} + +static void lan966x_fdb_event_work(struct work_struct *work) +{ + struct lan966x_fdb_event_work *fdb_work = + container_of(work, struct lan966x_fdb_event_work, work); + struct switchdev_notifier_fdb_info *fdb_info; + struct net_device *dev = fdb_work->dev; + struct lan966x_port *port; + struct lan966x *lan966x; + int ret; + + fdb_info = &fdb_work->fdb_info; + lan966x = fdb_work->lan966x; + + if (lan966x_netdevice_check(dev)) { + port = netdev_priv(dev); + + switch (fdb_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + if (!fdb_info->added_by_user) + break; + lan966x_mac_add_entry(lan966x, port, fdb_info->addr, + fdb_info->vid); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + if (!fdb_info->added_by_user) + break; + lan966x_mac_del_entry(lan966x, fdb_info->addr, + fdb_info->vid); + break; + } + } else { + if (!netif_is_bridge_master(dev)) + goto out; + + /* In case the bridge is called */ + switch (fdb_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + /* If there is no front port in this vlan, there is no + * point to copy the frame to CPU because it would be + * just dropped at later point. So add it only if + * there is a port but it is required to store the fdb + * entry for later point when a port actually gets in + * the vlan. + */ + lan966x_fdb_add_entry(lan966x, fdb_info); + if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x, + fdb_info->vid)) + break; + + lan966x_mac_cpu_learn(lan966x, fdb_info->addr, + fdb_info->vid); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + ret = lan966x_fdb_del_entry(lan966x, fdb_info); + if (!lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x, + fdb_info->vid)) + break; + + if (ret) + lan966x_mac_cpu_forget(lan966x, fdb_info->addr, + fdb_info->vid); + break; + } + } + +out: + kfree(fdb_work->fdb_info.addr); + kfree(fdb_work); + dev_put(dev); +} + +int lan966x_handle_fdb(struct net_device *dev, + struct net_device *orig_dev, + unsigned long event, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + struct lan966x_fdb_event_work *fdb_work; + + if (ctx && ctx != port) + return 0; + + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + case SWITCHDEV_FDB_DEL_TO_DEVICE: + if (lan966x_netdevice_check(orig_dev) && + !fdb_info->added_by_user) + break; + + fdb_work = kzalloc(sizeof(*fdb_work), GFP_ATOMIC); + if (!fdb_work) + return -ENOMEM; + + fdb_work->dev = orig_dev; + fdb_work->lan966x = lan966x; + fdb_work->event = event; + INIT_WORK(&fdb_work->work, lan966x_fdb_event_work); + memcpy(&fdb_work->fdb_info, fdb_info, sizeof(fdb_work->fdb_info)); + fdb_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); + if (!fdb_work->fdb_info.addr) + goto err_addr_alloc; + + ether_addr_copy((u8 *)fdb_work->fdb_info.addr, fdb_info->addr); + dev_hold(orig_dev); + + queue_work(lan966x->fdb_work, &fdb_work->work); + break; + } + + return 0; +err_addr_alloc: + kfree(fdb_work); + return -ENOMEM; +} diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c index f6878b9f57ef..efadb8d326cc 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ +#include <net/switchdev.h> #include "lan966x_main.h" #define LAN966X_MAC_COLUMNS 4 @@ -13,6 +14,23 @@ #define MACACCESS_CMD_WRITE 7 #define MACACCESS_CMD_SYNC_GET_NEXT 8 +#define LAN966X_MAC_INVALID_ROW -1 + +struct lan966x_mac_entry { + struct list_head list; + unsigned char mac[ETH_ALEN] __aligned(2); + u16 vid; + u16 port_index; + int row; +}; + +struct lan966x_mac_raw_entry { + u32 mach; + u32 macl; + u32 maca; + bool processed; +}; + static int lan966x_mac_get_status(struct lan966x *lan966x) { return lan_rd(lan966x, ANA_MACACCESS); @@ -93,9 +111,333 @@ int lan966x_mac_cpu_forget(struct lan966x *lan966x, const char *addr, u16 vid) return lan966x_mac_forget(lan966x, addr, vid, ENTRYTYPE_LOCKED); } +void lan966x_mac_set_ageing(struct lan966x *lan966x, + u32 ageing) +{ + lan_rmw(ANA_AUTOAGE_AGE_PERIOD_SET(ageing / 2), + ANA_AUTOAGE_AGE_PERIOD, + lan966x, ANA_AUTOAGE); +} + void lan966x_mac_init(struct lan966x *lan966x) { /* Clear the MAC table */ lan_wr(MACACCESS_CMD_INIT, lan966x, ANA_MACACCESS); lan966x_mac_wait_for_completion(lan966x); + + spin_lock_init(&lan966x->mac_lock); + INIT_LIST_HEAD(&lan966x->mac_entries); +} + +static struct lan966x_mac_entry *lan966x_mac_alloc_entry(const unsigned char *mac, + u16 vid, u16 port_index) +{ + struct lan966x_mac_entry *mac_entry; + + mac_entry = kzalloc(sizeof(*mac_entry), GFP_KERNEL); + if (!mac_entry) + return NULL; + + memcpy(mac_entry->mac, mac, ETH_ALEN); + mac_entry->vid = vid; + mac_entry->port_index = port_index; + mac_entry->row = LAN966X_MAC_INVALID_ROW; + return mac_entry; +} + +static struct lan966x_mac_entry *lan966x_mac_find_entry(struct lan966x *lan966x, + const unsigned char *mac, + u16 vid, u16 port_index) +{ + struct lan966x_mac_entry *res = NULL; + struct lan966x_mac_entry *mac_entry; + + spin_lock(&lan966x->mac_lock); + list_for_each_entry(mac_entry, &lan966x->mac_entries, list) { + if (mac_entry->vid == vid && + ether_addr_equal(mac, mac_entry->mac) && + mac_entry->port_index == port_index) { + res = mac_entry; + break; + } + } + spin_unlock(&lan966x->mac_lock); + + return res; +} + +static int lan966x_mac_lookup(struct lan966x *lan966x, + const unsigned char mac[ETH_ALEN], + unsigned int vid, enum macaccess_entry_type type) +{ + int ret; + + lan966x_mac_select(lan966x, mac, vid); + + /* Issue a read command */ + lan_wr(ANA_MACACCESS_ENTRYTYPE_SET(type) | + ANA_MACACCESS_VALID_SET(1) | + ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_READ), + lan966x, ANA_MACACCESS); + + ret = lan966x_mac_wait_for_completion(lan966x); + if (ret) + return ret; + + return ANA_MACACCESS_VALID_GET(lan_rd(lan966x, ANA_MACACCESS)); +} + +static void lan966x_fdb_call_notifiers(enum switchdev_notifier_type type, + const char *mac, u16 vid, + struct net_device *dev) +{ + struct switchdev_notifier_fdb_info info = { 0 }; + + info.addr = mac; + info.vid = vid; + info.offloaded = true; + call_switchdev_notifiers(type, dev, &info.info, NULL); +} + +int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port, + const unsigned char *addr, u16 vid) +{ + struct lan966x_mac_entry *mac_entry; + + if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL)) + return 0; + + /* In case the entry already exists, don't add it again to SW, + * just update HW, but we need to look in the actual HW because + * it is possible for an entry to be learn by HW and before we + * get the interrupt the frame will reach CPU and the CPU will + * add the entry but without the extern_learn flag. + */ + mac_entry = lan966x_mac_find_entry(lan966x, addr, vid, port->chip_port); + if (mac_entry) + return lan966x_mac_learn(lan966x, port->chip_port, + addr, vid, ENTRYTYPE_LOCKED); + + mac_entry = lan966x_mac_alloc_entry(addr, vid, port->chip_port); + if (!mac_entry) + return -ENOMEM; + + spin_lock(&lan966x->mac_lock); + list_add_tail(&mac_entry->list, &lan966x->mac_entries); + spin_unlock(&lan966x->mac_lock); + + lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED); + lan966x_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, addr, vid, port->dev); + + return 0; +} + +int lan966x_mac_del_entry(struct lan966x *lan966x, const unsigned char *addr, + u16 vid) +{ + struct lan966x_mac_entry *mac_entry, *tmp; + + spin_lock(&lan966x->mac_lock); + list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries, + list) { + if (mac_entry->vid == vid && + ether_addr_equal(addr, mac_entry->mac)) { + lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid, + ENTRYTYPE_LOCKED); + + list_del(&mac_entry->list); + kfree(mac_entry); + } + } + spin_unlock(&lan966x->mac_lock); + + return 0; +} + +void lan966x_mac_purge_entries(struct lan966x *lan966x) +{ + struct lan966x_mac_entry *mac_entry, *tmp; + + spin_lock(&lan966x->mac_lock); + list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries, + list) { + lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid, + ENTRYTYPE_LOCKED); + + list_del(&mac_entry->list); + kfree(mac_entry); + } + spin_unlock(&lan966x->mac_lock); +} + +static void lan966x_mac_notifiers(enum switchdev_notifier_type type, + unsigned char *mac, u32 vid, + struct net_device *dev) +{ + rtnl_lock(); + lan966x_fdb_call_notifiers(type, mac, vid, dev); + rtnl_unlock(); +} + +static void lan966x_mac_process_raw_entry(struct lan966x_mac_raw_entry *raw_entry, + u8 *mac, u16 *vid, u32 *dest_idx) +{ + mac[0] = (raw_entry->mach >> 8) & 0xff; + mac[1] = (raw_entry->mach >> 0) & 0xff; + mac[2] = (raw_entry->macl >> 24) & 0xff; + mac[3] = (raw_entry->macl >> 16) & 0xff; + mac[4] = (raw_entry->macl >> 8) & 0xff; + mac[5] = (raw_entry->macl >> 0) & 0xff; + + *vid = (raw_entry->mach >> 16) & 0xfff; + *dest_idx = ANA_MACACCESS_DEST_IDX_GET(raw_entry->maca); +} + +static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, + struct lan966x_mac_raw_entry *raw_entries) +{ + struct lan966x_mac_entry *mac_entry, *tmp; + unsigned char mac[ETH_ALEN] __aligned(2); + u32 dest_idx; + u32 column; + u16 vid; + + spin_lock(&lan966x->mac_lock); + list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries, list) { + bool found = false; + + if (mac_entry->row != row) + continue; + + for (column = 0; column < LAN966X_MAC_COLUMNS; ++column) { + /* All the valid entries are at the start of the row, + * so when get one invalid entry it can just skip the + * rest of the columns + */ + if (!ANA_MACACCESS_VALID_GET(raw_entries[column].maca)) + break; + + lan966x_mac_process_raw_entry(&raw_entries[column], + mac, &vid, &dest_idx); + WARN_ON(dest_idx > lan966x->num_phys_ports); + + /* If the entry in SW is found, then there is nothing + * to do + */ + if (mac_entry->vid == vid && + ether_addr_equal(mac_entry->mac, mac) && + mac_entry->port_index == dest_idx) { + raw_entries[column].processed = true; + found = true; + break; + } + } + + if (!found) { + /* Notify the bridge that the entry doesn't exist + * anymore in the HW and remove the entry from the SW + * list + */ + lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, + mac_entry->mac, mac_entry->vid, + lan966x->ports[mac_entry->port_index]->dev); + + list_del(&mac_entry->list); + kfree(mac_entry); + } + } + spin_unlock(&lan966x->mac_lock); + + /* Now go to the list of columns and see if any entry was not in the SW + * list, then that means that the entry is new so it needs to notify the + * bridge. + */ + for (column = 0; column < LAN966X_MAC_COLUMNS; ++column) { + /* All the valid entries are at the start of the row, so when + * get one invalid entry it can just skip the rest of the columns + */ + if (!ANA_MACACCESS_VALID_GET(raw_entries[column].maca)) + break; + + /* If the entry already exists then don't do anything */ + if (raw_entries[column].processed) + continue; + + lan966x_mac_process_raw_entry(&raw_entries[column], + mac, &vid, &dest_idx); + WARN_ON(dest_idx > lan966x->num_phys_ports); + + mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx); + if (!mac_entry) + return; + + mac_entry->row = row; + + spin_lock(&lan966x->mac_lock); + list_add_tail(&mac_entry->list, &lan966x->mac_entries); + spin_unlock(&lan966x->mac_lock); + + lan966x_mac_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE, + mac, vid, lan966x->ports[dest_idx]->dev); + } +} + +irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x) +{ + struct lan966x_mac_raw_entry entry[LAN966X_MAC_COLUMNS] = { 0 }; + u32 index, column; + bool stop = true; + u32 val; + + /* Start the scan from 0, 0 */ + lan_wr(ANA_MACTINDX_M_INDEX_SET(0) | + ANA_MACTINDX_BUCKET_SET(0), + lan966x, ANA_MACTINDX); + + while (1) { + lan_rmw(ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_SYNC_GET_NEXT), + ANA_MACACCESS_MAC_TABLE_CMD, + lan966x, ANA_MACACCESS); + lan966x_mac_wait_for_completion(lan966x); + + val = lan_rd(lan966x, ANA_MACTINDX); + index = ANA_MACTINDX_M_INDEX_GET(val); + column = ANA_MACTINDX_BUCKET_GET(val); + + /* The SYNC-GET-NEXT returns all the entries(4) in a row in + * which is suffered a change. By change it means that new entry + * was added or an entry was removed because of ageing. + * It would return all the columns for that row. And after that + * it would return the next row The stop conditions of the + * SYNC-GET-NEXT is when it reaches 'directly' to row 0 + * column 3. So if SYNC-GET-NEXT returns row 0 and column 0 + * then it is required to continue to read more even if it + * reaches row 0 and column 3. + */ + if (index == 0 && column == 0) + stop = false; + + if (column == LAN966X_MAC_COLUMNS - 1 && + index == 0 && stop) + break; + + entry[column].mach = lan_rd(lan966x, ANA_MACHDATA); + entry[column].macl = lan_rd(lan966x, ANA_MACLDATA); + entry[column].maca = lan_rd(lan966x, ANA_MACACCESS); + + /* Once all the columns are read process them */ + if (column == LAN966X_MAC_COLUMNS - 1) { + lan966x_mac_irq_process(lan966x, index, entry); + /* A row was processed so it is safe to assume that the + * next row/column can be the stop condition + */ + stop = true; + } + } + + lan_rmw(ANA_ANAINTR_INTR_SET(0), + ANA_ANAINTR_INTR, + lan966x, ANA_ANAINTR); + + return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 101c1f005baf..16f4d8737d7b 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -108,12 +108,12 @@ static int lan966x_port_set_mac_address(struct net_device *dev, void *p) int ret; /* Learn the new net device MAC address in the mac table. */ - ret = lan966x_mac_cpu_learn(lan966x, addr->sa_data, port->pvid); + ret = lan966x_mac_cpu_learn(lan966x, addr->sa_data, HOST_PVID); if (ret) return ret; /* Then forget the previous one. */ - ret = lan966x_mac_cpu_forget(lan966x, dev->dev_addr, port->pvid); + ret = lan966x_mac_cpu_forget(lan966x, dev->dev_addr, HOST_PVID); if (ret) return ret; @@ -283,6 +283,12 @@ static void lan966x_ifh_set_ipv(void *ifh, u64 bypass) IFH_POS_IPV, IFH_LEN * 4, PACK, 0); } +static void lan966x_ifh_set_vid(void *ifh, u64 vid) +{ + packing(ifh, &vid, IFH_POS_TCI + IFH_WID_TCI - 1, + IFH_POS_TCI, IFH_LEN * 4, PACK, 0); +} + static int lan966x_port_xmit(struct sk_buff *skb, struct net_device *dev) { struct lan966x_port *port = netdev_priv(dev); @@ -294,32 +300,11 @@ static int lan966x_port_xmit(struct sk_buff *skb, struct net_device *dev) lan966x_ifh_set_port(ifh, BIT_ULL(port->chip_port)); lan966x_ifh_set_qos_class(ifh, skb->priority >= 7 ? 0x7 : skb->priority); lan966x_ifh_set_ipv(ifh, skb->priority >= 7 ? 0x7 : skb->priority); + lan966x_ifh_set_vid(ifh, skb_vlan_tag_get(skb)); return lan966x_port_ifh_xmit(skb, ifh, dev); } -static void lan966x_set_promisc(struct lan966x_port *port, bool enable) -{ - struct lan966x *lan966x = port->lan966x; - - lan_rmw(ANA_CPU_FWD_CFG_SRC_COPY_ENA_SET(enable), - ANA_CPU_FWD_CFG_SRC_COPY_ENA, - lan966x, ANA_CPU_FWD_CFG(port->chip_port)); -} - -static void lan966x_port_change_rx_flags(struct net_device *dev, int flags) -{ - struct lan966x_port *port = netdev_priv(dev); - - if (!(flags & IFF_PROMISC)) - return; - - if (dev->flags & IFF_PROMISC) - lan966x_set_promisc(port, true); - else - lan966x_set_promisc(port, false); -} - static int lan966x_port_change_mtu(struct net_device *dev, int new_mtu) { struct lan966x_port *port = netdev_priv(dev); @@ -369,7 +354,6 @@ static const struct net_device_ops lan966x_port_netdev_ops = { .ndo_open = lan966x_port_open, .ndo_stop = lan966x_port_stop, .ndo_start_xmit = lan966x_port_xmit, - .ndo_change_rx_flags = lan966x_port_change_rx_flags, .ndo_change_mtu = lan966x_port_change_mtu, .ndo_set_rx_mode = lan966x_port_set_rx_mode, .ndo_get_phys_port_name = lan966x_port_get_phys_port_name, @@ -378,6 +362,11 @@ static const struct net_device_ops lan966x_port_netdev_ops = { .ndo_get_port_parent_id = lan966x_port_get_parent_id, }; +bool lan966x_netdevice_check(const struct net_device *dev) +{ + return dev->netdev_ops == &lan966x_port_netdev_ops; +} + static int lan966x_port_xtr_status(struct lan966x *lan966x, u8 grp) { return lan_rd(lan966x, QS_XTR_RD(grp)); @@ -514,6 +503,9 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args) skb->protocol = eth_type_trans(skb, dev); + if (lan966x->bridge_mask & BIT(src_port)) + skb->offload_fwd_mark = 1; + netif_rx_ni(skb); dev->stats.rx_bytes += len; dev->stats.rx_packets++; @@ -527,6 +519,13 @@ recover: return IRQ_HANDLED; } +static irqreturn_t lan966x_ana_irq_handler(int irq, void *args) +{ + struct lan966x *lan966x = args; + + return lan966x_mac_irq_handler(lan966x); +} + static void lan966x_cleanup_ports(struct lan966x *lan966x) { struct lan966x_port *port; @@ -554,6 +553,11 @@ static void lan966x_cleanup_ports(struct lan966x *lan966x) disable_irq(lan966x->xtr_irq); lan966x->xtr_irq = -ENXIO; + + if (lan966x->ana_irq) { + disable_irq(lan966x->ana_irq); + lan966x->ana_irq = -ENXIO; + } } static int lan966x_probe_port(struct lan966x *lan966x, u32 p, @@ -578,13 +582,14 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p, port->dev = dev; port->lan966x = lan966x; port->chip_port = p; - port->pvid = PORT_PVID; lan966x->ports[p] = port; dev->max_mtu = ETH_MAX_MTU; dev->netdev_ops = &lan966x_port_netdev_ops; dev->ethtool_ops = &lan966x_ethtool_ops; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; dev->needed_headroom = IFH_LEN * sizeof(u32); eth_hw_addr_gen(dev, lan966x->base_mac, p + 1); @@ -631,6 +636,10 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p, return err; } + lan966x_vlan_port_set_vlan_aware(port, 0); + lan966x_vlan_port_set_vid(port, HOST_PVID, false, false); + lan966x_vlan_port_apply(port); + return 0; } @@ -641,6 +650,8 @@ static void lan966x_init(struct lan966x *lan966x) /* MAC table initialization */ lan966x_mac_init(lan966x); + lan966x_vlan_init(lan966x); + /* Flush queues */ lan_wr(lan_rd(lan966x, QS_XTR_FLUSH) | GENMASK(1, 0), @@ -704,8 +715,10 @@ static void lan966x_init(struct lan966x *lan966x) /* There are 8 priorities */ for (i = 0; i < 8; ++i) lan_rmw(ANA_FLOODING_FLD_MULTICAST_SET(PGID_MC) | + ANA_FLOODING_FLD_UNICAST_SET(PGID_UC) | ANA_FLOODING_FLD_BROADCAST_SET(PGID_BC), ANA_FLOODING_FLD_MULTICAST | + ANA_FLOODING_FLD_UNICAST | ANA_FLOODING_FLD_BROADCAST, lan966x, ANA_FLOODING(i)); @@ -757,6 +770,11 @@ static void lan966x_init(struct lan966x *lan966x) ANA_PGID_PGID, lan966x, ANA_PGID(PGID_MCIPV4)); + /* Unicast to all other ports */ + lan_rmw(GENMASK(lan966x->num_phys_ports - 1, 0), + ANA_PGID_PGID, + lan966x, ANA_PGID(PGID_UC)); + /* Broadcast to the CPU port and to other ports */ lan_rmw(ANA_PGID_PGID_SET(BIT(CPU_PORT) | GENMASK(lan966x->num_phys_ports - 1, 0)), ANA_PGID_PGID, @@ -870,6 +888,15 @@ static int lan966x_probe(struct platform_device *pdev) return -ENODEV; } + lan966x->ana_irq = platform_get_irq_byname(pdev, "ana"); + if (lan966x->ana_irq) { + err = devm_request_threaded_irq(&pdev->dev, lan966x->ana_irq, NULL, + lan966x_ana_irq_handler, IRQF_ONESHOT, + "ana irq", lan966x); + if (err) + return dev_err_probe(&pdev->dev, err, "Unable to use ana irq"); + } + /* init switch */ lan966x_init(lan966x); lan966x_stats_init(lan966x); @@ -899,6 +926,10 @@ static int lan966x_probe(struct platform_device *pdev) lan966x_port_init(lan966x->ports[p]); } + err = lan966x_fdb_init(lan966x); + if (err) + goto cleanup_ports; + return 0; cleanup_ports: @@ -923,6 +954,9 @@ static int lan966x_remove(struct platform_device *pdev) destroy_workqueue(lan966x->stats_queue); mutex_destroy(&lan966x->stats_lock); + lan966x_mac_purge_entries(lan966x); + lan966x_fdb_deinit(lan966x); + return 0; } @@ -934,7 +968,32 @@ static struct platform_driver lan966x_driver = { .of_match_table = lan966x_match, }, }; -module_platform_driver(lan966x_driver); + +static int __init lan966x_switch_driver_init(void) +{ + int ret; + + lan966x_register_notifier_blocks(); + + ret = platform_driver_register(&lan966x_driver); + if (ret) + goto err; + + return 0; + +err: + lan966x_unregister_notifier_blocks(); + return ret; +} + +static void __exit lan966x_switch_driver_exit(void) +{ + platform_driver_unregister(&lan966x_driver); + lan966x_unregister_notifier_blocks(); +} + +module_init(lan966x_switch_driver_init); +module_exit(lan966x_switch_driver_exit); MODULE_DESCRIPTION("Microchip LAN966X switch driver"); MODULE_AUTHOR("Horatiu Vultur <horatiu.vultur@microchip.com>"); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 7e5a3b6f168d..c399b1256edc 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -4,9 +4,11 @@ #define __LAN966X_MAIN_H__ #include <linux/etherdevice.h> +#include <linux/if_vlan.h> #include <linux/jiffies.h> #include <linux/phy.h> #include <linux/phylink.h> +#include <net/switchdev.h> #include "lan966x_regs.h" #include "lan966x_ifh.h" @@ -22,7 +24,8 @@ #define PGID_SRC 80 #define PGID_ENTRIES 89 -#define PORT_PVID 0 +#define UNAWARE_PVID 0 +#define HOST_PVID 4095 /* Reserved amount for (SRC, PRIO) at index 8*SRC + PRIO */ #define QSYS_Q_RSRV 95 @@ -75,6 +78,16 @@ struct lan966x { u8 base_mac[ETH_ALEN]; + struct net_device *bridge; + u16 bridge_mask; + u16 bridge_fwd_mask; + + struct list_head mac_entries; + spinlock_t mac_lock; /* lock for mac_entries list */ + + u16 vlan_mask[VLAN_N_VID]; + DECLARE_BITMAP(cpu_vlan_mask, VLAN_N_VID); + /* stats */ const struct lan966x_stat_layout *stats_layout; u32 num_stats; @@ -87,6 +100,11 @@ struct lan966x { /* interrupts */ int xtr_irq; + int ana_irq; + + /* worqueue for fdb */ + struct workqueue_struct *fdb_work; + struct list_head fdb_entries; }; struct lan966x_port_config { @@ -105,6 +123,10 @@ struct lan966x_port { u8 chip_port; u16 pvid; + u16 vid; + bool vlan_aware; + + bool learn_ena; struct phylink_config phylink_config; struct phylink_pcs phylink_pcs; @@ -118,6 +140,11 @@ extern const struct phylink_mac_ops lan966x_phylink_mac_ops; extern const struct phylink_pcs_ops lan966x_phylink_pcs_ops; extern const struct ethtool_ops lan966x_ethtool_ops; +bool lan966x_netdevice_check(const struct net_device *dev); + +void lan966x_register_notifier_blocks(void); +void lan966x_unregister_notifier_blocks(void); + void lan966x_stats_get(struct net_device *dev, struct rtnl_link_stats64 *stats); int lan966x_stats_init(struct lan966x *lan966x); @@ -141,6 +168,43 @@ int lan966x_mac_forget(struct lan966x *lan966x, int lan966x_mac_cpu_learn(struct lan966x *lan966x, const char *addr, u16 vid); int lan966x_mac_cpu_forget(struct lan966x *lan966x, const char *addr, u16 vid); void lan966x_mac_init(struct lan966x *lan966x); +void lan966x_mac_set_ageing(struct lan966x *lan966x, + u32 ageing); +int lan966x_mac_del_entry(struct lan966x *lan966x, + const unsigned char *addr, + u16 vid); +int lan966x_mac_add_entry(struct lan966x *lan966x, + struct lan966x_port *port, + const unsigned char *addr, + u16 vid); +void lan966x_mac_purge_entries(struct lan966x *lan966x); +irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x); + +void lan966x_vlan_init(struct lan966x *lan966x); +void lan966x_vlan_port_apply(struct lan966x_port *port); +bool lan966x_vlan_cpu_member_cpu_vlan_mask(struct lan966x *lan966x, u16 vid); +void lan966x_vlan_port_set_vlan_aware(struct lan966x_port *port, + bool vlan_aware); +int lan966x_vlan_port_set_vid(struct lan966x_port *port, + u16 vid, + bool pvid, + bool untagged); +void lan966x_vlan_port_add_vlan(struct lan966x_port *port, + u16 vid, + bool pvid, + bool untagged); +void lan966x_vlan_port_del_vlan(struct lan966x_port *port, u16 vid); +void lan966x_vlan_cpu_add_vlan(struct lan966x *lan966x, u16 vid); +void lan966x_vlan_cpu_del_vlan(struct lan966x *lan966x, u16 vid); + +void lan966x_fdb_write_entries(struct lan966x *lan966x, u16 vid); +void lan966x_fdb_erase_entries(struct lan966x *lan966x, u16 vid); +int lan966x_fdb_init(struct lan966x *lan966x); +void lan966x_fdb_deinit(struct lan966x *lan966x); +int lan966x_handle_fdb(struct net_device *dev, + struct net_device *orig_dev, + unsigned long event, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info); static inline void __iomem *lan_addr(void __iomem *base[], int id, int tinst, int tcnt, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h index 879dcd807dec..a13c469e139a 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h @@ -61,6 +61,9 @@ enum lan966x_target { #define ANA_ADVLEARN_VLAN_CHK_GET(x)\ FIELD_GET(ANA_ADVLEARN_VLAN_CHK, x) +/* ANA:ANA:VLANMASK */ +#define ANA_VLANMASK __REG(TARGET_ANA, 0, 1, 29824, 0, 1, 244, 8, 0, 1, 4) + /* ANA:ANA:ANAINTR */ #define ANA_ANAINTR __REG(TARGET_ANA, 0, 1, 29824, 0, 1, 244, 16, 0, 1, 4) @@ -88,6 +91,12 @@ enum lan966x_target { /* ANA:ANA:FLOODING */ #define ANA_FLOODING(r) __REG(TARGET_ANA, 0, 1, 29824, 0, 1, 244, 68, r, 8, 4) +#define ANA_FLOODING_FLD_UNICAST GENMASK(17, 12) +#define ANA_FLOODING_FLD_UNICAST_SET(x)\ + FIELD_PREP(ANA_FLOODING_FLD_UNICAST, x) +#define ANA_FLOODING_FLD_UNICAST_GET(x)\ + FIELD_GET(ANA_FLOODING_FLD_UNICAST, x) + #define ANA_FLOODING_FLD_BROADCAST GENMASK(11, 6) #define ANA_FLOODING_FLD_BROADCAST_SET(x)\ FIELD_PREP(ANA_FLOODING_FLD_BROADCAST, x) @@ -184,6 +193,102 @@ enum lan966x_target { #define ANA_MACACCESS_MAC_TABLE_CMD_GET(x)\ FIELD_GET(ANA_MACACCESS_MAC_TABLE_CMD, x) +/* ANA:ANA_TABLES:MACTINDX */ +#define ANA_MACTINDX __REG(TARGET_ANA, 0, 1, 27520, 0, 1, 128, 52, 0, 1, 4) + +#define ANA_MACTINDX_BUCKET GENMASK(12, 11) +#define ANA_MACTINDX_BUCKET_SET(x)\ + FIELD_PREP(ANA_MACTINDX_BUCKET, x) +#define ANA_MACTINDX_BUCKET_GET(x)\ + FIELD_GET(ANA_MACTINDX_BUCKET, x) + +#define ANA_MACTINDX_M_INDEX GENMASK(10, 0) +#define ANA_MACTINDX_M_INDEX_SET(x)\ + FIELD_PREP(ANA_MACTINDX_M_INDEX, x) +#define ANA_MACTINDX_M_INDEX_GET(x)\ + FIELD_GET(ANA_MACTINDX_M_INDEX, x) + +/* ANA:ANA_TABLES:VLAN_PORT_MASK */ +#define ANA_VLAN_PORT_MASK __REG(TARGET_ANA, 0, 1, 27520, 0, 1, 128, 56, 0, 1, 4) + +#define ANA_VLAN_PORT_MASK_VLAN_PORT_MASK GENMASK(8, 0) +#define ANA_VLAN_PORT_MASK_VLAN_PORT_MASK_SET(x)\ + FIELD_PREP(ANA_VLAN_PORT_MASK_VLAN_PORT_MASK, x) +#define ANA_VLAN_PORT_MASK_VLAN_PORT_MASK_GET(x)\ + FIELD_GET(ANA_VLAN_PORT_MASK_VLAN_PORT_MASK, x) + +/* ANA:ANA_TABLES:VLANACCESS */ +#define ANA_VLANACCESS __REG(TARGET_ANA, 0, 1, 27520, 0, 1, 128, 60, 0, 1, 4) + +#define ANA_VLANACCESS_VLAN_TBL_CMD GENMASK(1, 0) +#define ANA_VLANACCESS_VLAN_TBL_CMD_SET(x)\ + FIELD_PREP(ANA_VLANACCESS_VLAN_TBL_CMD, x) +#define ANA_VLANACCESS_VLAN_TBL_CMD_GET(x)\ + FIELD_GET(ANA_VLANACCESS_VLAN_TBL_CMD, x) + +/* ANA:ANA_TABLES:VLANTIDX */ +#define ANA_VLANTIDX __REG(TARGET_ANA, 0, 1, 27520, 0, 1, 128, 64, 0, 1, 4) + +#define ANA_VLANTIDX_VLAN_PGID_CPU_DIS BIT(18) +#define ANA_VLANTIDX_VLAN_PGID_CPU_DIS_SET(x)\ + FIELD_PREP(ANA_VLANTIDX_VLAN_PGID_CPU_DIS, x) +#define ANA_VLANTIDX_VLAN_PGID_CPU_DIS_GET(x)\ + FIELD_GET(ANA_VLANTIDX_VLAN_PGID_CPU_DIS, x) + +#define ANA_VLANTIDX_V_INDEX GENMASK(11, 0) +#define ANA_VLANTIDX_V_INDEX_SET(x)\ + FIELD_PREP(ANA_VLANTIDX_V_INDEX, x) +#define ANA_VLANTIDX_V_INDEX_GET(x)\ + FIELD_GET(ANA_VLANTIDX_V_INDEX, x) + +/* ANA:PORT:VLAN_CFG */ +#define ANA_VLAN_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 0, 0, 1, 4) + +#define ANA_VLAN_CFG_VLAN_AWARE_ENA BIT(20) +#define ANA_VLAN_CFG_VLAN_AWARE_ENA_SET(x)\ + FIELD_PREP(ANA_VLAN_CFG_VLAN_AWARE_ENA, x) +#define ANA_VLAN_CFG_VLAN_AWARE_ENA_GET(x)\ + FIELD_GET(ANA_VLAN_CFG_VLAN_AWARE_ENA, x) + +#define ANA_VLAN_CFG_VLAN_POP_CNT GENMASK(19, 18) +#define ANA_VLAN_CFG_VLAN_POP_CNT_SET(x)\ + FIELD_PREP(ANA_VLAN_CFG_VLAN_POP_CNT, x) +#define ANA_VLAN_CFG_VLAN_POP_CNT_GET(x)\ + FIELD_GET(ANA_VLAN_CFG_VLAN_POP_CNT, x) + +#define ANA_VLAN_CFG_VLAN_VID GENMASK(11, 0) +#define ANA_VLAN_CFG_VLAN_VID_SET(x)\ + FIELD_PREP(ANA_VLAN_CFG_VLAN_VID, x) +#define ANA_VLAN_CFG_VLAN_VID_GET(x)\ + FIELD_GET(ANA_VLAN_CFG_VLAN_VID, x) + +/* ANA:PORT:DROP_CFG */ +#define ANA_DROP_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 4, 0, 1, 4) + +#define ANA_DROP_CFG_DROP_UNTAGGED_ENA BIT(6) +#define ANA_DROP_CFG_DROP_UNTAGGED_ENA_SET(x)\ + FIELD_PREP(ANA_DROP_CFG_DROP_UNTAGGED_ENA, x) +#define ANA_DROP_CFG_DROP_UNTAGGED_ENA_GET(x)\ + FIELD_GET(ANA_DROP_CFG_DROP_UNTAGGED_ENA, x) + +#define ANA_DROP_CFG_DROP_PRIO_S_TAGGED_ENA BIT(3) +#define ANA_DROP_CFG_DROP_PRIO_S_TAGGED_ENA_SET(x)\ + FIELD_PREP(ANA_DROP_CFG_DROP_PRIO_S_TAGGED_ENA, x) +#define ANA_DROP_CFG_DROP_PRIO_S_TAGGED_ENA_GET(x)\ + FIELD_GET(ANA_DROP_CFG_DROP_PRIO_S_TAGGED_ENA, x) + +#define ANA_DROP_CFG_DROP_PRIO_C_TAGGED_ENA BIT(2) +#define ANA_DROP_CFG_DROP_PRIO_C_TAGGED_ENA_SET(x)\ + FIELD_PREP(ANA_DROP_CFG_DROP_PRIO_C_TAGGED_ENA, x) +#define ANA_DROP_CFG_DROP_PRIO_C_TAGGED_ENA_GET(x)\ + FIELD_GET(ANA_DROP_CFG_DROP_PRIO_C_TAGGED_ENA, x) + +#define ANA_DROP_CFG_DROP_MC_SMAC_ENA BIT(0) +#define ANA_DROP_CFG_DROP_MC_SMAC_ENA_SET(x)\ + FIELD_PREP(ANA_DROP_CFG_DROP_MC_SMAC_ENA, x) +#define ANA_DROP_CFG_DROP_MC_SMAC_ENA_GET(x)\ + FIELD_GET(ANA_DROP_CFG_DROP_MC_SMAC_ENA, x) + /* ANA:PORT:CPU_FWD_CFG */ #define ANA_CPU_FWD_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 96, 0, 1, 4) @@ -589,6 +694,36 @@ enum lan966x_target { /* QSYS:RES_CTRL:RES_CFG */ #define QSYS_RES_CFG(g) __REG(TARGET_QSYS, 0, 1, 32768, g, 1024, 8, 0, 0, 1, 4) +/* REW:PORT:PORT_VLAN_CFG */ +#define REW_PORT_VLAN_CFG(g) __REG(TARGET_REW, 0, 1, 0, g, 10, 128, 0, 0, 1, 4) + +#define REW_PORT_VLAN_CFG_PORT_TPID GENMASK(31, 16) +#define REW_PORT_VLAN_CFG_PORT_TPID_SET(x)\ + FIELD_PREP(REW_PORT_VLAN_CFG_PORT_TPID, x) +#define REW_PORT_VLAN_CFG_PORT_TPID_GET(x)\ + FIELD_GET(REW_PORT_VLAN_CFG_PORT_TPID, x) + +#define REW_PORT_VLAN_CFG_PORT_VID GENMASK(11, 0) +#define REW_PORT_VLAN_CFG_PORT_VID_SET(x)\ + FIELD_PREP(REW_PORT_VLAN_CFG_PORT_VID, x) +#define REW_PORT_VLAN_CFG_PORT_VID_GET(x)\ + FIELD_GET(REW_PORT_VLAN_CFG_PORT_VID, x) + +/* REW:PORT:TAG_CFG */ +#define REW_TAG_CFG(g) __REG(TARGET_REW, 0, 1, 0, g, 10, 128, 4, 0, 1, 4) + +#define REW_TAG_CFG_TAG_CFG GENMASK(8, 7) +#define REW_TAG_CFG_TAG_CFG_SET(x)\ + FIELD_PREP(REW_TAG_CFG_TAG_CFG, x) +#define REW_TAG_CFG_TAG_CFG_GET(x)\ + FIELD_GET(REW_TAG_CFG_TAG_CFG, x) + +#define REW_TAG_CFG_TAG_TPID_CFG GENMASK(6, 5) +#define REW_TAG_CFG_TAG_TPID_CFG_SET(x)\ + FIELD_PREP(REW_TAG_CFG_TAG_TPID_CFG, x) +#define REW_TAG_CFG_TAG_TPID_CFG_GET(x)\ + FIELD_GET(REW_TAG_CFG_TAG_TPID_CFG, x) + /* REW:PORT:PORT_CFG */ #define REW_PORT_CFG(g) __REG(TARGET_REW, 0, 1, 0, g, 10, 128, 8, 0, 1, 4) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c new file mode 100644 index 000000000000..deb3dd5be67a --- /dev/null +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c @@ -0,0 +1,536 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/if_bridge.h> +#include <net/switchdev.h> + +#include "lan966x_main.h" + +static struct notifier_block lan966x_netdevice_nb __read_mostly; +static struct notifier_block lan966x_switchdev_nb __read_mostly; +static struct notifier_block lan966x_switchdev_blocking_nb __read_mostly; + +static void lan966x_port_set_mcast_flood(struct lan966x_port *port, + bool enabled) +{ + u32 val = lan_rd(port->lan966x, ANA_PGID(PGID_MC)); + + val = ANA_PGID_PGID_GET(val); + if (enabled) + val |= BIT(port->chip_port); + else + val &= ~BIT(port->chip_port); + + lan_rmw(ANA_PGID_PGID_SET(val), + ANA_PGID_PGID, + port->lan966x, ANA_PGID(PGID_MC)); +} + +static void lan966x_port_set_ucast_flood(struct lan966x_port *port, + bool enabled) +{ + u32 val = lan_rd(port->lan966x, ANA_PGID(PGID_UC)); + + val = ANA_PGID_PGID_GET(val); + if (enabled) + val |= BIT(port->chip_port); + else + val &= ~BIT(port->chip_port); + + lan_rmw(ANA_PGID_PGID_SET(val), + ANA_PGID_PGID, + port->lan966x, ANA_PGID(PGID_UC)); +} + +static void lan966x_port_set_bcast_flood(struct lan966x_port *port, + bool enabled) +{ + u32 val = lan_rd(port->lan966x, ANA_PGID(PGID_BC)); + + val = ANA_PGID_PGID_GET(val); + if (enabled) + val |= BIT(port->chip_port); + else + val &= ~BIT(port->chip_port); + + lan_rmw(ANA_PGID_PGID_SET(val), + ANA_PGID_PGID, + port->lan966x, ANA_PGID(PGID_BC)); +} + +static void lan966x_port_set_learning(struct lan966x_port *port, bool enabled) +{ + lan_rmw(ANA_PORT_CFG_LEARN_ENA_SET(enabled), + ANA_PORT_CFG_LEARN_ENA, + port->lan966x, ANA_PORT_CFG(port->chip_port)); + + port->learn_ena = enabled; +} + +static void lan966x_port_bridge_flags(struct lan966x_port *port, + struct switchdev_brport_flags flags) +{ + if (flags.mask & BR_MCAST_FLOOD) + lan966x_port_set_mcast_flood(port, + !!(flags.val & BR_MCAST_FLOOD)); + + if (flags.mask & BR_FLOOD) + lan966x_port_set_ucast_flood(port, + !!(flags.val & BR_FLOOD)); + + if (flags.mask & BR_BCAST_FLOOD) + lan966x_port_set_bcast_flood(port, + !!(flags.val & BR_BCAST_FLOOD)); + + if (flags.mask & BR_LEARNING) + lan966x_port_set_learning(port, + !!(flags.val & BR_LEARNING)); +} + +static int lan966x_port_pre_bridge_flags(struct lan966x_port *port, + struct switchdev_brport_flags flags) +{ + if (flags.mask & ~(BR_MCAST_FLOOD | BR_FLOOD | BR_BCAST_FLOOD | + BR_LEARNING)) + return -EINVAL; + + return 0; +} + +static void lan966x_update_fwd_mask(struct lan966x *lan966x) +{ + int i; + + for (i = 0; i < lan966x->num_phys_ports; i++) { + struct lan966x_port *port = lan966x->ports[i]; + unsigned long mask = 0; + + if (port && lan966x->bridge_fwd_mask & BIT(i)) + mask = lan966x->bridge_fwd_mask & ~BIT(i); + + mask |= BIT(CPU_PORT); + + lan_wr(ANA_PGID_PGID_SET(mask), + lan966x, ANA_PGID(PGID_SRC + i)); + } +} + +static void lan966x_port_stp_state_set(struct lan966x_port *port, u8 state) +{ + struct lan966x *lan966x = port->lan966x; + bool learn_ena = false; + + if ((state == BR_STATE_FORWARDING || state == BR_STATE_LEARNING) && + port->learn_ena) + learn_ena = true; + + if (state == BR_STATE_FORWARDING) + lan966x->bridge_fwd_mask |= BIT(port->chip_port); + else + lan966x->bridge_fwd_mask &= ~BIT(port->chip_port); + + lan_rmw(ANA_PORT_CFG_LEARN_ENA_SET(learn_ena), + ANA_PORT_CFG_LEARN_ENA, + lan966x, ANA_PORT_CFG(port->chip_port)); + + lan966x_update_fwd_mask(lan966x); +} + +static void lan966x_port_ageing_set(struct lan966x_port *port, + unsigned long ageing_clock_t) +{ + unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t); + u32 ageing_time = jiffies_to_msecs(ageing_jiffies) / 1000; + + lan966x_mac_set_ageing(port->lan966x, ageing_time); +} + +static int lan966x_port_attr_set(struct net_device *dev, const void *ctx, + const struct switchdev_attr *attr, + struct netlink_ext_ack *extack) +{ + struct lan966x_port *port = netdev_priv(dev); + int err = 0; + + if (ctx && ctx != port) + return 0; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + lan966x_port_bridge_flags(port, attr->u.brport_flags); + break; + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + err = lan966x_port_pre_bridge_flags(port, attr->u.brport_flags); + break; + case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + lan966x_port_stp_state_set(port, attr->u.stp_state); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + lan966x_port_ageing_set(port, attr->u.ageing_time); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: + lan966x_vlan_port_set_vlan_aware(port, attr->u.vlan_filtering); + lan966x_vlan_port_apply(port); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int lan966x_port_bridge_join(struct lan966x_port *port, + struct net_device *bridge, + struct netlink_ext_ack *extack) +{ + struct switchdev_brport_flags flags = {0}; + struct lan966x *lan966x = port->lan966x; + struct net_device *dev = port->dev; + int err; + + if (!lan966x->bridge_mask) { + lan966x->bridge = bridge; + } else { + if (lan966x->bridge != bridge) { + NL_SET_ERR_MSG_MOD(extack, "Not allow to add port to different bridge"); + return -ENODEV; + } + } + + err = switchdev_bridge_port_offload(dev, dev, port, + &lan966x_switchdev_nb, + &lan966x_switchdev_blocking_nb, + false, extack); + if (err) + return err; + + lan966x->bridge_mask |= BIT(port->chip_port); + + flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD; + flags.val = flags.mask; + lan966x_port_bridge_flags(port, flags); + + return 0; +} + +static void lan966x_port_bridge_leave(struct lan966x_port *port, + struct net_device *bridge) +{ + struct switchdev_brport_flags flags = {0}; + struct lan966x *lan966x = port->lan966x; + + flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD; + flags.val = flags.mask & ~BR_LEARNING; + lan966x_port_bridge_flags(port, flags); + + lan966x->bridge_mask &= ~BIT(port->chip_port); + + if (!lan966x->bridge_mask) + lan966x->bridge = NULL; + + /* Set the port back to host mode */ + lan966x_vlan_port_set_vlan_aware(port, false); + lan966x_vlan_port_set_vid(port, HOST_PVID, false, false); + lan966x_vlan_port_apply(port); +} + +static int lan966x_port_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct lan966x_port *port = netdev_priv(dev); + struct netlink_ext_ack *extack; + int err = 0; + + extack = netdev_notifier_info_to_extack(&info->info); + + if (netif_is_bridge_master(info->upper_dev)) { + if (info->linking) + err = lan966x_port_bridge_join(port, info->upper_dev, + extack); + else + lan966x_port_bridge_leave(port, info->upper_dev); + } + + return err; +} + +static int lan966x_port_prechangeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct lan966x_port *port = netdev_priv(dev); + + if (netif_is_bridge_master(info->upper_dev) && !info->linking) + switchdev_bridge_port_unoffload(port->dev, port, + &lan966x_switchdev_nb, + &lan966x_switchdev_blocking_nb); + + return NOTIFY_DONE; +} + +static int lan966x_foreign_bridging_check(struct net_device *bridge, + struct netlink_ext_ack *extack) +{ + struct lan966x *lan966x = NULL; + bool has_foreign = false; + struct net_device *dev; + struct list_head *iter; + + if (!netif_is_bridge_master(bridge)) + return 0; + + netdev_for_each_lower_dev(bridge, dev, iter) { + if (lan966x_netdevice_check(dev)) { + struct lan966x_port *port = netdev_priv(dev); + + if (lan966x) { + /* Bridge already has at least one port of a + * lan966x switch inside it, check that it's + * the same instance of the driver. + */ + if (port->lan966x != lan966x) { + NL_SET_ERR_MSG_MOD(extack, + "Bridging between multiple lan966x switches disallowed"); + return -EINVAL; + } + } else { + /* This is the first lan966x port inside this + * bridge + */ + lan966x = port->lan966x; + } + } else { + has_foreign = true; + } + + if (lan966x && has_foreign) { + NL_SET_ERR_MSG_MOD(extack, + "Bridging lan966x ports with foreign interfaces disallowed"); + return -EINVAL; + } + } + + return 0; +} + +static int lan966x_bridge_check(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + return lan966x_foreign_bridging_check(info->upper_dev, + info->info.extack); +} + +static int lan966x_netdevice_port_event(struct net_device *dev, + struct notifier_block *nb, + unsigned long event, void *ptr) +{ + int err = 0; + + if (!lan966x_netdevice_check(dev)) { + if (event == NETDEV_CHANGEUPPER) + return lan966x_bridge_check(dev, ptr); + return 0; + } + + switch (event) { + case NETDEV_PRECHANGEUPPER: + err = lan966x_port_prechangeupper(dev, ptr); + break; + case NETDEV_CHANGEUPPER: + err = lan966x_bridge_check(dev, ptr); + if (err) + return err; + + err = lan966x_port_changeupper(dev, ptr); + break; + } + + return err; +} + +static int lan966x_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + int ret; + + ret = lan966x_netdevice_port_event(dev, nb, event, ptr); + + return notifier_from_errno(ret); +} + +static bool lan966x_foreign_dev_check(const struct net_device *dev, + const struct net_device *foreign_dev) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + + if (netif_is_bridge_master(foreign_dev)) + if (lan966x->bridge != foreign_dev) + return true; + + return false; +} + +static int lan966x_switchdev_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + int err; + + switch (event) { + case SWITCHDEV_PORT_ATTR_SET: + err = switchdev_handle_port_attr_set(dev, ptr, + lan966x_netdevice_check, + lan966x_port_attr_set); + return notifier_from_errno(err); + case SWITCHDEV_FDB_ADD_TO_DEVICE: + case SWITCHDEV_FDB_DEL_TO_DEVICE: + err = switchdev_handle_fdb_event_to_device(dev, event, ptr, + lan966x_netdevice_check, + lan966x_foreign_dev_check, + lan966x_handle_fdb, + NULL); + return notifier_from_errno(err); + } + + return NOTIFY_DONE; +} + +static int lan966x_handle_port_vlan_add(struct lan966x_port *port, + const struct switchdev_obj *obj) +{ + const struct switchdev_obj_port_vlan *v = SWITCHDEV_OBJ_PORT_VLAN(obj); + struct lan966x *lan966x = port->lan966x; + + /* When adding a port to a vlan, we get a callback for the port but + * also for the bridge. When get the callback for the bridge just bail + * out. Then when the bridge is added to the vlan, then we get a + * callback here but in this case the flags has set: + * BRIDGE_VLAN_INFO_BRENTRY. In this case it means that the CPU + * port is added to the vlan, so the broadcast frames and unicast frames + * with dmac of the bridge should be foward to CPU. + */ + if (netif_is_bridge_master(obj->orig_dev) && + !(v->flags & BRIDGE_VLAN_INFO_BRENTRY)) + return 0; + + if (!netif_is_bridge_master(obj->orig_dev)) + lan966x_vlan_port_add_vlan(port, v->vid, + v->flags & BRIDGE_VLAN_INFO_PVID, + v->flags & BRIDGE_VLAN_INFO_UNTAGGED); + else + lan966x_vlan_cpu_add_vlan(lan966x, v->vid); + + return 0; +} + +static int lan966x_handle_port_obj_add(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack) +{ + struct lan966x_port *port = netdev_priv(dev); + int err; + + if (ctx && ctx != port) + return 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = lan966x_handle_port_vlan_add(port, obj); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int lan966x_handle_port_vlan_del(struct lan966x_port *port, + const struct switchdev_obj *obj) +{ + const struct switchdev_obj_port_vlan *v = SWITCHDEV_OBJ_PORT_VLAN(obj); + struct lan966x *lan966x = port->lan966x; + + if (!netif_is_bridge_master(obj->orig_dev)) + lan966x_vlan_port_del_vlan(port, v->vid); + else + lan966x_vlan_cpu_del_vlan(lan966x, v->vid); + + return 0; +} + +static int lan966x_handle_port_obj_del(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj) +{ + struct lan966x_port *port = netdev_priv(dev); + int err; + + if (ctx && ctx != port) + return 0; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = lan966x_handle_port_vlan_del(port, obj); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int lan966x_switchdev_blocking_event(struct notifier_block *nb, + unsigned long event, + void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + int err; + + switch (event) { + case SWITCHDEV_PORT_OBJ_ADD: + err = switchdev_handle_port_obj_add(dev, ptr, + lan966x_netdevice_check, + lan966x_handle_port_obj_add); + return notifier_from_errno(err); + case SWITCHDEV_PORT_OBJ_DEL: + err = switchdev_handle_port_obj_del(dev, ptr, + lan966x_netdevice_check, + lan966x_handle_port_obj_del); + return notifier_from_errno(err); + case SWITCHDEV_PORT_ATTR_SET: + err = switchdev_handle_port_attr_set(dev, ptr, + lan966x_netdevice_check, + lan966x_port_attr_set); + return notifier_from_errno(err); + } + + return NOTIFY_DONE; +} + +static struct notifier_block lan966x_netdevice_nb __read_mostly = { + .notifier_call = lan966x_netdevice_event, +}; + +static struct notifier_block lan966x_switchdev_nb __read_mostly = { + .notifier_call = lan966x_switchdev_event, +}; + +static struct notifier_block lan966x_switchdev_blocking_nb __read_mostly = { + .notifier_call = lan966x_switchdev_blocking_event, +}; + +void lan966x_register_notifier_blocks(void) +{ + register_netdevice_notifier(&lan966x_netdevice_nb); + register_switchdev_notifier(&lan966x_switchdev_nb); + register_switchdev_blocking_notifier(&lan966x_switchdev_blocking_nb); +} + +void lan966x_unregister_notifier_blocks(void) +{ + unregister_switchdev_blocking_notifier(&lan966x_switchdev_blocking_nb); + unregister_switchdev_notifier(&lan966x_switchdev_nb); + unregister_netdevice_notifier(&lan966x_netdevice_nb); +} diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c new file mode 100644 index 000000000000..057f48ddf22c --- /dev/null +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include "lan966x_main.h" + +#define VLANACCESS_CMD_IDLE 0 +#define VLANACCESS_CMD_READ 1 +#define VLANACCESS_CMD_WRITE 2 +#define VLANACCESS_CMD_INIT 3 + +static int lan966x_vlan_get_status(struct lan966x *lan966x) +{ + return lan_rd(lan966x, ANA_VLANACCESS); +} + +static int lan966x_vlan_wait_for_completion(struct lan966x *lan966x) +{ + u32 val; + + return readx_poll_timeout(lan966x_vlan_get_status, + lan966x, val, + (val & ANA_VLANACCESS_VLAN_TBL_CMD) == + VLANACCESS_CMD_IDLE, + TABLE_UPDATE_SLEEP_US, TABLE_UPDATE_TIMEOUT_US); +} + +static void lan966x_vlan_set_mask(struct lan966x *lan966x, u16 vid) +{ + u16 mask = lan966x->vlan_mask[vid]; + bool cpu_dis; + + cpu_dis = !(mask & BIT(CPU_PORT)); + + /* Set flags and the VID to configure */ + lan_rmw(ANA_VLANTIDX_VLAN_PGID_CPU_DIS_SET(cpu_dis) | + ANA_VLANTIDX_V_INDEX_SET(vid), + ANA_VLANTIDX_VLAN_PGID_CPU_DIS | + ANA_VLANTIDX_V_INDEX, + lan966x, ANA_VLANTIDX); + + /* Set the vlan port members mask */ + lan_rmw(ANA_VLAN_PORT_MASK_VLAN_PORT_MASK_SET(mask), + ANA_VLAN_PORT_MASK_VLAN_PORT_MASK, + lan966x, ANA_VLAN_PORT_MASK); + + /* Issue a write command */ + lan_rmw(ANA_VLANACCESS_VLAN_TBL_CMD_SET(VLANACCESS_CMD_WRITE), + ANA_VLANACCESS_VLAN_TBL_CMD, + lan966x, ANA_VLANACCESS); + + if (lan966x_vlan_wait_for_completion(lan966x)) + dev_err(lan966x->dev, "Vlan set mask failed\n"); +} + +static void lan966x_vlan_port_add_vlan_mask(struct lan966x_port *port, u16 vid) +{ + struct lan966x *lan966x = port->lan966x; + u8 p = port->chip_port; + + lan966x->vlan_mask[vid] |= BIT(p); + lan966x_vlan_set_mask(lan966x, vid); +} + +static void lan966x_vlan_port_del_vlan_mask(struct lan966x_port *port, u16 vid) +{ + struct lan966x *lan966x = port->lan966x; + u8 p = port->chip_port; + + lan966x->vlan_mask[vid] &= ~BIT(p); + lan966x_vlan_set_mask(lan966x, vid); +} + +static bool lan966x_vlan_port_any_vlan_mask(struct lan966x *lan966x, u16 vid) +{ + return !!(lan966x->vlan_mask[vid] & ~BIT(CPU_PORT)); +} + +static void lan966x_vlan_cpu_add_vlan_mask(struct lan966x *lan966x, u16 vid) +{ + lan966x->vlan_mask[vid] |= BIT(CPU_PORT); + lan966x_vlan_set_mask(lan966x, vid); +} + +static void lan966x_vlan_cpu_del_vlan_mask(struct lan966x *lan966x, u16 vid) +{ + lan966x->vlan_mask[vid] &= ~BIT(CPU_PORT); + lan966x_vlan_set_mask(lan966x, vid); +} + +static void lan966x_vlan_cpu_add_cpu_vlan_mask(struct lan966x *lan966x, u16 vid) +{ + __set_bit(vid, lan966x->cpu_vlan_mask); +} + +static void lan966x_vlan_cpu_del_cpu_vlan_mask(struct lan966x *lan966x, u16 vid) +{ + __clear_bit(vid, lan966x->cpu_vlan_mask); +} + +bool lan966x_vlan_cpu_member_cpu_vlan_mask(struct lan966x *lan966x, u16 vid) +{ + return test_bit(vid, lan966x->cpu_vlan_mask); +} + +static u16 lan966x_vlan_port_get_pvid(struct lan966x_port *port) +{ + struct lan966x *lan966x = port->lan966x; + + if (!(lan966x->bridge_mask & BIT(port->chip_port))) + return HOST_PVID; + + return port->vlan_aware ? port->pvid : UNAWARE_PVID; +} + +int lan966x_vlan_port_set_vid(struct lan966x_port *port, u16 vid, + bool pvid, bool untagged) +{ + struct lan966x *lan966x = port->lan966x; + + /* Egress vlan classification */ + if (untagged && port->vid != vid) { + if (port->vid) { + dev_err(lan966x->dev, + "Port already has a native VLAN: %d\n", + port->vid); + return -EBUSY; + } + port->vid = vid; + } + + /* Default ingress vlan classification */ + if (pvid) + port->pvid = vid; + + return 0; +} + +static void lan966x_vlan_port_remove_vid(struct lan966x_port *port, u16 vid) +{ + if (port->pvid == vid) + port->pvid = 0; + + if (port->vid == vid) + port->vid = 0; +} + +void lan966x_vlan_port_set_vlan_aware(struct lan966x_port *port, + bool vlan_aware) +{ + port->vlan_aware = vlan_aware; +} + +void lan966x_vlan_port_apply(struct lan966x_port *port) +{ + struct lan966x *lan966x = port->lan966x; + u16 pvid; + u32 val; + + pvid = lan966x_vlan_port_get_pvid(port); + + /* Ingress clasification (ANA_PORT_VLAN_CFG) */ + /* Default vlan to classify for untagged frames (may be zero) */ + val = ANA_VLAN_CFG_VLAN_VID_SET(pvid); + if (port->vlan_aware) + val |= ANA_VLAN_CFG_VLAN_AWARE_ENA_SET(1) | + ANA_VLAN_CFG_VLAN_POP_CNT_SET(1); + + lan_rmw(val, + ANA_VLAN_CFG_VLAN_VID | ANA_VLAN_CFG_VLAN_AWARE_ENA | + ANA_VLAN_CFG_VLAN_POP_CNT, + lan966x, ANA_VLAN_CFG(port->chip_port)); + + /* Drop frames with multicast source address */ + val = ANA_DROP_CFG_DROP_MC_SMAC_ENA_SET(1); + if (port->vlan_aware && !pvid) + /* If port is vlan-aware and tagged, drop untagged and priority + * tagged frames. + */ + val |= ANA_DROP_CFG_DROP_UNTAGGED_ENA_SET(1) | + ANA_DROP_CFG_DROP_PRIO_S_TAGGED_ENA_SET(1) | + ANA_DROP_CFG_DROP_PRIO_C_TAGGED_ENA_SET(1); + + lan_wr(val, lan966x, ANA_DROP_CFG(port->chip_port)); + + /* Egress configuration (REW_TAG_CFG): VLAN tag type to 8021Q */ + val = REW_TAG_CFG_TAG_TPID_CFG_SET(0); + if (port->vlan_aware) { + if (port->vid) + /* Tag all frames except when VID == DEFAULT_VLAN */ + val |= REW_TAG_CFG_TAG_CFG_SET(1); + else + val |= REW_TAG_CFG_TAG_CFG_SET(3); + } + + /* Update only some bits in the register */ + lan_rmw(val, + REW_TAG_CFG_TAG_TPID_CFG | REW_TAG_CFG_TAG_CFG, + lan966x, REW_TAG_CFG(port->chip_port)); + + /* Set default VLAN and tag type to 8021Q */ + lan_rmw(REW_PORT_VLAN_CFG_PORT_TPID_SET(ETH_P_8021Q) | + REW_PORT_VLAN_CFG_PORT_VID_SET(port->vid), + REW_PORT_VLAN_CFG_PORT_TPID | + REW_PORT_VLAN_CFG_PORT_VID, + lan966x, REW_PORT_VLAN_CFG(port->chip_port)); +} + +void lan966x_vlan_port_add_vlan(struct lan966x_port *port, + u16 vid, + bool pvid, + bool untagged) +{ + struct lan966x *lan966x = port->lan966x; + + /* If the CPU(br) is already part of the vlan then add the fdb + * entries in MAC table to copy the frames to the CPU(br). + * If the CPU(br) is not part of the vlan then it would + * just drop the frames. + */ + if (lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x, vid)) { + lan966x_vlan_cpu_add_vlan_mask(lan966x, vid); + lan966x_fdb_write_entries(lan966x, vid); + } + + lan966x_vlan_port_set_vid(port, vid, pvid, untagged); + lan966x_vlan_port_add_vlan_mask(port, vid); + lan966x_vlan_port_apply(port); +} + +void lan966x_vlan_port_del_vlan(struct lan966x_port *port, u16 vid) +{ + struct lan966x *lan966x = port->lan966x; + + lan966x_vlan_port_remove_vid(port, vid); + lan966x_vlan_port_del_vlan_mask(port, vid); + lan966x_vlan_port_apply(port); + + /* In case there are no other ports in vlan then remove the CPU from + * that vlan but still keep it in the mask because it may be needed + * again then another port gets added in that vlan + */ + if (!lan966x_vlan_port_any_vlan_mask(lan966x, vid)) { + lan966x_vlan_cpu_del_vlan_mask(lan966x, vid); + lan966x_fdb_erase_entries(lan966x, vid); + } +} + +void lan966x_vlan_cpu_add_vlan(struct lan966x *lan966x, u16 vid) +{ + /* Add an entry in the MAC table for the CPU + * Add the CPU part of the vlan only if there is another port in that + * vlan otherwise all the broadcast frames in that vlan will go to CPU + * even if none of the ports are in the vlan and then the CPU will just + * need to discard these frames. It is required to store this + * information so when a front port is added then it would add also the + * CPU port. + */ + if (lan966x_vlan_port_any_vlan_mask(lan966x, vid)) + lan966x_vlan_cpu_add_vlan_mask(lan966x, vid); + + lan966x_vlan_cpu_add_cpu_vlan_mask(lan966x, vid); + lan966x_fdb_write_entries(lan966x, vid); +} + +void lan966x_vlan_cpu_del_vlan(struct lan966x *lan966x, u16 vid) +{ + /* Remove the CPU part of the vlan */ + lan966x_vlan_cpu_del_cpu_vlan_mask(lan966x, vid); + lan966x_vlan_cpu_del_vlan_mask(lan966x, vid); + lan966x_fdb_erase_entries(lan966x, vid); +} + +void lan966x_vlan_init(struct lan966x *lan966x) +{ + u16 port, vid; + + /* Clear VLAN table, by default all ports are members of all VLANS */ + lan_rmw(ANA_VLANACCESS_VLAN_TBL_CMD_SET(VLANACCESS_CMD_INIT), + ANA_VLANACCESS_VLAN_TBL_CMD, + lan966x, ANA_VLANACCESS); + lan966x_vlan_wait_for_completion(lan966x); + + for (vid = 1; vid < VLAN_N_VID; vid++) { + lan966x->vlan_mask[vid] = 0; + lan966x_vlan_set_mask(lan966x, vid); + } + + /* Set all the ports + cpu to be part of HOST_PVID and UNAWARE_PVID */ + lan966x->vlan_mask[HOST_PVID] = + GENMASK(lan966x->num_phys_ports - 1, 0) | BIT(CPU_PORT); + lan966x_vlan_set_mask(lan966x, HOST_PVID); + + lan966x->vlan_mask[UNAWARE_PVID] = + GENMASK(lan966x->num_phys_ports - 1, 0) | BIT(CPU_PORT); + lan966x_vlan_set_mask(lan966x, UNAWARE_PVID); + + lan966x_vlan_cpu_add_cpu_vlan_mask(lan966x, UNAWARE_PVID); + + /* Configure the CPU port to be vlan aware */ + lan_wr(ANA_VLAN_CFG_VLAN_VID_SET(0) | + ANA_VLAN_CFG_VLAN_AWARE_ENA_SET(1) | + ANA_VLAN_CFG_VLAN_POP_CNT_SET(1), + lan966x, ANA_VLAN_CFG(CPU_PORT)); + + /* Set vlan ingress filter mask to all ports */ + lan_wr(GENMASK(lan966x->num_phys_ports, 0), + lan966x, ANA_VLANMASK); + + for (port = 0; port < lan966x->num_phys_ports; port++) { + lan_wr(0, lan966x, REW_PORT_VLAN_CFG(port)); + lan_wr(0, lan966x, REW_TAG_CFG(port)); + } +} diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 58fce173f95b..beb9379424c0 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -303,7 +303,7 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, } filter->action.police_ena = true; - pol_ix = a->police.index + ocelot->vcap_pol.base; + pol_ix = a->hw_index + ocelot->vcap_pol.base; pol_max = ocelot->vcap_pol.max; if (ocelot->vcap_pol.max2 && pol_ix > pol_max) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 224089d04d98..f97eff5afd12 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1867,6 +1867,9 @@ nfp_flower_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, void * void *data, void (*cleanup)(struct flow_block_cb *block_cb)) { + if (!netdev) + return -EOPNOTSUPP; + if (!nfp_fl_is_netdev_to_offload(netdev)) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 18a262ef17f6..aca3c3f0f0ad 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -174,11 +174,14 @@ struct stmmac_flow_entry { /* Rx Frame Steering */ enum stmmac_rfs_type { STMMAC_RFS_T_VLAN, + STMMAC_RFS_T_LLDP, + STMMAC_RFS_T_1588, STMMAC_RFS_T_MAX, }; struct stmmac_rfs_entry { unsigned long cookie; + u16 etype; int in_use; int type; int tc; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index d0a2b289f460..d61766eeac6d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -237,6 +237,8 @@ static int tc_rfs_init(struct stmmac_priv *priv) int i; priv->rfs_entries_max[STMMAC_RFS_T_VLAN] = 8; + priv->rfs_entries_max[STMMAC_RFS_T_LLDP] = 1; + priv->rfs_entries_max[STMMAC_RFS_T_1588] = 1; for (i = 0; i < STMMAC_RFS_T_MAX; i++) priv->rfs_entries_total += priv->rfs_entries_max[i]; @@ -451,6 +453,8 @@ static int tc_parse_flow_actions(struct stmmac_priv *priv, return 0; } +#define ETHER_TYPE_FULL_MASK cpu_to_be16(~0) + static int tc_add_basic_flow(struct stmmac_priv *priv, struct flow_cls_offload *cls, struct stmmac_flow_entry *entry) @@ -464,6 +468,7 @@ static int tc_add_basic_flow(struct stmmac_priv *priv, return -EINVAL; flow_rule_match_basic(rule, &match); + entry->ip_proto = match.key->ip_proto; return 0; } @@ -724,6 +729,114 @@ static int tc_del_vlan_flow(struct stmmac_priv *priv, return 0; } +static int tc_add_ethtype_flow(struct stmmac_priv *priv, + struct flow_cls_offload *cls) +{ + struct stmmac_rfs_entry *entry = tc_find_rfs(priv, cls, false); + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct flow_dissector *dissector = rule->match.dissector; + int tc = tc_classid_to_hwtc(priv->dev, cls->classid); + struct flow_match_basic match; + + if (!entry) { + entry = tc_find_rfs(priv, cls, true); + if (!entry) + return -ENOENT; + } + + /* Nothing to do here */ + if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_BASIC)) + return -EINVAL; + + if (tc < 0) { + netdev_err(priv->dev, "Invalid traffic class\n"); + return -EINVAL; + } + + flow_rule_match_basic(rule, &match); + + if (match.mask->n_proto) { + u16 etype = ntohs(match.key->n_proto); + + if (match.mask->n_proto != ETHER_TYPE_FULL_MASK) { + netdev_err(priv->dev, "Only full mask is supported for EthType filter"); + return -EINVAL; + } + switch (etype) { + case ETH_P_LLDP: + if (priv->rfs_entries_cnt[STMMAC_RFS_T_LLDP] >= + priv->rfs_entries_max[STMMAC_RFS_T_LLDP]) + return -ENOENT; + + entry->type = STMMAC_RFS_T_LLDP; + priv->rfs_entries_cnt[STMMAC_RFS_T_LLDP]++; + + stmmac_rx_queue_routing(priv, priv->hw, + PACKET_DCBCPQ, tc); + break; + case ETH_P_1588: + if (priv->rfs_entries_cnt[STMMAC_RFS_T_1588] >= + priv->rfs_entries_max[STMMAC_RFS_T_1588]) + return -ENOENT; + + entry->type = STMMAC_RFS_T_1588; + priv->rfs_entries_cnt[STMMAC_RFS_T_1588]++; + + stmmac_rx_queue_routing(priv, priv->hw, + PACKET_PTPQ, tc); + break; + default: + netdev_err(priv->dev, "EthType(0x%x) is not supported", etype); + return -EINVAL; + } + + entry->in_use = true; + entry->cookie = cls->cookie; + entry->tc = tc; + entry->etype = etype; + + return 0; + } + + return -EINVAL; +} + +static int tc_del_ethtype_flow(struct stmmac_priv *priv, + struct flow_cls_offload *cls) +{ + struct stmmac_rfs_entry *entry = tc_find_rfs(priv, cls, false); + + if (!entry || !entry->in_use || + entry->type < STMMAC_RFS_T_LLDP || + entry->type > STMMAC_RFS_T_1588) + return -ENOENT; + + switch (entry->etype) { + case ETH_P_LLDP: + stmmac_rx_queue_routing(priv, priv->hw, + PACKET_DCBCPQ, 0); + priv->rfs_entries_cnt[STMMAC_RFS_T_LLDP]--; + break; + case ETH_P_1588: + stmmac_rx_queue_routing(priv, priv->hw, + PACKET_PTPQ, 0); + priv->rfs_entries_cnt[STMMAC_RFS_T_1588]--; + break; + default: + netdev_err(priv->dev, "EthType(0x%x) is not supported", + entry->etype); + return -EINVAL; + } + + entry->in_use = false; + entry->cookie = 0; + entry->tc = 0; + entry->etype = 0; + entry->type = 0; + + return 0; +} + static int tc_add_flow_cls(struct stmmac_priv *priv, struct flow_cls_offload *cls) { @@ -733,6 +846,10 @@ static int tc_add_flow_cls(struct stmmac_priv *priv, if (!ret) return ret; + ret = tc_add_ethtype_flow(priv, cls); + if (!ret) + return ret; + return tc_add_vlan_flow(priv, cls); } @@ -745,6 +862,10 @@ static int tc_del_flow_cls(struct stmmac_priv *priv, if (!ret) return ret; + ret = tc_del_ethtype_flow(priv, cls); + if (!ret) + return ret; + return tc_del_vlan_flow(priv, cls); } diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 44a24b99c894..c6a97fcca0e6 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -66,6 +66,19 @@ #define KSZ8081_LMD_SHORT_INDICATOR BIT(12) #define KSZ8081_LMD_DELTA_TIME_MASK GENMASK(8, 0) +/* Lan8814 general Interrupt control/status reg in GPHY specific block. */ +#define LAN8814_INTC 0x18 +#define LAN8814_INTS 0x1B + +#define LAN8814_INT_LINK_DOWN BIT(2) +#define LAN8814_INT_LINK_UP BIT(0) +#define LAN8814_INT_LINK (LAN8814_INT_LINK_UP |\ + LAN8814_INT_LINK_DOWN) + +#define LAN8814_INTR_CTRL_REG 0x34 +#define LAN8814_INTR_CTRL_REG_POLARITY BIT(1) +#define LAN8814_INTR_CTRL_REG_INTR_ENABLE BIT(0) + /* PHY Control 1 */ #define MII_KSZPHY_CTRL_1 0x1e #define KSZ8081_CTRL1_MDIX_STAT BIT(4) @@ -1620,6 +1633,58 @@ static int lan8804_config_init(struct phy_device *phydev) return 0; } +static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, LAN8814_INTS); + if (irq_status < 0) + return IRQ_NONE; + + if (!(irq_status & LAN8814_INT_LINK)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + +static int lan8814_ack_interrupt(struct phy_device *phydev) +{ + /* bit[12..0] int status, which is a read and clear register. */ + int rc; + + rc = phy_read(phydev, LAN8814_INTS); + + return (rc < 0) ? rc : 0; +} + +static int lan8814_config_intr(struct phy_device *phydev) +{ + int err; + + lanphy_write_page_reg(phydev, 4, LAN8814_INTR_CTRL_REG, + LAN8814_INTR_CTRL_REG_POLARITY | + LAN8814_INTR_CTRL_REG_INTR_ENABLE); + + /* enable / disable interrupts */ + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = lan8814_ack_interrupt(phydev); + if (err) + return err; + + err = phy_write(phydev, LAN8814_INTC, LAN8814_INT_LINK); + } else { + err = phy_write(phydev, LAN8814_INTC, 0); + if (err) + return err; + + err = lan8814_ack_interrupt(phydev); + } + + return err; +} + static struct phy_driver ksphy_driver[] = { { .phy_id = PHY_ID_KS8737, @@ -1802,6 +1867,8 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = kszphy_resume, + .config_intr = lan8814_config_intr, + .handle_interrupt = lan8814_handle_interrupt, }, { .phy_id = PHY_ID_LAN8804, .phy_id_mask = MICREL_PHY_ID_MASK, diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 4733fd7fb169..62c453a21e49 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2613,6 +2613,7 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) if (ieee80211_is_beacon(hdr->frame_control) || ieee80211_is_probe_resp(hdr->frame_control)) { struct ieee80211_mgmt *mgmt = (void *)skb->data; + enum cfg80211_bss_frame_type ftype; u8 *ies; int ies_ch; @@ -2623,9 +2624,14 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) ies = mgmt->u.beacon.variable; + if (ieee80211_is_beacon(mgmt->frame_control)) + ftype = CFG80211_BSS_FTYPE_BEACON; + else + ftype = CFG80211_BSS_FTYPE_PRESP; + ies_ch = cfg80211_get_ies_channel_number(mgmt->u.beacon.variable, skb_tail_pointer(skb) - ies, - sband->band); + sband->band, ftype); if (ies_ch > 0 && ies_ch != channel) { ath10k_dbg(ar, ATH10K_DBG_MGMT, diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index ca68cf63dd27..198ade90b725 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -3,6 +3,8 @@ * Copyright (c) 2018-2020 The Linux Foundation. All rights reserved. */ +#include <linux/vmalloc.h> + #include "debugfs.h" #include "core.h" diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c index 1f8a81987187..d6575feca5a2 100644 --- a/drivers/net/wireless/ath/ath11k/reg.c +++ b/drivers/net/wireless/ath/ath11k/reg.c @@ -2,6 +2,8 @@ /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */ +#include <linux/rtnetlink.h> + #include "core.h" #include "debug.h" diff --git a/drivers/net/wireless/ath/carl9170/tx.c b/drivers/net/wireless/ath/carl9170/tx.c index 88444fe6d1c6..1b76f4434c06 100644 --- a/drivers/net/wireless/ath/carl9170/tx.c +++ b/drivers/net/wireless/ath/carl9170/tx.c @@ -275,12 +275,12 @@ static void carl9170_tx_release(struct kref *ref) if (WARN_ON_ONCE(!ar)) return; - BUILD_BUG_ON( - offsetof(struct ieee80211_tx_info, status.ack_signal) != 20); - - memset(&txinfo->status.ack_signal, 0, - sizeof(struct ieee80211_tx_info) - - offsetof(struct ieee80211_tx_info, status.ack_signal)); + /* + * This does not call ieee80211_tx_info_clear_status() because + * carl9170_tx_fill_rateinfo() has filled the rate information + * before we get to this point. + */ + memset_after(&txinfo->status, 0, rates); if (atomic_read(&ar->tx_total_queued)) ar->tx_schedule = true; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 9ef89ff09105..dd58c8f9aa11 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -608,7 +608,8 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = { .phy_cap_info[9] = IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB | IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB | - IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED, + (IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_RESERVED << + IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_POS), .phy_cap_info[10] = IEEE80211_HE_PHY_CAP10_HE_MU_M1RU_MAX_LTF, }, @@ -665,7 +666,8 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = { IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI | IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_242, .phy_cap_info[9] = - IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED, + IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_RESERVED + << IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_POS, }, /* * Set default Tx/Rx HE MCS NSS Support field. diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index efda37645fd4..65f4fe3ef504 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2215,24 +2215,24 @@ static void iwl_mvm_cfg_he_sta(struct iwl_mvm *mvm, } flags |= STA_CTXT_HE_PACKET_EXT; - } else if ((sta->he_cap.he_cap_elem.phy_cap_info[9] & - IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK) != - IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED) { + } else if (u8_get_bits(sta->he_cap.he_cap_elem.phy_cap_info[9], + IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK) + != IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_RESERVED) { int low_th = -1; int high_th = -1; /* Take the PPE thresholds from the nominal padding info */ - switch (sta->he_cap.he_cap_elem.phy_cap_info[9] & - IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK) { - case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_0US: + switch (u8_get_bits(sta->he_cap.he_cap_elem.phy_cap_info[9], + IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK)) { + case IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_0US: low_th = IWL_HE_PKT_EXT_NONE; high_th = IWL_HE_PKT_EXT_NONE; break; - case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_8US: + case IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_8US: low_th = IWL_HE_PKT_EXT_BPSK; high_th = IWL_HE_PKT_EXT_NONE; break; - case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US: + case IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US: low_th = IWL_HE_PKT_EXT_NONE; high_th = IWL_HE_PKT_EXT_BPSK; break; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 58d4d3bf6f86..87630d38dc52 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -5,6 +5,7 @@ * Copyright (C) 2016-2017 Intel Deutschland GmbH */ #include <linux/module.h> +#include <linux/rtnetlink.h> #include <linux/vmalloc.h> #include <net/mac80211.h> diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c b/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c index f702ad85e609..78450366312b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c @@ -4,6 +4,7 @@ */ #include "mvm.h" #include <linux/nl80211-vnd-intel.h> +#include <net/netlink.h> static const struct nla_policy iwl_mvm_vendor_attr_policy[NUM_IWL_MVM_VENDOR_ATTR] = { diff --git a/drivers/net/wireless/intersil/p54/txrx.c b/drivers/net/wireless/intersil/p54/txrx.c index 873fea59894f..8414aa208655 100644 --- a/drivers/net/wireless/intersil/p54/txrx.c +++ b/drivers/net/wireless/intersil/p54/txrx.c @@ -431,11 +431,7 @@ static void p54_rx_frame_sent(struct p54_common *priv, struct sk_buff *skb) * Clear manually, ieee80211_tx_info_clear_status would * clear the counts too and we need them. */ - memset(&info->status.ack_signal, 0, - sizeof(struct ieee80211_tx_info) - - offsetof(struct ieee80211_tx_info, status.ack_signal)); - BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, - status.ack_signal) != 20); + memset_after(&info->status, 0, rates); if (entry_hdr->flags & cpu_to_le16(P54_HDR_FLAG_DATA_ALIGN)) pad = entry_data->align[0]; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 23219f3747f8..0307a6677907 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1276,7 +1276,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); /* If the queue contains MAX_QUEUE skb's drop some */ if (skb_queue_len(&data->pending) >= MAX_QUEUE) { - /* Droping until WARN_QUEUE level */ + /* Dropping until WARN_QUEUE level */ while (skb_queue_len(&data->pending) >= WARN_QUEUE) { ieee80211_free_txskb(hw, skb_dequeue(&data->pending)); data->tx_dropped++; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index 4fa8e7ba93e6..d054cdecd5f7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -853,7 +853,8 @@ mt7915_init_he_caps(struct mt7915_phy *phy, enum nl80211_band band, mt7915_gen_ppe_thresh(he_cap->ppe_thres, nss); } else { he_cap_elem->phy_cap_info[9] |= - IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US; + u8_encode_bits(IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US, + IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK); } idx++; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 3effe22f69c0..7a8d2596c226 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -152,7 +152,8 @@ mt7921_init_he_caps(struct mt7921_phy *phy, enum nl80211_band band, mt7921_gen_ppe_thresh(he_cap->ppe_thres, nss); } else { he_cap_elem->phy_cap_info[9] |= - IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US; + u8_encode_bits(IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US, + IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK); } if (band == NL80211_BAND_6GHZ) { diff --git a/drivers/net/wireless/mediatek/mt76/testmode.h b/drivers/net/wireless/mediatek/mt76/testmode.h index 5e2792d81576..0590c35c7126 100644 --- a/drivers/net/wireless/mediatek/mt76/testmode.h +++ b/drivers/net/wireless/mediatek/mt76/testmode.h @@ -7,6 +7,8 @@ #define MT76_TM_TIMEOUT 10 +#include <net/netlink.h> + /** * enum mt76_testmode_attr - testmode attributes inside NL80211_ATTR_TESTDATA * diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 00ae86807dc2..a0737eea9f81 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Copyright(c) 2019-2020 Realtek Corporation */ +#include <linux/ip.h> +#include <linux/udp.h> #include "coex.h" #include "core.h" @@ -2119,7 +2121,8 @@ static void rtw89_init_he_cap(struct rtw89_dev *rtwdev, IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU | IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB | IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB | - IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US; + u8_encode_bits(IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US, + IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK); if (i == NL80211_IFTYPE_STATION) phy_cap_info[9] |= IEEE80211_HE_PHY_CAP9_TX_1024_QAM_LESS_THAN_242_TONE_RU; he_cap->he_mcs_nss_supp.rx_mcs_80 = cpu_to_le16(mcs_map); diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c index 9756d75ef24e..22bd1d03e722 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.c +++ b/drivers/net/wireless/realtek/rtw89/debug.c @@ -2,6 +2,8 @@ /* Copyright(c) 2019-2020 Realtek Corporation */ +#include <linux/vmalloc.h> + #include "coex.h" #include "debug.h" #include "fw.h" diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index d4b59fbe7365..8a57b75b07c0 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -781,7 +781,7 @@ static void __get_sta_he_pkt_padding(struct rtw89_dev *rtwdev, if (!ppe_th) { u8 pad; - pad = FIELD_GET(IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK, + pad = FIELD_GET(IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK, sta->he_cap.he_cap_elem.phy_cap_info[9]); for (i = 0; i < RTW89_PPE_BW_NUM; i++) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 11d7af260f20..559b6c644938 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2258,11 +2258,12 @@ enum ieee80211_client_reg_power { #define IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU 0x08 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB 0x10 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB 0x20 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_0US 0x00 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_8US 0x40 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US 0x80 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED 0xc0 -#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK 0xc0 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_0US 0x0 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_8US 0x1 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US 0x2 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_RESERVED 0x3 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_POS 6 +#define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK 0xc0 #define IEEE80211_HE_PHY_CAP10_HE_MU_M1RU_MAX_LTF 0x01 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a419718612c6..8b0bdeb4734e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -920,6 +920,7 @@ enum tc_setup_type { TC_SETUP_QDISC_TBF, TC_SETUP_QDISC_FIFO, TC_SETUP_QDISC_HTB, + TC_SETUP_ACT, }; /* These structures hold the attributes of bpf state that are being passed diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 231e06b74b50..c35f3962dc4f 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -230,6 +230,13 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw); bool rfkill_blocked(struct rfkill *rfkill); /** + * rfkill_soft_blocked - Query soft rfkill block state + * + * @rfkill: rfkill struct to query + */ +bool rfkill_soft_blocked(struct rfkill *rfkill); + +/** * rfkill_find_type - Helper for finding rfkill type by name * @name: the name of the type * diff --git a/include/linux/wwan.h b/include/linux/wwan.h index e143c88bf4b0..afb3334ec8c5 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -4,12 +4,9 @@ #ifndef __WWAN_H #define __WWAN_H -#include <linux/device.h> -#include <linux/kernel.h> #include <linux/poll.h> -#include <linux/skbuff.h> -#include <linux/netlink.h> #include <linux/netdevice.h> +#include <linux/types.h> /** * enum wwan_port_type - WWAN port types @@ -37,6 +34,10 @@ enum wwan_port_type { WWAN_PORT_UNKNOWN, }; +struct device; +struct file; +struct netlink_ext_ack; +struct sk_buff; struct wwan_port; /** struct wwan_port_ops - The WWAN port operations diff --git a/include/net/act_api.h b/include/net/act_api.h index b5b624c7e488..3049cb69c025 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -7,6 +7,7 @@ */ #include <linux/refcount.h> +#include <net/flow_offload.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/net_namespace.h> @@ -44,6 +45,7 @@ struct tc_action { u8 hw_stats; u8 used_hw_stats; bool used_hw_stats_valid; + u32 in_hw_count; }; #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt @@ -88,6 +90,16 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) dtm->expires = jiffies_to_clock_t(stm->expires); } +static inline enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) +{ + if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY)) + return FLOW_ACTION_HW_STATS_DONT_CARE; + else if (!hw_stats) + return FLOW_ACTION_HW_STATS_DISABLED; + + return hw_stats; +} + #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 @@ -121,6 +133,8 @@ struct tc_action_ops { struct psample_group * (*get_psample_group)(const struct tc_action *a, tc_action_priv_destructor *destructor); + int (*offload_act_setup)(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind); }; struct tc_action_net { @@ -189,7 +203,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action *actions[], int init_res[], size_t *attr_size, - u32 flags, struct netlink_ext_ack *extack); + u32 flags, u32 fl_flags, struct netlink_ext_ack *extack); struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, bool rtnl_held, struct netlink_ext_ack *extack); @@ -240,6 +254,9 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, u64 drops, bool hw); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); +int tcf_action_update_hw_stats(struct tc_action *action); +int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, + void *cb_priv, bool add); int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, struct tcf_chain **handle, struct netlink_ext_ack *newchain); @@ -251,6 +268,14 @@ DECLARE_STATIC_KEY_FALSE(tcf_frag_xmit_count); #endif int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); + +#else /* !CONFIG_NET_CLS_ACT */ + +static inline int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, + void *cb_priv, bool add) { + return 0; +} + #endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 423f97b982ff..d19e48f9fdc6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1188,17 +1188,6 @@ struct cfg80211_unsol_bcast_probe_resp { }; /** - * enum cfg80211_ap_settings_flags - AP settings flags - * - * Used by cfg80211_ap_settings - * - * @AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external authentication - */ -enum cfg80211_ap_settings_flags { - AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = BIT(0), -}; - -/** * struct cfg80211_ap_settings - AP configuration * * Used to configure an AP interface. @@ -4072,6 +4061,15 @@ struct mgmt_frame_regs { * @set_fils_aad: Set FILS AAD data to the AP driver so that the driver can use * those to decrypt (Re)Association Request and encrypt (Re)Association * Response frame. + * + * @set_radar_background: Configure dedicated offchannel chain available for + * radar/CAC detection on some hw. This chain can't be used to transmit + * or receive frames and it is bounded to a running wdev. + * Background radar/CAC detection allows to avoid the CAC downtime + * switching to a different channel during CAC detection on the selected + * radar channel. + * The caller is expected to set chandef pointer to NULL in order to + * disable background CAC/radar detection. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4404,6 +4402,8 @@ struct cfg80211_ops { struct cfg80211_color_change_settings *params); int (*set_fils_aad)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_fils_aad *fils_aad); + int (*set_radar_background)(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef); }; /* @@ -6375,17 +6375,6 @@ static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid, } /** - * cfg80211_get_ies_channel_number - returns the channel number from ies - * @ie: IEs - * @ielen: length of IEs - * @band: enum nl80211_band of the channel - * - * Returns the channel number, or -1 if none could be determined. - */ -int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, - enum nl80211_band band); - -/** * cfg80211_is_element_inherited - returns if element ID should be inherited * @element: element to check * @non_inherit_element: non inheritance element @@ -6421,6 +6410,19 @@ enum cfg80211_bss_frame_type { }; /** + * cfg80211_get_ies_channel_number - returns the channel number from ies + * @ie: IEs + * @ielen: length of IEs + * @band: enum nl80211_band of the channel + * @ftype: frame type + * + * Returns the channel number, or -1 if none could be determined. + */ +int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, + enum nl80211_band band, + enum cfg80211_bss_frame_type ftype); + +/** * cfg80211_inform_bss_data - inform cfg80211 of a new BSS * * @wiphy: the wiphy reporting the BSS @@ -7594,15 +7596,33 @@ void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer, void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp); /** - * cfg80211_radar_event - radar detection event + * __cfg80211_radar_event - radar detection event * @wiphy: the wiphy * @chandef: chandef for the current channel + * @offchan: the radar has been detected on the offchannel chain * @gfp: context flags * * This function is called when a radar is detected on the current chanenl. */ -void cfg80211_radar_event(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, gfp_t gfp); +void __cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + bool offchan, gfp_t gfp); + +static inline void +cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + __cfg80211_radar_event(wiphy, chandef, false, gfp); +} + +static inline void +cfg80211_background_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + __cfg80211_radar_event(wiphy, chandef, true, gfp); +} /** * cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event @@ -7633,6 +7653,14 @@ void cfg80211_cac_event(struct net_device *netdev, const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, gfp_t gfp); +/** + * cfg80211_background_cac_abort - Channel Availability Check offchan abort event + * @wiphy: the wiphy + * + * This function is called by the driver when a Channel Availability Check + * (CAC) is aborted by a offchannel dedicated chain. + */ +void cfg80211_background_cac_abort(struct wiphy *wiphy); /** * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying @@ -8250,6 +8278,18 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, bool is_4addr, u8 check_swif); +/** + * cfg80211_assoc_comeback - notification of association that was + * temporarly rejected with a comeback + * @netdev: network device + * @bss: the bss entry with which association is in progress. + * @timeout: timeout interval value TUs. + * + * this function may sleep. the caller must hold the corresponding wdev's mutex. + */ +void cfg80211_assoc_comeback(struct net_device *netdev, + struct cfg80211_bss *bss, u32 timeout); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/include/net/codel.h b/include/net/codel.h index a6c9e34e62b8..5fed2f16cb8d 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -44,8 +44,6 @@ #include <linux/types.h> #include <linux/ktime.h> #include <linux/skbuff.h> -#include <net/pkt_sched.h> -#include <net/inet_ecn.h> /* Controlling Queue Delay (CoDel) algorithm * ========================================= diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index 137d40d8cbeb..78a27ac73070 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h @@ -49,6 +49,8 @@ * Implemented on linux by Dave Taht and Eric Dumazet */ +#include <net/inet_ecn.h> + static void codel_params_init(struct codel_params *params) { params->interval = MS2TIME(100); diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h index 098630f83a55..58b6d0ebea10 100644 --- a/include/net/codel_qdisc.h +++ b/include/net/codel_qdisc.h @@ -49,6 +49,8 @@ * Implemented on linux by Dave Taht and Eric Dumazet */ +#include <net/pkt_sched.h> + /* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */ struct codel_skb_cb { codel_time_t enqueue_time; diff --git a/include/net/devlink.h b/include/net/devlink.h index 3276a29f2b81..8d5349d2fb68 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -459,6 +459,8 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -511,6 +513,12 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME "enable_iwarp" #define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME "io_eq_size" +#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32 + +#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME "event_eq_size" +#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 3961461d9c8b..5b8c54eb7a6b 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -197,6 +197,7 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie); struct flow_action_entry { enum flow_action_id id; + u32 hw_index; enum flow_action_hw_stats hw_stats; action_destr destructor; void *destructor_priv; @@ -232,7 +233,6 @@ struct flow_action_entry { bool truncate; } sample; struct { /* FLOW_ACTION_POLICE */ - u32 index; u32 burst; u64 rate_bytes_ps; u64 burst_pkt; @@ -267,7 +267,6 @@ struct flow_action_entry { u8 ttl; } mpls_mangle; struct { - u32 index; s32 prio; u64 basetime; u64 cycletime; @@ -552,6 +551,23 @@ struct flow_cls_offload { u32 classid; }; +enum offload_act_command { + FLOW_ACT_REPLACE, + FLOW_ACT_DESTROY, + FLOW_ACT_STATS, +}; + +struct flow_offload_action { + struct netlink_ext_ack *extack; /* NULL in FLOW_ACT_STATS process*/ + enum offload_act_command command; + enum flow_action_id id; + u32 index; + struct flow_stats stats; + struct flow_action action; +}; + +struct flow_offload_action *offload_action_alloc(unsigned int num_actions); + static inline struct flow_rule * flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd) { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dd757f0987b0..8907338d52b5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1205,12 +1205,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) /* clear the rate counts */ for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) info->status.rates[i].count = 0; - - BUILD_BUG_ON( - offsetof(struct ieee80211_tx_info, status.ack_signal) != 20); - memset(&info->status.ampdu_ack_len, 0, - sizeof(struct ieee80211_tx_info) - - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len)); + memset_after(&info->status, 0, rates); } @@ -3944,6 +3939,16 @@ struct ieee80211_prep_tx_info { * twt structure. * @twt_teardown_request: Update the hw with TWT teardown request received * from the peer. + * @set_radar_background: Configure dedicated offchannel chain available for + * radar/CAC detection on some hw. This chain can't be used to transmit + * or receive frames and it is bounded to a running wdev. + * Background radar/CAC detection allows to avoid the CAC downtime + * switching to a different channel during CAC detection on the selected + * radar channel. + * The caller is expected to set chandef pointer to NULL in order to + * disable background CAC/radar detection. + * @net_fill_forward_path: Called from .ndo_fill_forward_path in order to + * resolve a path for hardware flow offloading */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4272,6 +4277,13 @@ struct ieee80211_ops { struct ieee80211_twt_setup *twt); void (*twt_teardown_request)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, u8 flowid); + int (*set_radar_background)(struct ieee80211_hw *hw, + struct cfg80211_chan_def *chandef); + int (*net_fill_forward_path)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct net_device_path_ctx *ctx, + struct net_device_path *path); }; /** @@ -6064,6 +6076,18 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw); void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); /** + * ieee80211_channel_switch_disconnect - disconnect due to channel switch error + * @vif &struct ieee80211_vif pointer from the add_interface callback. + * @block_tx: if %true, do not send deauth frame. + * + * Instruct mac80211 to disconnect due to a channel switch error. The channel + * switch can request to block the tx and so, we need to make sure we do not send + * a deauth frame in this case. + */ +void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, + bool block_tx); + +/** * ieee80211_request_smps - request SM PS transition * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @smps_mode: new SM PS mode diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index cebc1bd713b6..ebef45e821af 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -262,26 +262,31 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) for (; 0; (void)(i), (void)(a), (void)(exts)) #endif +#define tcf_act_for_each_action(i, a, actions) \ + for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++) + static inline void -tcf_exts_stats_update(const struct tcf_exts *exts, - u64 bytes, u64 packets, u64 drops, u64 lastuse, - u8 used_hw_stats, bool used_hw_stats_valid) +tcf_exts_hw_stats_update(const struct tcf_exts *exts, + u64 bytes, u64 packets, u64 drops, u64 lastuse, + u8 used_hw_stats, bool used_hw_stats_valid) { #ifdef CONFIG_NET_CLS_ACT int i; - preempt_disable(); - for (i = 0; i < exts->nr_actions; i++) { struct tc_action *a = exts->actions[i]; - tcf_action_stats_update(a, bytes, packets, drops, - lastuse, true); - a->used_hw_stats = used_hw_stats; - a->used_hw_stats_valid = used_hw_stats_valid; - } + /* if stats from hw, just skip */ + if (tcf_action_update_hw_stats(a)) { + preempt_disable(); + tcf_action_stats_update(a, bytes, packets, drops, + lastuse, true); + preempt_enable(); - preempt_enable(); + a->used_hw_stats = used_hw_stats; + a->used_hw_stats_valid = used_hw_stats_valid; + } + } #endif } @@ -325,6 +330,9 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, u32 flags, struct netlink_ext_ack *extack); +int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **tb, + struct nlattr *rate_tlv, struct tcf_exts *exts, + u32 flags, u32 fl_flags, struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); @@ -536,9 +544,11 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) return ifindex == skb->skb_iif; } -int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts); -void tc_cleanup_flow_action(struct flow_action *flow_action); +int tc_setup_offload_action(struct flow_action *flow_action, + const struct tcf_exts *exts); +void tc_cleanup_offload_action(struct flow_action *flow_action); +int tc_setup_action(struct flow_action *flow_action, + struct tc_action *actions[]); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 189fdb9db162..33cf4789009f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -510,8 +510,8 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport) return (net_hash_mix(net) + lport) & (sctp_ep_hashsize - 1); } -#define sctp_for_each_hentry(epb, head) \ - hlist_for_each_entry(epb, head, node) +#define sctp_for_each_hentry(ep, head) \ + hlist_for_each_entry(ep, head, node) /* Is a socket of this style? */ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e7d1ed7a3dfb..8826b47e4ff7 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1243,10 +1243,6 @@ enum sctp_endpoint_type { */ struct sctp_ep_common { - /* Fields to help us manage our entries in the hash tables. */ - struct hlist_node node; - int hashent; - /* Runtime type information. What kind of endpoint is this? */ enum sctp_endpoint_type type; @@ -1298,6 +1294,10 @@ struct sctp_endpoint { /* Common substructure for endpoint and association. */ struct sctp_ep_common base; + /* Fields to help us manage our entries in the hash tables. */ + struct hlist_node node; + int hashent; + /* Associations: A list of current associations and mappings * to the data consumers for each association. This * may be in the form of a hash table or other diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h index 8bc6be81a7ad..c8fa11ebb397 100644 --- a/include/net/tc_act/tc_gate.h +++ b/include/net/tc_act/tc_gate.h @@ -60,11 +60,6 @@ static inline bool is_tcf_gate(const struct tc_action *a) return false; } -static inline u32 tcf_gate_index(const struct tc_action *a) -{ - return a->tcfa_index; -} - static inline s32 tcf_gate_prio(const struct tc_action *a) { s32 tcfg_prio; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 61cab81e920d..f1a9d6594df2 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1232,6 +1232,11 @@ * &NL80211_ATTR_FILS_NONCES - for FILS Nonces * (STA Nonce 16 bytes followed by AP Nonce 16 bytes) * + * @NL80211_CMD_ASSOC_COMEBACK: notification about an association + * temporal rejection with comeback. The event includes %NL80211_ATTR_MAC + * to describe the BSSID address of the AP and %NL80211_ATTR_TIMEOUT to + * specify the timeout value. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1474,6 +1479,8 @@ enum nl80211_commands { NL80211_CMD_SET_FILS_AAD, + NL80211_CMD_ASSOC_COMEBACK, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2470,7 +2477,9 @@ enum nl80211_commands { * space supports external authentication. This attribute shall be used * with %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP request. The driver * may offload authentication processing to user space if this capability - * is indicated in the respective requests from the user space. + * is indicated in the respective requests from the user space. (This flag + * attribute deprecated for %NL80211_CMD_START_AP, use + * %NL80211_ATTR_AP_SETTINGS_FLAGS) * * @NL80211_ATTR_NSS: Station's New/updated RX_NSS value notified using this * u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED. @@ -2639,6 +2648,17 @@ enum nl80211_commands { * Mandatory parameter for the transmitting interface to enable MBSSID. * Optional for the non-transmitting interfaces. * + * @NL80211_ATTR_RADAR_BACKGROUND: Configure dedicated offchannel chain + * available for radar/CAC detection on some hw. This chain can't be used + * to transmit or receive frames and it is bounded to a running wdev. + * Background radar/CAC detection allows to avoid the CAC downtime + * switching on a different channel during CAC detection on the selected + * radar channel. + * + * @NL80211_ATTR_AP_SETTINGS_FLAGS: u32 attribute contains ap settings flags, + * enumerated in &enum nl80211_ap_settings_flags. This attribute shall be + * used with %NL80211_CMD_START_AP request. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3145,6 +3165,10 @@ enum nl80211_attrs { NL80211_ATTR_MBSSID_CONFIG, NL80211_ATTR_MBSSID_ELEMS, + NL80211_ATTR_RADAR_BACKGROUND, + + NL80211_ATTR_AP_SETTINGS_FLAGS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3738,13 +3762,12 @@ enum nl80211_mpath_info { * capabilities IE * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE: HE PPE thresholds information as * defined in HE capabilities IE - * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band HE capability attribute currently - * defined * @NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA: HE 6GHz band capabilities (__le16), * given for all 6 GHz band channels * @NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS: vendor element capabilities that are * advertised on this band/for this iftype (binary) * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use + * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band attribute currently defined */ enum nl80211_band_iftype_attr { __NL80211_BAND_IFTYPE_ATTR_INVALID, @@ -5555,7 +5578,7 @@ enum nl80211_iface_limit_attrs { * => allows 8 of AP/GO that can have BI gcd >= min gcd * * numbers = [ #{STA} <= 2 ], channels = 2, max = 2 - * => allows two STAs on different channels + * => allows two STAs on the same or on different channels * * numbers = [ #{STA} <= 1, #{P2P-client,P2P-GO} <= 3 ], max = 4 * => allows a STA plus three P2P interfaces @@ -5737,13 +5760,15 @@ enum nl80211_tdls_operation { NL80211_TDLS_DISABLE_LINK, }; -/* +/** * enum nl80211_ap_sme_features - device-integrated AP features - * Reserved for future use, no bits are defined in - * NL80211_ATTR_DEVICE_AP_SME yet. + * @NL80211_AP_SME_SA_QUERY_OFFLOAD: SA Query procedures offloaded to driver + * when user space indicates support for SA Query procedures offload during + * "start ap" with %NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT. + */ enum nl80211_ap_sme_features { + NL80211_AP_SME_SA_QUERY_OFFLOAD = 1 << 0, }; - */ /** * enum nl80211_feature_flags - device/driver features @@ -6051,6 +6076,9 @@ enum nl80211_feature_flags { * frames. Userspace has to share FILS AAD details to the driver by using * @NL80211_CMD_SET_FILS_AAD. * + * @NL80211_EXT_FEATURE_RADAR_BACKGROUND: Device supports background radar/CAC + * detection. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6117,6 +6145,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE, NL80211_EXT_FEATURE_BSS_COLOR, NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD, + NL80211_EXT_FEATURE_RADAR_BACKGROUND, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, @@ -7462,4 +7491,20 @@ enum nl80211_mbssid_config_attributes { NL80211_MBSSID_CONFIG_ATTR_MAX = __NL80211_MBSSID_CONFIG_ATTR_LAST - 1, }; +/** + * enum nl80211_ap_settings_flags - AP settings flags + * + * @NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external + * authentication. + * @NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT: Userspace supports SA Query + * procedures offload to driver. If driver advertises + * %NL80211_AP_SME_SA_QUERY_OFFLOAD in AP SME features, userspace shall + * ignore SA Query procedures and validations when this flag is set by + * userspace. + */ +enum nl80211_ap_settings_flags { + NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = 1 << 0, + NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT = 1 << 1, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 6836ccb9c45d..ee38b35c3f57 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -19,13 +19,16 @@ enum { TCA_ACT_FLAGS, TCA_ACT_HW_STATS, TCA_ACT_USED_HW_STATS, + TCA_ACT_IN_HW_COUNT, __TCA_ACT_MAX }; /* See other TCA_ACT_FLAGS_ * flags in include/net/act_api.h. */ -#define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for - * actions stats. - */ +#define TCA_ACT_FLAGS_NO_PERCPU_STATS (1 << 0) /* Don't use percpu allocator for + * actions stats. + */ +#define TCA_ACT_FLAGS_SKIP_HW (1 << 1) /* don't offload action to HW */ +#define TCA_ACT_FLAGS_SKIP_SW (1 << 2) /* don't use action in SW */ /* tca HW stats type * When user does not pass the attribute, he does not care. diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 5888492a5257..93d934cc4613 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -754,6 +754,8 @@ enum rtnetlink_groups { #define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP RTNLGRP_BRVLAN, #define RTNLGRP_BRVLAN RTNLGRP_BRVLAN + RTNLGRP_MCTP_IFADDR, +#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/core/devlink.c b/net/core/devlink.c index 0a9349a02cad..6366ce324dce 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4466,6 +4466,16 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + .name = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME, + .type = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE, + }, + { + .id = DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + .name = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME, + .type = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 6beaea13564a..73f68d4625f3 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/kernel.h> #include <linux/slab.h> +#include <net/act_api.h> #include <net/flow_offload.h> #include <linux/rtnetlink.h> #include <linux/mutex.h> @@ -27,6 +28,26 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions) } EXPORT_SYMBOL(flow_rule_alloc); +struct flow_offload_action *offload_action_alloc(unsigned int num_actions) +{ + struct flow_offload_action *fl_action; + int i; + + fl_action = kzalloc(struct_size(fl_action, action.entries, num_actions), + GFP_KERNEL); + if (!fl_action) + return NULL; + + fl_action->action.num_entries = num_actions; + /* Pre-fill each action hw_stats with DONT_CARE. + * Caller can override this if it wants stats for a given action. + */ + for (i = 0; i < num_actions; i++) + fl_action->action.entries[i].hw_stats = FLOW_ACTION_HW_STATS_DONT_CARE; + + return fl_action; +} + #define FLOW_DISSECTOR_MATCH(__rule, __type, __out) \ const struct flow_match *__m = &(__rule)->match; \ struct flow_dissector *__d = (__m)->dissector; \ @@ -397,6 +418,8 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) existing_qdiscs_register(cb, cb_priv); mutex_unlock(&flow_indr_block_lock); + tcf_action_reoffload_cb(cb, cb_priv, true); + return 0; } EXPORT_SYMBOL(flow_indr_dev_register); @@ -449,6 +472,7 @@ void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, __flow_block_indr_cleanup(release, cb_priv, &cleanup_list); mutex_unlock(&flow_indr_block_lock); + tcf_action_reoffload_cb(cb, cb_priv, false); flow_block_indr_notify(&cleanup_list); kfree(indr_dev); } @@ -549,19 +573,25 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, void (*cleanup)(struct flow_block_cb *block_cb)) { struct flow_indr_dev *this; + u32 count = 0; + int err; mutex_lock(&flow_indr_block_lock); + if (bo) { + if (bo->command == FLOW_BLOCK_BIND) + indir_dev_add(data, dev, sch, type, cleanup, bo); + else if (bo->command == FLOW_BLOCK_UNBIND) + indir_dev_remove(data); + } - if (bo->command == FLOW_BLOCK_BIND) - indir_dev_add(data, dev, sch, type, cleanup, bo); - else if (bo->command == FLOW_BLOCK_UNBIND) - indir_dev_remove(data); - - list_for_each_entry(this, &flow_block_indr_dev_list, list) - this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup); + list_for_each_entry(this, &flow_block_indr_dev_list, list) { + err = this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup); + if (!err) + count++; + } mutex_unlock(&flow_indr_block_lock); - return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0; + return (bo && list_empty(&bo->cb_list)) ? -EOPNOTSUPP : count; } EXPORT_SYMBOL(flow_indr_dev_setup_offload); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index bd3d3195097f..dc3746d5cdc1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #include <linux/ieee80211.h> @@ -3198,6 +3198,18 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif) } EXPORT_SYMBOL(ieee80211_csa_finish); +void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_tx) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct ieee80211_local *local = sdata->local; + + sdata->csa_block_tx = block_tx; + sdata_info(sdata, "channel switch failed, disconnecting\n"); + ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); +} +EXPORT_SYMBOL(ieee80211_channel_switch_disconnect); + static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata, u32 *changed) { @@ -4268,6 +4280,21 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata, changed |= BSS_CHANGED_HE_BSS_COLOR; ieee80211_bss_info_change_notify(sdata, changed); + + if (!sdata->vif.bss_conf.nontransmitted && sdata->vif.mbssid_tx_vif) { + struct ieee80211_sub_if_data *child; + + mutex_lock(&sdata->local->iflist_mtx); + list_for_each_entry(child, &sdata->local->interfaces, list) { + if (child != sdata && child->vif.mbssid_tx_vif == &sdata->vif) { + child->vif.bss_conf.he_bss_color.color = color; + child->vif.bss_conf.he_bss_color.enabled = enable; + ieee80211_bss_info_change_notify(child, + BSS_CHANGED_HE_BSS_COLOR); + } + } + mutex_unlock(&sdata->local->iflist_mtx); + } } static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata) @@ -4352,6 +4379,9 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, sdata_assert_lock(sdata); + if (sdata->vif.bss_conf.nontransmitted) + return -EINVAL; + mutex_lock(&local->mtx); /* don't allow another color change if one is already active or if csa @@ -4383,6 +4413,18 @@ out: return err; } +static int +ieee80211_set_radar_background(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + if (!local->ops->set_radar_background) + return -EOPNOTSUPP; + + return local->ops->set_radar_background(&local->hw, chandef); +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -4487,4 +4529,5 @@ const struct cfg80211_ops mac80211_config_ops = { .reset_tid_config = ieee80211_reset_tid_config, .set_sar_specs = ieee80211_set_sar_specs, .color_change = ieee80211_color_change, + .set_radar_background = ieee80211_set_radar_background, }; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 481f01b0f65c..9479f2787ea7 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -936,14 +936,15 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf, PFLAG(PHY, 9, RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB, "RX-FULL-BW-SU-USING-MU-WITH-NON-COMP-SIGB"); - switch (cap[9] & IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK) { - case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_0US: + switch (u8_get_bits(cap[9], + IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK)) { + case IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_0US: PRINT("NOMINAL-PACKET-PADDING-0US"); break; - case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_8US: + case IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_8US: PRINT("NOMINAL-PACKET-PADDING-8US"); break; - case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US: + case IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US: PRINT("NOMINAL-PACKET-PADDING-16US"); break; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index c336267f4599..4e2fc1a08681 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1486,4 +1486,26 @@ static inline void drv_twt_teardown_request(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline int drv_net_fill_forward_path(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct net_device_path_ctx *ctx, + struct net_device_path *path) +{ + int ret = -EOPNOTSUPP; + + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; + + trace_drv_net_fill_forward_path(local, sdata, sta); + if (local->ops->net_fill_forward_path) + ret = local->ops->net_fill_forward_path(&local->hw, + &sdata->vif, sta, + ctx, path); + trace_drv_return_int(local, ret); + + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 5666bbb8860b..08c0542c93a3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1463,7 +1463,7 @@ struct ieee80211_local { }; static inline struct ieee80211_sub_if_data * -IEEE80211_DEV_TO_SUB_IF(struct net_device *dev) +IEEE80211_DEV_TO_SUB_IF(const struct net_device *dev) { return netdev_priv(dev); } diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 20aa5cc31f77..41531478437c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -789,6 +789,64 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_get_stats64 = ieee80211_get_stats64, }; +static int ieee80211_netdev_fill_forward_path(struct net_device_path_ctx *ctx, + struct net_device_path *path) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_local *local; + struct sta_info *sta; + int ret = -ENOENT; + + sdata = IEEE80211_DEV_TO_SUB_IF(ctx->dev); + local = sdata->local; + + if (!local->ops->net_fill_forward_path) + return -EOPNOTSUPP; + + rcu_read_lock(); + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + sta = rcu_dereference(sdata->u.vlan.sta); + if (sta) + break; + if (sdata->wdev.use_4addr) + goto out; + if (is_multicast_ether_addr(ctx->daddr)) + goto out; + sta = sta_info_get_bss(sdata, ctx->daddr); + break; + case NL80211_IFTYPE_AP: + if (is_multicast_ether_addr(ctx->daddr)) + goto out; + sta = sta_info_get(sdata, ctx->daddr); + break; + case NL80211_IFTYPE_STATION: + if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) { + sta = sta_info_get(sdata, ctx->daddr); + if (sta && test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) + goto out; + + break; + } + } + + sta = sta_info_get(sdata, sdata->u.mgd.bssid); + break; + default: + goto out; + } + + if (!sta) + goto out; + + ret = drv_net_fill_forward_path(local, sdata, &sta->sta, ctx, path); +out: + rcu_read_unlock(); + + return ret; +} + static const struct net_device_ops ieee80211_dataif_8023_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, @@ -798,6 +856,7 @@ static const struct net_device_ops ieee80211_dataif_8023_ops = { .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_netdev_select_queue, .ndo_get_stats64 = ieee80211_get_stats64, + .ndo_fill_forward_path = ieee80211_netdev_fill_forward_path, }; static bool ieee80211_iftype_supports_hdr_offload(enum nl80211_iftype iftype) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 45fb517591ee..5311c3cd3050 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1131,17 +1131,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->scan_ies_len += 2 + sizeof(struct ieee80211_vht_cap); - /* HE cap element is variable in size - set len to allow max size */ /* - * TODO: 1 is added at the end of the calculation to accommodate for - * the temporary placing of the HE capabilities IE under EXT. - * Remove it once it is placed in the final place. - */ - if (supp_he) + * HE cap element is variable in size - set len to allow max size */ + if (supp_he) { local->scan_ies_len += - 2 + sizeof(struct ieee80211_he_cap_elem) + + 3 + sizeof(struct ieee80211_he_cap_elem) + sizeof(struct ieee80211_he_mcs_nss_supp) + - IEEE80211_HE_PPE_THRES_MAX_LEN + 1; + IEEE80211_HE_PPE_THRES_MAX_LEN; + } if (!local->ops->hw_scan) { /* For hw_scan, driver needs to set these up. */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 37f7d975f3da..51f55c4ee3c6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -164,12 +164,15 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, chandef->freq1_offset = channel->freq_offset; if (channel->band == NL80211_BAND_6GHZ) { - if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, chandef)) + if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, chandef)) { + mlme_dbg(sdata, + "bad 6 GHz operation, disabling HT/VHT/HE\n"); ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT | IEEE80211_STA_DISABLE_HE; - else + } else { ret = 0; + } vht_chandef = *chandef; goto out; } else if (sband->band == NL80211_BAND_S1GHZ) { @@ -190,6 +193,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); if (!ht_oper || !sta_ht_cap.ht_supported) { + mlme_dbg(sdata, "HT operation missing / HT not supported\n"); ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT | IEEE80211_STA_DISABLE_HE; @@ -223,6 +227,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, if (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { ieee80211_chandef_ht_oper(ht_oper, chandef); } else { + mlme_dbg(sdata, "40 MHz not supported\n"); /* 40 MHz (and 80 MHz) must be supported for VHT */ ret = IEEE80211_STA_DISABLE_VHT; /* also mark 40 MHz disabled */ @@ -231,6 +236,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } if (!vht_oper || !sband->vht_cap.vht_supported) { + mlme_dbg(sdata, "VHT operation missing / VHT not supported\n"); ret = IEEE80211_STA_DISABLE_VHT; goto out; } @@ -253,7 +259,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, &vht_chandef)) { if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) sdata_info(sdata, - "HE AP VHT information is invalid, disable HE\n"); + "HE AP VHT information is invalid, disabling HE\n"); ret = IEEE80211_STA_DISABLE_HE; goto out; } @@ -263,7 +269,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, &vht_chandef)) { if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, - "AP VHT information is invalid, disable VHT\n"); + "AP VHT information is invalid, disabling VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; goto out; } @@ -271,7 +277,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, if (!cfg80211_chandef_valid(&vht_chandef)) { if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, - "AP VHT information is invalid, disable VHT\n"); + "AP VHT information is invalid, disabling VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; goto out; } @@ -284,7 +290,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, - "AP VHT information doesn't match HT, disable VHT\n"); + "AP VHT information doesn't match HT, disabling VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; goto out; } @@ -649,10 +655,6 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, if (!he_cap || !reg_cap) return; - /* - * TODO: the 1 added is because this temporarily is under the EXTENSION - * IE. Get rid of it when it moves. - */ he_cap_size = 2 + 1 + sizeof(he_cap->he_cap_elem) + ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) + @@ -3741,6 +3743,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, elems->timeout_int && elems->timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) { u32 tu, ms; + + cfg80211_assoc_comeback(sdata->dev, assoc_data->bss, + le32_to_cpu(elems->timeout_int->value)); + tu = le32_to_cpu(elems->timeout_int->value); ms = tu * 1024 / 1000; sdata_info(sdata, @@ -5043,19 +5049,23 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, /* disable HT/VHT/HE if we don't support them */ if (!sband->ht_cap.ht_supported && !is_6ghz) { + mlme_dbg(sdata, "HT not supported, disabling HT/VHT/HE\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_HT; ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } if (!sband->vht_cap.vht_supported && is_5ghz) { + mlme_dbg(sdata, "VHT not supported, disabling VHT/HE\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } if (!ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif))) + ieee80211_vif_type_p2p(&sdata->vif))) { + mlme_dbg(sdata, "HE not supported, disabling it\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_HE; + } if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && !is_6ghz) { ht_oper = elems->ht_operation; @@ -5079,6 +5089,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, } if (!elems->vht_cap_elem) { + sdata_info(sdata, + "bad VHT capabilities, disabling VHT\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; vht_oper = NULL; } @@ -5126,8 +5138,10 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, break; } - if (!have_80mhz) + if (!have_80mhz) { + sdata_info(sdata, "80 MHz not supported, disabling VHT\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + } if (sband->band == NL80211_BAND_S1GHZ) { s1g_oper = elems->s1g_oper; @@ -5691,12 +5705,14 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, else if (!is_6ghz) ifmgd->flags |= IEEE80211_STA_DISABLE_HT; vht_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_VHT_CAPABILITY); - if (vht_elem && vht_elem->datalen >= sizeof(struct ieee80211_vht_cap)) + if (vht_elem && vht_elem->datalen >= sizeof(struct ieee80211_vht_cap)) { memcpy(&assoc_data->ap_vht_cap, vht_elem->data, sizeof(struct ieee80211_vht_cap)); - else if (is_5ghz) + } else if (is_5ghz) { + sdata_info(sdata, "VHT capa missing/short, disabling VHT/HE\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_VHT | IEEE80211_STA_DISABLE_HE; + } rcu_read_unlock(); if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) && @@ -5770,16 +5786,21 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } if (req->flags & ASSOC_REQ_DISABLE_HT) { + mlme_dbg(sdata, "HT disabled by flag, disabling HT/VHT/HE\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_HT; ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } - if (req->flags & ASSOC_REQ_DISABLE_VHT) + if (req->flags & ASSOC_REQ_DISABLE_VHT) { + mlme_dbg(sdata, "VHT disabled by flag, disabling VHT\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + } - if (req->flags & ASSOC_REQ_DISABLE_HE) + if (req->flags & ASSOC_REQ_DISABLE_HE) { + mlme_dbg(sdata, "HE disabled by flag, disabling VHT\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_HE; + } err = ieee80211_prep_connection(sdata, req->bss, true, override); if (err) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 72b44d4c42d0..9c3b7fc377c1 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -18,8 +18,6 @@ #define AVG_AMPDU_SIZE 16 #define AVG_PKT_SIZE 1200 -#define SAMPLE_SWITCH_THR 100 - /* Number of bits for an average sized packet */ #define MCS_NBITS ((AVG_PKT_SIZE * AVG_AMPDU_SIZE) << 3) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0544563ede52..fec82f7c2fa6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -465,7 +465,12 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, unsigned int stbc; rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS)); - *pos++ = local->hw.radiotap_mcs_details; + *pos = local->hw.radiotap_mcs_details; + if (status->enc_flags & RX_ENC_FLAG_HT_GF) + *pos |= IEEE80211_RADIOTAP_MCS_HAVE_FMT; + if (status->enc_flags & RX_ENC_FLAG_LDPC) + *pos |= IEEE80211_RADIOTAP_MCS_HAVE_FEC; + pos++; *pos = 0; if (status->enc_flags & RX_ENC_FLAG_SHORT_GI) *pos |= IEEE80211_RADIOTAP_MCS_SGI; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 9e8381bef7ed..d91498f77796 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2892,6 +2892,13 @@ TRACE_EVENT(drv_twt_teardown_request, ) ); +DEFINE_EVENT(sta_event, drv_net_fill_forward_path, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) +); + #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 86a54df3aabd..6d054fed062f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3821,7 +3821,7 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) { struct ieee80211_local *local = hw_to_local(hw); struct airtime_sched_info *air_sched; - u64 now = ktime_get_boottime_ns(); + u64 now = ktime_get_coarse_boottime_ns(); struct ieee80211_txq *ret = NULL; struct airtime_info *air_info; struct txq_info *txqi = NULL; @@ -3948,7 +3948,7 @@ void ieee80211_update_airtime_weight(struct ieee80211_local *local, u64 weight_sum = 0; if (unlikely(!now)) - now = ktime_get_boottime_ns(); + now = ktime_get_coarse_boottime_ns(); lockdep_assert_held(&air_sched->lock); @@ -3974,7 +3974,7 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_local *local = hw_to_local(hw); struct txq_info *txqi = to_txq_info(txq); struct airtime_sched_info *air_sched; - u64 now = ktime_get_boottime_ns(); + u64 now = ktime_get_coarse_boottime_ns(); struct airtime_info *air_info; u8 ac = txq->ac; bool was_active; @@ -4032,7 +4032,7 @@ static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw, if (!purge) airtime_set_active(air_sched, air_info, - ktime_get_boottime_ns()); + ktime_get_coarse_boottime_ns()); rb_erase_cached(&txqi->schedule_order, &air_sched->active_txqs); @@ -4120,7 +4120,7 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, if (RB_EMPTY_NODE(&txqi->schedule_order)) goto out; - now = ktime_get_boottime_ns(); + now = ktime_get_coarse_boottime_ns(); /* Like in ieee80211_next_txq(), make sure the first station in the * scheduling order is eligible for transmission to avoid starvation. diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 4eed23e27610..7ed0d268aff2 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -449,7 +449,6 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) return 0; - hdr = (struct ieee80211_hdr *) pos; pos += hdrlen; pn64 = atomic64_inc_return(&key->conf.tx_pn); @@ -686,7 +685,6 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) return 0; - hdr = (struct ieee80211_hdr *)pos; pos += hdrlen; pn64 = atomic64_inc_return(&key->conf.tx_pn); @@ -881,8 +879,6 @@ ieee80211_crypto_cs_decrypt(struct ieee80211_rx_data *rx) if (skb_linearize(rx->skb)) return RX_DROP_UNUSABLE; - hdr = (struct ieee80211_hdr *)rx->skb->data; - rx_pn = key->u.gen.rx_pn[qos_tid]; skb_pn = rx->skb->data + hdrlen + cs->pn_off; diff --git a/net/mctp/device.c b/net/mctp/device.c index 8799ee77e7b7..ef2755f82f87 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -35,14 +35,24 @@ struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev) return rtnl_dereference(dev->mctp_ptr); } -static int mctp_fill_addrinfo(struct sk_buff *skb, struct netlink_callback *cb, - struct mctp_dev *mdev, mctp_eid_t eid) +static int mctp_addrinfo_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(1) // IFA_LOCAL + + nla_total_size(1) // IFA_ADDRESS + ; +} + +/* flag should be NLM_F_MULTI for dump calls */ +static int mctp_fill_addrinfo(struct sk_buff *skb, + struct mctp_dev *mdev, mctp_eid_t eid, + int msg_type, u32 portid, u32 seq, int flag) { struct ifaddrmsg *hdr; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RTM_NEWADDR, sizeof(*hdr), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, + msg_type, sizeof(*hdr), flag); if (!nlh) return -EMSGSIZE; @@ -72,10 +82,14 @@ static int mctp_dump_dev_addrinfo(struct mctp_dev *mdev, struct sk_buff *skb, struct netlink_callback *cb) { struct mctp_dump_cb *mcb = (void *)cb->ctx; + u32 portid, seq; int rc = 0; + portid = NETLINK_CB(cb->skb).portid; + seq = cb->nlh->nlmsg_seq; for (; mcb->a_idx < mdev->num_addrs; mcb->a_idx++) { - rc = mctp_fill_addrinfo(skb, cb, mdev, mdev->addrs[mcb->a_idx]); + rc = mctp_fill_addrinfo(skb, mdev, mdev->addrs[mcb->a_idx], + RTM_NEWADDR, portid, seq, NLM_F_MULTI); if (rc < 0) break; } @@ -127,6 +141,32 @@ out: return skb->len; } +static void mctp_addr_notify(struct mctp_dev *mdev, mctp_eid_t eid, int msg_type, + struct sk_buff *req_skb, struct nlmsghdr *req_nlh) +{ + u32 portid = NETLINK_CB(req_skb).portid; + struct net *net = dev_net(mdev->dev); + struct sk_buff *skb; + int rc = -ENOBUFS; + + skb = nlmsg_new(mctp_addrinfo_size(), GFP_KERNEL); + if (!skb) + goto out; + + rc = mctp_fill_addrinfo(skb, mdev, eid, msg_type, + portid, req_nlh->nlmsg_seq, 0); + if (rc < 0) { + WARN_ON_ONCE(rc == -EMSGSIZE); + goto out; + } + + rtnl_notify(skb, net, portid, RTNLGRP_MCTP_IFADDR, req_nlh, GFP_KERNEL); + return; +out: + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_MCTP_IFADDR, rc); +} + static const struct nla_policy ifa_mctp_policy[IFA_MAX + 1] = { [IFA_ADDRESS] = { .type = NLA_U8 }, [IFA_LOCAL] = { .type = NLA_U8 }, @@ -189,6 +229,7 @@ static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, kfree(tmp_addrs); + mctp_addr_notify(mdev, addr->s_addr, RTM_NEWADDR, skb, nlh); mctp_route_add_local(mdev, addr->s_addr); return 0; @@ -244,6 +285,8 @@ static int mctp_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, mdev->num_addrs--; spin_unlock_irqrestore(&mdev->addrs_lock, flags); + mctp_addr_notify(mdev, addr->s_addr, RTM_DELADDR, skb, nlh); + return 0; } diff --git a/net/rfkill/core.c b/net/rfkill/core.c index ac15a944573f..5b1927d66f0d 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -946,6 +946,18 @@ bool rfkill_blocked(struct rfkill *rfkill) } EXPORT_SYMBOL(rfkill_blocked); +bool rfkill_soft_blocked(struct rfkill *rfkill) +{ + unsigned long flags; + u32 state; + + spin_lock_irqsave(&rfkill->lock, flags); + state = rfkill->state; + spin_unlock_irqrestore(&rfkill->lock, flags); + + return !!(state & RFKILL_BLOCK_SW); +} +EXPORT_SYMBOL(rfkill_soft_blocked); struct rfkill * __must_check rfkill_alloc(const char *name, struct device *parent, diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 3258da3d5bed..32563cef85bf 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -19,8 +19,10 @@ #include <net/sock.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> +#include <net/tc_act/tc_pedit.h> #include <net/act_api.h> #include <net/netlink.h> +#include <net/flow_offload.h> #ifdef CONFIG_INET DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count); @@ -129,8 +131,244 @@ static void free_tcf(struct tc_action *p) kfree(p); } +static void offload_action_hw_count_set(struct tc_action *act, + u32 hw_count) +{ + act->in_hw_count = hw_count; +} + +static void offload_action_hw_count_inc(struct tc_action *act, + u32 hw_count) +{ + act->in_hw_count += hw_count; +} + +static void offload_action_hw_count_dec(struct tc_action *act, + u32 hw_count) +{ + act->in_hw_count = act->in_hw_count > hw_count ? + act->in_hw_count - hw_count : 0; +} + +static unsigned int tcf_offload_act_num_actions_single(struct tc_action *act) +{ + if (is_tcf_pedit(act)) + return tcf_pedit_nkeys(act); + else + return 1; +} + +static bool tc_act_skip_hw(u32 flags) +{ + return (flags & TCA_ACT_FLAGS_SKIP_HW) ? true : false; +} + +static bool tc_act_skip_sw(u32 flags) +{ + return (flags & TCA_ACT_FLAGS_SKIP_SW) ? true : false; +} + +static bool tc_act_in_hw(struct tc_action *act) +{ + return !!act->in_hw_count; +} + +/* SKIP_HW and SKIP_SW are mutually exclusive flags. */ +static bool tc_act_flags_valid(u32 flags) +{ + flags &= TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW; + + return flags ^ (TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW); +} + +static int offload_action_init(struct flow_offload_action *fl_action, + struct tc_action *act, + enum offload_act_command cmd, + struct netlink_ext_ack *extack) +{ + int err; + + fl_action->extack = extack; + fl_action->command = cmd; + fl_action->index = act->tcfa_index; + + if (act->ops->offload_act_setup) { + spin_lock_bh(&act->tcfa_lock); + err = act->ops->offload_act_setup(act, fl_action, NULL, + false); + spin_unlock_bh(&act->tcfa_lock); + return err; + } + + return -EOPNOTSUPP; +} + +static int tcf_action_offload_cmd_ex(struct flow_offload_action *fl_act, + u32 *hw_count) +{ + int err; + + err = flow_indr_dev_setup_offload(NULL, NULL, TC_SETUP_ACT, + fl_act, NULL, NULL); + if (err < 0) + return err; + + if (hw_count) + *hw_count = err; + + return 0; +} + +static int tcf_action_offload_cmd_cb_ex(struct flow_offload_action *fl_act, + u32 *hw_count, + flow_indr_block_bind_cb_t *cb, + void *cb_priv) +{ + int err; + + err = cb(NULL, NULL, cb_priv, TC_SETUP_ACT, NULL, fl_act, NULL); + if (err < 0) + return err; + + if (hw_count) + *hw_count = 1; + + return 0; +} + +static int tcf_action_offload_cmd(struct flow_offload_action *fl_act, + u32 *hw_count, + flow_indr_block_bind_cb_t *cb, + void *cb_priv) +{ + return cb ? tcf_action_offload_cmd_cb_ex(fl_act, hw_count, + cb, cb_priv) : + tcf_action_offload_cmd_ex(fl_act, hw_count); +} + +static int tcf_action_offload_add_ex(struct tc_action *action, + struct netlink_ext_ack *extack, + flow_indr_block_bind_cb_t *cb, + void *cb_priv) +{ + bool skip_sw = tc_act_skip_sw(action->tcfa_flags); + struct tc_action *actions[TCA_ACT_MAX_PRIO] = { + [0] = action, + }; + struct flow_offload_action *fl_action; + u32 in_hw_count = 0; + int num, err = 0; + + if (tc_act_skip_hw(action->tcfa_flags)) + return 0; + + num = tcf_offload_act_num_actions_single(action); + fl_action = offload_action_alloc(num); + if (!fl_action) + return -ENOMEM; + + err = offload_action_init(fl_action, action, FLOW_ACT_REPLACE, extack); + if (err) + goto fl_err; + + err = tc_setup_action(&fl_action->action, actions); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to setup tc actions for offload\n"); + goto fl_err; + } + + err = tcf_action_offload_cmd(fl_action, &in_hw_count, cb, cb_priv); + if (!err) + cb ? offload_action_hw_count_inc(action, in_hw_count) : + offload_action_hw_count_set(action, in_hw_count); + + if (skip_sw && !tc_act_in_hw(action)) + err = -EINVAL; + + tc_cleanup_offload_action(&fl_action->action); + +fl_err: + kfree(fl_action); + + return err; +} + +/* offload the tc action after it is inserted */ +static int tcf_action_offload_add(struct tc_action *action, + struct netlink_ext_ack *extack) +{ + return tcf_action_offload_add_ex(action, extack, NULL, NULL); +} + +int tcf_action_update_hw_stats(struct tc_action *action) +{ + struct flow_offload_action fl_act = {}; + int err; + + if (!tc_act_in_hw(action)) + return -EOPNOTSUPP; + + err = offload_action_init(&fl_act, action, FLOW_ACT_STATS, NULL); + if (err) + return err; + + err = tcf_action_offload_cmd(&fl_act, NULL, NULL, NULL); + if (!err) { + preempt_disable(); + tcf_action_stats_update(action, fl_act.stats.bytes, + fl_act.stats.pkts, + fl_act.stats.drops, + fl_act.stats.lastused, + true); + preempt_enable(); + action->used_hw_stats = fl_act.stats.used_hw_stats; + action->used_hw_stats_valid = true; + } else { + return -EOPNOTSUPP; + } + + return 0; +} +EXPORT_SYMBOL(tcf_action_update_hw_stats); + +static int tcf_action_offload_del_ex(struct tc_action *action, + flow_indr_block_bind_cb_t *cb, + void *cb_priv) +{ + struct flow_offload_action fl_act = {}; + u32 in_hw_count = 0; + int err = 0; + + if (!tc_act_in_hw(action)) + return 0; + + err = offload_action_init(&fl_act, action, FLOW_ACT_DESTROY, NULL); + if (err) + return err; + + err = tcf_action_offload_cmd(&fl_act, &in_hw_count, cb, cb_priv); + if (err < 0) + return err; + + if (!cb && action->in_hw_count != in_hw_count) + return -EINVAL; + + /* do not need to update hw state when deleting action */ + if (cb && in_hw_count) + offload_action_hw_count_dec(action, in_hw_count); + + return 0; +} + +static int tcf_action_offload_del(struct tc_action *action) +{ + return tcf_action_offload_del_ex(action, NULL, NULL); +} + static void tcf_action_cleanup(struct tc_action *p) { + tcf_action_offload_del(p); if (p->ops->cleanup) p->ops->cleanup(p); @@ -497,7 +735,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, p->tcfa_tm.install = jiffies; p->tcfa_tm.lastuse = jiffies; p->tcfa_tm.firstuse = 0; - p->tcfa_flags = flags & TCA_ACT_FLAGS_USER_MASK; + p->tcfa_flags = flags; if (est) { err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats, &p->tcfa_rate_est, @@ -622,6 +860,59 @@ EXPORT_SYMBOL(tcf_idrinfo_destroy); static LIST_HEAD(act_base); static DEFINE_RWLOCK(act_mod_lock); +/* since act ops id is stored in pernet subsystem list, + * then there is no way to walk through only all the action + * subsystem, so we keep tc action pernet ops id for + * reoffload to walk through. + */ +static LIST_HEAD(act_pernet_id_list); +static DEFINE_MUTEX(act_id_mutex); +struct tc_act_pernet_id { + struct list_head list; + unsigned int id; +}; + +static int tcf_pernet_add_id_list(unsigned int id) +{ + struct tc_act_pernet_id *id_ptr; + int ret = 0; + + mutex_lock(&act_id_mutex); + list_for_each_entry(id_ptr, &act_pernet_id_list, list) { + if (id_ptr->id == id) { + ret = -EEXIST; + goto err_out; + } + } + + id_ptr = kzalloc(sizeof(*id_ptr), GFP_KERNEL); + if (!id_ptr) { + ret = -ENOMEM; + goto err_out; + } + id_ptr->id = id; + + list_add_tail(&id_ptr->list, &act_pernet_id_list); + +err_out: + mutex_unlock(&act_id_mutex); + return ret; +} + +static void tcf_pernet_del_id_list(unsigned int id) +{ + struct tc_act_pernet_id *id_ptr; + + mutex_lock(&act_id_mutex); + list_for_each_entry(id_ptr, &act_pernet_id_list, list) { + if (id_ptr->id == id) { + list_del(&id_ptr->list); + kfree(id_ptr); + break; + } + } + mutex_unlock(&act_id_mutex); +} int tcf_register_action(struct tc_action_ops *act, struct pernet_operations *ops) @@ -640,18 +931,31 @@ int tcf_register_action(struct tc_action_ops *act, if (ret) return ret; + if (ops->id) { + ret = tcf_pernet_add_id_list(*ops->id); + if (ret) + goto err_id; + } + write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) { - write_unlock(&act_mod_lock); - unregister_pernet_subsys(ops); - return -EEXIST; + ret = -EEXIST; + goto err_out; } } list_add_tail(&act->head, &act_base); write_unlock(&act_mod_lock); return 0; + +err_out: + write_unlock(&act_mod_lock); + if (ops->id) + tcf_pernet_del_id_list(*ops->id); +err_id: + unregister_pernet_subsys(ops); + return ret; } EXPORT_SYMBOL(tcf_register_action); @@ -670,8 +974,11 @@ int tcf_unregister_action(struct tc_action_ops *act, } } write_unlock(&act_mod_lock); - if (!err) + if (!err) { unregister_pernet_subsys(ops); + if (ops->id) + tcf_pernet_del_id_list(*ops->id); + } return err; } EXPORT_SYMBOL(tcf_unregister_action); @@ -735,6 +1042,9 @@ restart_act_graph: jmp_prgcnt -= 1; continue; } + + if (tc_act_skip_sw(a->tcfa_flags)) + continue; repeat: ret = a->ops->act(skb, a, res); if (ret == TC_ACT_REPEAT) @@ -821,6 +1131,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) int err = -EINVAL; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; + u32 flags; if (tcf_action_dump_terse(skb, a, false)) goto nla_put_failure; @@ -835,9 +1146,13 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) a->used_hw_stats, TCA_ACT_HW_STATS_ANY)) goto nla_put_failure; - if (a->tcfa_flags && + flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK; + if (flags && nla_put_bitfield32(skb, TCA_ACT_FLAGS, - a->tcfa_flags, a->tcfa_flags)) + flags, flags)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count)) goto nla_put_failure; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); @@ -919,7 +1234,9 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_COOKIE] = { .type = NLA_BINARY, .len = TC_COOKIE_MAX_SIZE }, [TCA_ACT_OPTIONS] = { .type = NLA_NESTED }, - [TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS), + [TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS | + TCA_ACT_FLAGS_SKIP_HW | + TCA_ACT_FLAGS_SKIP_SW), [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; @@ -1032,8 +1349,13 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, } } hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); - if (tb[TCA_ACT_FLAGS]) + if (tb[TCA_ACT_FLAGS]) { userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); + if (!tc_act_flags_valid(userflags.value)) { + err = -EINVAL; + goto err_out; + } + } err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp, userflags.value | flags, extack); @@ -1061,11 +1383,17 @@ err_out: return ERR_PTR(err); } +static bool tc_act_bind(u32 flags) +{ + return !!(flags & TCA_ACT_FLAGS_BIND); +} + /* Returns numbers of initialized actions or negative error. */ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action *actions[], - int init_res[], size_t *attr_size, u32 flags, + int init_res[], size_t *attr_size, + u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {}; @@ -1103,6 +1431,22 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, sz += tcf_action_fill_size(act); /* Start from index 0 */ actions[i - 1] = act; + if (tc_act_bind(flags)) { + bool skip_sw = tc_skip_sw(fl_flags); + bool skip_hw = tc_skip_hw(fl_flags); + + if (tc_act_bind(act->tcfa_flags)) + continue; + if (skip_sw != tc_act_skip_sw(act->tcfa_flags) || + skip_hw != tc_act_skip_hw(act->tcfa_flags)) { + err = -EINVAL; + goto err; + } + } else { + err = tcf_action_offload_add(act, extack); + if (tc_act_skip_sw(act->tcfa_flags) && err) + goto err; + } } /* We have to commit them all together, because if any error happened in @@ -1154,6 +1498,9 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, if (p == NULL) goto errout; + /* update hw stats for this action */ + tcf_action_update_hw_stats(p); + /* compat_mode being true specifies a call that is supposed * to add additional backward compatibility statistic TLVs. */ @@ -1396,6 +1743,96 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[]) } static int +tcf_reoffload_del_notify(struct net *net, struct tc_action *action) +{ + size_t attr_size = tcf_action_fill_size(action); + struct tc_action *actions[TCA_ACT_MAX_PRIO] = { + [0] = action, + }; + const struct tc_action_ops *ops = action->ops; + struct sk_buff *skb; + int ret; + + skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, + GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + ret = tcf_idr_release_unsafe(action); + if (ret == ACT_P_DELETED) { + module_put(ops->owner); + ret = rtnetlink_send(skb, net, 0, RTNLGRP_TC, 0); + } else { + kfree_skb(skb); + } + + return ret; +} + +int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, + void *cb_priv, bool add) +{ + struct tc_act_pernet_id *id_ptr; + struct tcf_idrinfo *idrinfo; + struct tc_action_net *tn; + struct tc_action *p; + unsigned int act_id; + unsigned long tmp; + unsigned long id; + struct idr *idr; + struct net *net; + int ret; + + if (!cb) + return -EINVAL; + + down_read(&net_rwsem); + mutex_lock(&act_id_mutex); + + for_each_net(net) { + list_for_each_entry(id_ptr, &act_pernet_id_list, list) { + act_id = id_ptr->id; + tn = net_generic(net, act_id); + if (!tn) + continue; + idrinfo = tn->idrinfo; + if (!idrinfo) + continue; + + mutex_lock(&idrinfo->lock); + idr = &idrinfo->action_idr; + idr_for_each_entry_ul(idr, p, tmp, id) { + if (IS_ERR(p) || tc_act_bind(p->tcfa_flags)) + continue; + if (add) { + tcf_action_offload_add_ex(p, NULL, cb, + cb_priv); + continue; + } + + /* cb unregister to update hw count */ + ret = tcf_action_offload_del_ex(p, cb, cb_priv); + if (ret < 0) + continue; + if (tc_act_skip_sw(p->tcfa_flags) && + !tc_act_in_hw(p)) + tcf_reoffload_del_notify(net, p); + } + mutex_unlock(&idrinfo->lock); + } + } + mutex_unlock(&act_id_mutex); + up_read(&net_rwsem); + + return 0; +} + +static int tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { @@ -1508,7 +1945,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, for (loop = 0; loop < 10; loop++) { ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res, - &attr_size, flags, extack); + &attr_size, flags, 0, extack); if (ret != -EAGAIN) break; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index f2bf896331a5..a77d8908e737 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -305,7 +305,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, ret = tcf_idr_check_alloc(tn, &index, act, bind); if (!ret) { ret = tcf_idr_create(tn, index, est, act, - &act_bpf_ops, bind, true, 0); + &act_bpf_ops, bind, true, flags); if (ret < 0) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 94e78ac7a748..09e2aafc8943 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -124,7 +124,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ret = tcf_idr_check_alloc(tn, &index, a, bind); if (!ret) { ret = tcf_idr_create(tn, index, est, a, - &act_connmark_ops, bind, false, 0); + &act_connmark_ops, bind, false, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index a15ec95e69c3..e0f515b774ca 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -695,6 +695,24 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act) return nla_total_size(sizeof(struct tc_csum)); } +static int tcf_csum_offload_act_setup(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind) +{ + if (bind) { + struct flow_action_entry *entry = entry_data; + + entry->id = FLOW_ACTION_CSUM; + entry->csum_flags = tcf_csum_update_flags(act); + *index_inc = 1; + } else { + struct flow_offload_action *fl_action = entry_data; + + fl_action->id = FLOW_ACTION_CSUM; + } + + return 0; +} + static struct tc_action_ops act_csum_ops = { .kind = "csum", .id = TCA_ID_CSUM, @@ -706,6 +724,7 @@ static struct tc_action_ops act_csum_ops = { .walk = tcf_csum_walker, .lookup = tcf_csum_search, .get_fill_size = tcf_csum_get_fill_size, + .offload_act_setup = tcf_csum_offload_act_setup, .size = sizeof(struct tcf_csum), }; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index ab1810f2e660..1c537913a189 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1493,6 +1493,26 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse); } +static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind) +{ + if (bind) { + struct flow_action_entry *entry = entry_data; + + entry->id = FLOW_ACTION_CT; + entry->ct.action = tcf_ct_action(act); + entry->ct.zone = tcf_ct_zone(act); + entry->ct.flow_table = tcf_ct_ft(act); + *index_inc = 1; + } else { + struct flow_offload_action *fl_action = entry_data; + + fl_action->id = FLOW_ACTION_CT; + } + + return 0; +} + static struct tc_action_ops act_ct_ops = { .kind = "ct", .id = TCA_ID_CT, @@ -1504,6 +1524,7 @@ static struct tc_action_ops act_ct_ops = { .walk = tcf_ct_walker, .lookup = tcf_ct_search, .stats_update = tcf_stats_update, + .offload_act_setup = tcf_ct_offload_act_setup, .size = sizeof(struct tcf_ct), }; diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 549374a2d008..0281e45987a4 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -212,7 +212,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { ret = tcf_idr_create(tn, index, est, a, - &act_ctinfo_ops, bind, false, 0); + &act_ctinfo_ops, bind, false, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index d8dce173df37..bde6a6c01e64 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -252,6 +252,43 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act) return sz; } +static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind) +{ + if (bind) { + struct flow_action_entry *entry = entry_data; + + if (is_tcf_gact_ok(act)) { + entry->id = FLOW_ACTION_ACCEPT; + } else if (is_tcf_gact_shot(act)) { + entry->id = FLOW_ACTION_DROP; + } else if (is_tcf_gact_trap(act)) { + entry->id = FLOW_ACTION_TRAP; + } else if (is_tcf_gact_goto_chain(act)) { + entry->id = FLOW_ACTION_GOTO; + entry->chain_index = tcf_gact_goto_chain_index(act); + } else { + return -EOPNOTSUPP; + } + *index_inc = 1; + } else { + struct flow_offload_action *fl_action = entry_data; + + if (is_tcf_gact_ok(act)) + fl_action->id = FLOW_ACTION_ACCEPT; + else if (is_tcf_gact_shot(act)) + fl_action->id = FLOW_ACTION_DROP; + else if (is_tcf_gact_trap(act)) + fl_action->id = FLOW_ACTION_TRAP; + else if (is_tcf_gact_goto_chain(act)) + fl_action->id = FLOW_ACTION_GOTO; + else + return -EOPNOTSUPP; + } + + return 0; +} + static struct tc_action_ops act_gact_ops = { .kind = "gact", .id = TCA_ID_GACT, @@ -263,6 +300,7 @@ static struct tc_action_ops act_gact_ops = { .walk = tcf_gact_walker, .lookup = tcf_gact_search, .get_fill_size = tcf_gact_get_fill_size, + .offload_act_setup = tcf_gact_offload_act_setup, .size = sizeof(struct tcf_gact), }; diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 7df72a4197a3..d56e73843a4b 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -357,7 +357,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (!err) { ret = tcf_idr_create(tn, index, est, a, - &act_gate_ops, bind, false, 0); + &act_gate_ops, bind, false, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; @@ -597,6 +597,54 @@ static size_t tcf_gate_get_fill_size(const struct tc_action *act) return nla_total_size(sizeof(struct tc_gate)); } +static void tcf_gate_entry_destructor(void *priv) +{ + struct action_gate_entry *oe = priv; + + kfree(oe); +} + +static int tcf_gate_get_entries(struct flow_action_entry *entry, + const struct tc_action *act) +{ + entry->gate.entries = tcf_gate_get_list(act); + + if (!entry->gate.entries) + return -EINVAL; + + entry->destructor = tcf_gate_entry_destructor; + entry->destructor_priv = entry->gate.entries; + + return 0; +} + +static int tcf_gate_offload_act_setup(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind) +{ + int err; + + if (bind) { + struct flow_action_entry *entry = entry_data; + + entry->id = FLOW_ACTION_GATE; + entry->gate.prio = tcf_gate_prio(act); + entry->gate.basetime = tcf_gate_basetime(act); + entry->gate.cycletime = tcf_gate_cycletime(act); + entry->gate.cycletimeext = tcf_gate_cycletimeext(act); + entry->gate.num_entries = tcf_gate_num_entries(act); + err = tcf_gate_get_entries(entry, act); + if (err) + return err; + *index_inc = 1; + } else { + struct flow_offload_action *fl_action = entry_data; + + fl_action->id = FLOW_ACTION_GATE; + } + + return 0; +} + static struct tc_action_ops act_gate_ops = { .kind = "gate", .id = TCA_ID_GATE, @@ -609,6 +657,7 @@ static struct tc_action_ops act_gate_ops = { .stats_update = tcf_gate_stats_update, .get_fill_size = tcf_gate_get_fill_size, .lookup = tcf_gate_search, + .offload_act_setup = tcf_gate_offload_act_setup, .size = sizeof(struct tcf_gate), }; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index b757f90a2d58..41ba55e60b1b 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -553,7 +553,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, est, a, &act_ife_ops, - bind, true, 0); + bind, true, flags); if (ret) { tcf_idr_cleanup(tn, index); kfree(p); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 265b1443e252..2f3d507c24a1 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -145,7 +145,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, est, a, ops, bind, - false, 0); + false, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 952416bd65e6..39acd1d18609 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -450,6 +450,55 @@ static size_t tcf_mirred_get_fill_size(const struct tc_action *act) return nla_total_size(sizeof(struct tc_mirred)); } +static void tcf_offload_mirred_get_dev(struct flow_action_entry *entry, + const struct tc_action *act) +{ + entry->dev = act->ops->get_dev(act, &entry->destructor); + if (!entry->dev) + return; + entry->destructor_priv = entry->dev; +} + +static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind) +{ + if (bind) { + struct flow_action_entry *entry = entry_data; + + if (is_tcf_mirred_egress_redirect(act)) { + entry->id = FLOW_ACTION_REDIRECT; + tcf_offload_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_egress_mirror(act)) { + entry->id = FLOW_ACTION_MIRRED; + tcf_offload_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_ingress_redirect(act)) { + entry->id = FLOW_ACTION_REDIRECT_INGRESS; + tcf_offload_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_ingress_mirror(act)) { + entry->id = FLOW_ACTION_MIRRED_INGRESS; + tcf_offload_mirred_get_dev(entry, act); + } else { + return -EOPNOTSUPP; + } + *index_inc = 1; + } else { + struct flow_offload_action *fl_action = entry_data; + + if (is_tcf_mirred_egress_redirect(act)) + fl_action->id = FLOW_ACTION_REDIRECT; + else if (is_tcf_mirred_egress_mirror(act)) + fl_action->id = FLOW_ACTION_MIRRED; + else if (is_tcf_mirred_ingress_redirect(act)) + fl_action->id = FLOW_ACTION_REDIRECT_INGRESS; + else if (is_tcf_mirred_ingress_mirror(act)) + fl_action->id = FLOW_ACTION_MIRRED_INGRESS; + else + return -EOPNOTSUPP; + } + + return 0; +} + static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .id = TCA_ID_MIRRED, @@ -462,6 +511,7 @@ static struct tc_action_ops act_mirred_ops = { .walk = tcf_mirred_walker, .lookup = tcf_mirred_search, .get_fill_size = tcf_mirred_get_fill_size, + .offload_act_setup = tcf_mirred_offload_act_setup, .size = sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, }; diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 8faa4c58305e..b9ff3459fdab 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -248,7 +248,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, est, a, - &act_mpls_ops, bind, true, 0); + &act_mpls_ops, bind, true, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; @@ -384,6 +384,57 @@ static int tcf_mpls_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind) +{ + if (bind) { + struct flow_action_entry *entry = entry_data; + + switch (tcf_mpls_action(act)) { + case TCA_MPLS_ACT_PUSH: + entry->id = FLOW_ACTION_MPLS_PUSH; + entry->mpls_push.proto = tcf_mpls_proto(act); + entry->mpls_push.label = tcf_mpls_label(act); + entry->mpls_push.tc = tcf_mpls_tc(act); + entry->mpls_push.bos = tcf_mpls_bos(act); + entry->mpls_push.ttl = tcf_mpls_ttl(act); + break; + case TCA_MPLS_ACT_POP: + entry->id = FLOW_ACTION_MPLS_POP; + entry->mpls_pop.proto = tcf_mpls_proto(act); + break; + case TCA_MPLS_ACT_MODIFY: + entry->id = FLOW_ACTION_MPLS_MANGLE; + entry->mpls_mangle.label = tcf_mpls_label(act); + entry->mpls_mangle.tc = tcf_mpls_tc(act); + entry->mpls_mangle.bos = tcf_mpls_bos(act); + entry->mpls_mangle.ttl = tcf_mpls_ttl(act); + break; + default: + return -EOPNOTSUPP; + } + *index_inc = 1; + } else { + struct flow_offload_action *fl_action = entry_data; + + switch (tcf_mpls_action(act)) { + case TCA_MPLS_ACT_PUSH: + fl_action->id = FLOW_ACTION_MPLS_PUSH; + break; + case TCA_MPLS_ACT_POP: + fl_action->id = FLOW_ACTION_MPLS_POP; + break; + case TCA_MPLS_ACT_MODIFY: + fl_action->id = FLOW_ACTION_MPLS_MANGLE; + break; + default: + return -EOPNOTSUPP; + } + } + + return 0; +} + static struct tc_action_ops act_mpls_ops = { .kind = "mpls", .id = TCA_ID_MPLS, @@ -394,6 +445,7 @@ static struct tc_action_ops act_mpls_ops = { .cleanup = tcf_mpls_cleanup, .walk = tcf_mpls_walker, .lookup = tcf_mpls_search, + .offload_act_setup = tcf_mpls_offload_act_setup, .size = sizeof(struct tcf_mpls), }; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 7dd6b586ba7f..2a39b3729e84 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -61,7 +61,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { ret = tcf_idr_create(tn, index, est, a, - &act_nat_ops, bind, false, 0); + &act_nat_ops, bind, false, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index c6c862c459cc..31fcd279c177 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -189,7 +189,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { ret = tcf_idr_create(tn, index, est, a, - &act_pedit_ops, bind, false, 0); + &act_pedit_ops, bind, false, flags); if (ret) { tcf_idr_cleanup(tn, index); goto out_free; @@ -487,6 +487,39 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind) +{ + if (bind) { + struct flow_action_entry *entry = entry_data; + int k; + + for (k = 0; k < tcf_pedit_nkeys(act); k++) { + switch (tcf_pedit_cmd(act, k)) { + case TCA_PEDIT_KEY_EX_CMD_SET: + entry->id = FLOW_ACTION_MANGLE; + break; + case TCA_PEDIT_KEY_EX_CMD_ADD: + entry->id = FLOW_ACTION_ADD; + break; + default: + return -EOPNOTSUPP; + } + entry->mangle.htype = tcf_pedit_htype(act, k); + entry->mangle.mask = tcf_pedit_mask(act, k); + entry->mangle.val = tcf_pedit_val(act, k); + entry->mangle.offset = tcf_pedit_offset(act, k); + entry->hw_stats = tc_act_hw_stats(act->hw_stats); + entry++; + } + *index_inc = k; + } else { + return -EOPNOTSUPP; + } + + return 0; +} + static struct tc_action_ops act_pedit_ops = { .kind = "pedit", .id = TCA_ID_PEDIT, @@ -498,6 +531,7 @@ static struct tc_action_ops act_pedit_ops = { .init = tcf_pedit_init, .walk = tcf_pedit_walker, .lookup = tcf_pedit_search, + .offload_act_setup = tcf_pedit_offload_act_setup, .size = sizeof(struct tcf_pedit), }; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 9e77ba8401e5..0923aa2b8f8a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -90,7 +90,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, NULL, a, - &act_police_ops, bind, true, 0); + &act_police_ops, bind, true, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; @@ -405,6 +405,30 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind) +{ + if (bind) { + struct flow_action_entry *entry = entry_data; + + entry->id = FLOW_ACTION_POLICE; + entry->police.burst = tcf_police_burst(act); + entry->police.rate_bytes_ps = + tcf_police_rate_bytes_ps(act); + entry->police.burst_pkt = tcf_police_burst_pkt(act); + entry->police.rate_pkt_ps = + tcf_police_rate_pkt_ps(act); + entry->police.mtu = tcf_police_tcfp_mtu(act); + *index_inc = 1; + } else { + struct flow_offload_action *fl_action = entry_data; + + fl_action->id = FLOW_ACTION_POLICE; + } + + return 0; +} + MODULE_AUTHOR("Alexey Kuznetsov"); MODULE_DESCRIPTION("Policing actions"); MODULE_LICENSE("GPL"); @@ -420,6 +444,7 @@ static struct tc_action_ops act_police_ops = { .walk = tcf_police_walker, .lookup = tcf_police_search, .cleanup = tcf_police_cleanup, + .offload_act_setup = tcf_police_offload_act_setup, .size = sizeof(struct tcf_police), }; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index ce859b0e0deb..9a22cdda6bbd 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -70,7 +70,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, est, a, - &act_sample_ops, bind, true, 0); + &act_sample_ops, bind, true, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; @@ -282,6 +282,35 @@ tcf_sample_get_group(const struct tc_action *a, return group; } +static void tcf_offload_sample_get_group(struct flow_action_entry *entry, + const struct tc_action *act) +{ + entry->sample.psample_group = + act->ops->get_psample_group(act, &entry->destructor); + entry->destructor_priv = entry->sample.psample_group; +} + +static int tcf_sample_offload_act_setup(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind) +{ + if (bind) { + struct flow_action_entry *entry = entry_data; + + entry->id = FLOW_ACTION_SAMPLE; + entry->sample.trunc_size = tcf_sample_trunc_size(act); + entry->sample.truncate = tcf_sample_truncate(act); + entry->sample.rate = tcf_sample_rate(act); + tcf_offload_sample_get_group(entry, act); + *index_inc = 1; + } else { + struct flow_offload_action *fl_action = entry_data; + + fl_action->id = FLOW_ACTION_SAMPLE; + } + + return 0; +} + static struct tc_action_ops act_sample_ops = { .kind = "sample", .id = TCA_ID_SAMPLE, @@ -294,6 +323,7 @@ static struct tc_action_ops act_sample_ops = { .walk = tcf_sample_walker, .lookup = tcf_sample_search, .get_psample_group = tcf_sample_get_group, + .offload_act_setup = tcf_sample_offload_act_setup, .size = sizeof(struct tcf_sample), }; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e617ab4505ca..8c1d60bde93e 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -129,7 +129,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, est, a, - &act_simp_ops, bind, false, 0); + &act_simp_ops, bind, false, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index d30ecbfc8f84..ceba11b198bb 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -176,7 +176,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, est, a, - &act_skbedit_ops, bind, true, 0); + &act_skbedit_ops, bind, true, act_flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; @@ -327,6 +327,41 @@ static size_t tcf_skbedit_get_fill_size(const struct tc_action *act) + nla_total_size_64bit(sizeof(u64)); /* TCA_SKBEDIT_FLAGS */ } +static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind) +{ + if (bind) { + struct flow_action_entry *entry = entry_data; + + if (is_tcf_skbedit_mark(act)) { + entry->id = FLOW_ACTION_MARK; + entry->mark = tcf_skbedit_mark(act); + } else if (is_tcf_skbedit_ptype(act)) { + entry->id = FLOW_ACTION_PTYPE; + entry->ptype = tcf_skbedit_ptype(act); + } else if (is_tcf_skbedit_priority(act)) { + entry->id = FLOW_ACTION_PRIORITY; + entry->priority = tcf_skbedit_priority(act); + } else { + return -EOPNOTSUPP; + } + *index_inc = 1; + } else { + struct flow_offload_action *fl_action = entry_data; + + if (is_tcf_skbedit_mark(act)) + fl_action->id = FLOW_ACTION_MARK; + else if (is_tcf_skbedit_ptype(act)) + fl_action->id = FLOW_ACTION_PTYPE; + else if (is_tcf_skbedit_priority(act)) + fl_action->id = FLOW_ACTION_PRIORITY; + else + return -EOPNOTSUPP; + } + + return 0; +} + static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", .id = TCA_ID_SKBEDIT, @@ -339,6 +374,7 @@ static struct tc_action_ops act_skbedit_ops = { .walk = tcf_skbedit_walker, .get_fill_size = tcf_skbedit_get_fill_size, .lookup = tcf_skbedit_search, + .offload_act_setup = tcf_skbedit_offload_act_setup, .size = sizeof(struct tcf_skbedit), }; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 9b6b52c5e24e..2083612d8780 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -168,7 +168,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (!exists) { ret = tcf_idr_create(tn, index, est, a, - &act_skbmod_ops, bind, true, 0); + &act_skbmod_ops, bind, true, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index d9cd174eecb7..23aba03d26a8 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -787,6 +787,59 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static void tcf_tunnel_encap_put_tunnel(void *priv) +{ + struct ip_tunnel_info *tunnel = priv; + + kfree(tunnel); +} + +static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, + const struct tc_action *act) +{ + entry->tunnel = tcf_tunnel_info_copy(act); + if (!entry->tunnel) + return -ENOMEM; + entry->destructor = tcf_tunnel_encap_put_tunnel; + entry->destructor_priv = entry->tunnel; + return 0; +} + +static int tcf_tunnel_key_offload_act_setup(struct tc_action *act, + void *entry_data, + u32 *index_inc, + bool bind) +{ + int err; + + if (bind) { + struct flow_action_entry *entry = entry_data; + + if (is_tcf_tunnel_set(act)) { + entry->id = FLOW_ACTION_TUNNEL_ENCAP; + err = tcf_tunnel_encap_get_tunnel(entry, act); + if (err) + return err; + } else if (is_tcf_tunnel_release(act)) { + entry->id = FLOW_ACTION_TUNNEL_DECAP; + } else { + return -EOPNOTSUPP; + } + *index_inc = 1; + } else { + struct flow_offload_action *fl_action = entry_data; + + if (is_tcf_tunnel_set(act)) + fl_action->id = FLOW_ACTION_TUNNEL_ENCAP; + else if (is_tcf_tunnel_release(act)) + fl_action->id = FLOW_ACTION_TUNNEL_DECAP; + else + return -EOPNOTSUPP; + } + + return 0; +} + static struct tc_action_ops act_tunnel_key_ops = { .kind = "tunnel_key", .id = TCA_ID_TUNNEL_KEY, @@ -797,6 +850,7 @@ static struct tc_action_ops act_tunnel_key_ops = { .cleanup = tunnel_key_release, .walk = tunnel_key_walker, .lookup = tunnel_key_search, + .offload_act_setup = tcf_tunnel_key_offload_act_setup, .size = sizeof(struct tcf_tunnel_key), }; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index e4dc5a555bd8..756e2dcde1cd 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -368,6 +368,53 @@ static size_t tcf_vlan_get_fill_size(const struct tc_action *act) + nla_total_size(sizeof(u8)); /* TCA_VLAN_PUSH_VLAN_PRIORITY */ } +static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind) +{ + if (bind) { + struct flow_action_entry *entry = entry_data; + + switch (tcf_vlan_action(act)) { + case TCA_VLAN_ACT_PUSH: + entry->id = FLOW_ACTION_VLAN_PUSH; + entry->vlan.vid = tcf_vlan_push_vid(act); + entry->vlan.proto = tcf_vlan_push_proto(act); + entry->vlan.prio = tcf_vlan_push_prio(act); + break; + case TCA_VLAN_ACT_POP: + entry->id = FLOW_ACTION_VLAN_POP; + break; + case TCA_VLAN_ACT_MODIFY: + entry->id = FLOW_ACTION_VLAN_MANGLE; + entry->vlan.vid = tcf_vlan_push_vid(act); + entry->vlan.proto = tcf_vlan_push_proto(act); + entry->vlan.prio = tcf_vlan_push_prio(act); + break; + default: + return -EOPNOTSUPP; + } + *index_inc = 1; + } else { + struct flow_offload_action *fl_action = entry_data; + + switch (tcf_vlan_action(act)) { + case TCA_VLAN_ACT_PUSH: + fl_action->id = FLOW_ACTION_VLAN_PUSH; + break; + case TCA_VLAN_ACT_POP: + fl_action->id = FLOW_ACTION_VLAN_POP; + break; + case TCA_VLAN_ACT_MODIFY: + fl_action->id = FLOW_ACTION_VLAN_MANGLE; + break; + default: + return -EOPNOTSUPP; + } + } + + return 0; +} + static struct tc_action_ops act_vlan_ops = { .kind = "vlan", .id = TCA_ID_VLAN, @@ -380,6 +427,7 @@ static struct tc_action_ops act_vlan_ops = { .stats_update = tcf_vlan_stats_update, .get_fill_size = tcf_vlan_get_fill_size, .lookup = tcf_vlan_search, + .offload_act_setup = tcf_vlan_offload_act_setup, .size = sizeof(struct tcf_vlan), }; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e54f0a42270c..563b00481866 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3025,9 +3025,9 @@ void tcf_exts_destroy(struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_destroy); -int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, - u32 flags, struct netlink_ext_ack *extack) +int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **tb, + struct nlattr *rate_tlv, struct tcf_exts *exts, + u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT { @@ -3061,7 +3061,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, flags |= TCA_ACT_FLAGS_BIND; err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, exts->actions, init_res, - &attr_size, flags, extack); + &attr_size, flags, fl_flags, + extack); if (err < 0) return err; exts->nr_actions = err; @@ -3077,6 +3078,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, return 0; } +EXPORT_SYMBOL(tcf_exts_validate_ex); + +int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, + struct nlattr *rate_tlv, struct tcf_exts *exts, + u32 flags, struct netlink_ext_ack *extack) +{ + return tcf_exts_validate_ex(net, tp, tb, rate_tlv, exts, + flags, 0, extack); +} EXPORT_SYMBOL(tcf_exts_validate); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src) @@ -3320,7 +3330,7 @@ err_unlock: up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); - return ok_count < 0 ? ok_count : 0; + return min(ok_count, 0); } EXPORT_SYMBOL(tc_setup_cb_add); @@ -3376,7 +3386,7 @@ err_unlock: up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); - return ok_count < 0 ? ok_count : 0; + return min(ok_count, 0); } EXPORT_SYMBOL(tc_setup_cb_replace); @@ -3414,7 +3424,7 @@ retry: up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); - return ok_count < 0 ? ok_count : 0; + return min(ok_count, 0); } EXPORT_SYMBOL(tc_setup_cb_destroy); @@ -3461,7 +3471,7 @@ static void tcf_act_put_cookie(struct flow_action_entry *entry) flow_action_cookie_destroy(entry->cookie); } -void tc_cleanup_flow_action(struct flow_action *flow_action) +void tc_cleanup_offload_action(struct flow_action *flow_action) { struct flow_action_entry *entry; int i; @@ -3472,93 +3482,37 @@ void tc_cleanup_flow_action(struct flow_action *flow_action) entry->destructor(entry->destructor_priv); } } -EXPORT_SYMBOL(tc_cleanup_flow_action); +EXPORT_SYMBOL(tc_cleanup_offload_action); -static void tcf_mirred_get_dev(struct flow_action_entry *entry, - const struct tc_action *act) +static int tc_setup_offload_act(struct tc_action *act, + struct flow_action_entry *entry, + u32 *index_inc) { #ifdef CONFIG_NET_CLS_ACT - entry->dev = act->ops->get_dev(act, &entry->destructor); - if (!entry->dev) - return; - entry->destructor_priv = entry->dev; -#endif -} - -static void tcf_tunnel_encap_put_tunnel(void *priv) -{ - struct ip_tunnel_info *tunnel = priv; - - kfree(tunnel); -} - -static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, - const struct tc_action *act) -{ - entry->tunnel = tcf_tunnel_info_copy(act); - if (!entry->tunnel) - return -ENOMEM; - entry->destructor = tcf_tunnel_encap_put_tunnel; - entry->destructor_priv = entry->tunnel; + if (act->ops->offload_act_setup) + return act->ops->offload_act_setup(act, entry, index_inc, true); + else + return -EOPNOTSUPP; +#else return 0; -} - -static void tcf_sample_get_group(struct flow_action_entry *entry, - const struct tc_action *act) -{ -#ifdef CONFIG_NET_CLS_ACT - entry->sample.psample_group = - act->ops->get_psample_group(act, &entry->destructor); - entry->destructor_priv = entry->sample.psample_group; #endif } -static void tcf_gate_entry_destructor(void *priv) -{ - struct action_gate_entry *oe = priv; - - kfree(oe); -} - -static int tcf_gate_get_entries(struct flow_action_entry *entry, - const struct tc_action *act) -{ - entry->gate.entries = tcf_gate_get_list(act); - - if (!entry->gate.entries) - return -EINVAL; - - entry->destructor = tcf_gate_entry_destructor; - entry->destructor_priv = entry->gate.entries; - - return 0; -} - -static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) -{ - if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY)) - return FLOW_ACTION_HW_STATS_DONT_CARE; - else if (!hw_stats) - return FLOW_ACTION_HW_STATS_DISABLED; - - return hw_stats; -} - -int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts) +int tc_setup_action(struct flow_action *flow_action, + struct tc_action *actions[]) { + int i, j, index, err = 0; struct tc_action *act; - int i, j, k, err = 0; BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY); BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE); BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED); - if (!exts) + if (!actions) return 0; j = 0; - tcf_exts_for_each_action(i, act, exts) { + tcf_act_for_each_action(i, act, actions) { struct flow_action_entry *entry; entry = &flow_action->entries[j]; @@ -3568,165 +3522,39 @@ int tc_setup_flow_action(struct flow_action *flow_action, goto err_out_locked; entry->hw_stats = tc_act_hw_stats(act->hw_stats); - - if (is_tcf_gact_ok(act)) { - entry->id = FLOW_ACTION_ACCEPT; - } else if (is_tcf_gact_shot(act)) { - entry->id = FLOW_ACTION_DROP; - } else if (is_tcf_gact_trap(act)) { - entry->id = FLOW_ACTION_TRAP; - } else if (is_tcf_gact_goto_chain(act)) { - entry->id = FLOW_ACTION_GOTO; - entry->chain_index = tcf_gact_goto_chain_index(act); - } else if (is_tcf_mirred_egress_redirect(act)) { - entry->id = FLOW_ACTION_REDIRECT; - tcf_mirred_get_dev(entry, act); - } else if (is_tcf_mirred_egress_mirror(act)) { - entry->id = FLOW_ACTION_MIRRED; - tcf_mirred_get_dev(entry, act); - } else if (is_tcf_mirred_ingress_redirect(act)) { - entry->id = FLOW_ACTION_REDIRECT_INGRESS; - tcf_mirred_get_dev(entry, act); - } else if (is_tcf_mirred_ingress_mirror(act)) { - entry->id = FLOW_ACTION_MIRRED_INGRESS; - tcf_mirred_get_dev(entry, act); - } else if (is_tcf_vlan(act)) { - switch (tcf_vlan_action(act)) { - case TCA_VLAN_ACT_PUSH: - entry->id = FLOW_ACTION_VLAN_PUSH; - entry->vlan.vid = tcf_vlan_push_vid(act); - entry->vlan.proto = tcf_vlan_push_proto(act); - entry->vlan.prio = tcf_vlan_push_prio(act); - break; - case TCA_VLAN_ACT_POP: - entry->id = FLOW_ACTION_VLAN_POP; - break; - case TCA_VLAN_ACT_MODIFY: - entry->id = FLOW_ACTION_VLAN_MANGLE; - entry->vlan.vid = tcf_vlan_push_vid(act); - entry->vlan.proto = tcf_vlan_push_proto(act); - entry->vlan.prio = tcf_vlan_push_prio(act); - break; - default: - err = -EOPNOTSUPP; - goto err_out_locked; - } - } else if (is_tcf_tunnel_set(act)) { - entry->id = FLOW_ACTION_TUNNEL_ENCAP; - err = tcf_tunnel_encap_get_tunnel(entry, act); - if (err) - goto err_out_locked; - } else if (is_tcf_tunnel_release(act)) { - entry->id = FLOW_ACTION_TUNNEL_DECAP; - } else if (is_tcf_pedit(act)) { - for (k = 0; k < tcf_pedit_nkeys(act); k++) { - switch (tcf_pedit_cmd(act, k)) { - case TCA_PEDIT_KEY_EX_CMD_SET: - entry->id = FLOW_ACTION_MANGLE; - break; - case TCA_PEDIT_KEY_EX_CMD_ADD: - entry->id = FLOW_ACTION_ADD; - break; - default: - err = -EOPNOTSUPP; - goto err_out_locked; - } - entry->mangle.htype = tcf_pedit_htype(act, k); - entry->mangle.mask = tcf_pedit_mask(act, k); - entry->mangle.val = tcf_pedit_val(act, k); - entry->mangle.offset = tcf_pedit_offset(act, k); - entry->hw_stats = tc_act_hw_stats(act->hw_stats); - entry = &flow_action->entries[++j]; - } - } else if (is_tcf_csum(act)) { - entry->id = FLOW_ACTION_CSUM; - entry->csum_flags = tcf_csum_update_flags(act); - } else if (is_tcf_skbedit_mark(act)) { - entry->id = FLOW_ACTION_MARK; - entry->mark = tcf_skbedit_mark(act); - } else if (is_tcf_sample(act)) { - entry->id = FLOW_ACTION_SAMPLE; - entry->sample.trunc_size = tcf_sample_trunc_size(act); - entry->sample.truncate = tcf_sample_truncate(act); - entry->sample.rate = tcf_sample_rate(act); - tcf_sample_get_group(entry, act); - } else if (is_tcf_police(act)) { - entry->id = FLOW_ACTION_POLICE; - entry->police.burst = tcf_police_burst(act); - entry->police.rate_bytes_ps = - tcf_police_rate_bytes_ps(act); - entry->police.burst_pkt = tcf_police_burst_pkt(act); - entry->police.rate_pkt_ps = - tcf_police_rate_pkt_ps(act); - entry->police.mtu = tcf_police_tcfp_mtu(act); - entry->police.index = act->tcfa_index; - } else if (is_tcf_ct(act)) { - entry->id = FLOW_ACTION_CT; - entry->ct.action = tcf_ct_action(act); - entry->ct.zone = tcf_ct_zone(act); - entry->ct.flow_table = tcf_ct_ft(act); - } else if (is_tcf_mpls(act)) { - switch (tcf_mpls_action(act)) { - case TCA_MPLS_ACT_PUSH: - entry->id = FLOW_ACTION_MPLS_PUSH; - entry->mpls_push.proto = tcf_mpls_proto(act); - entry->mpls_push.label = tcf_mpls_label(act); - entry->mpls_push.tc = tcf_mpls_tc(act); - entry->mpls_push.bos = tcf_mpls_bos(act); - entry->mpls_push.ttl = tcf_mpls_ttl(act); - break; - case TCA_MPLS_ACT_POP: - entry->id = FLOW_ACTION_MPLS_POP; - entry->mpls_pop.proto = tcf_mpls_proto(act); - break; - case TCA_MPLS_ACT_MODIFY: - entry->id = FLOW_ACTION_MPLS_MANGLE; - entry->mpls_mangle.label = tcf_mpls_label(act); - entry->mpls_mangle.tc = tcf_mpls_tc(act); - entry->mpls_mangle.bos = tcf_mpls_bos(act); - entry->mpls_mangle.ttl = tcf_mpls_ttl(act); - break; - default: - err = -EOPNOTSUPP; - goto err_out_locked; - } - } else if (is_tcf_skbedit_ptype(act)) { - entry->id = FLOW_ACTION_PTYPE; - entry->ptype = tcf_skbedit_ptype(act); - } else if (is_tcf_skbedit_priority(act)) { - entry->id = FLOW_ACTION_PRIORITY; - entry->priority = tcf_skbedit_priority(act); - } else if (is_tcf_gate(act)) { - entry->id = FLOW_ACTION_GATE; - entry->gate.index = tcf_gate_index(act); - entry->gate.prio = tcf_gate_prio(act); - entry->gate.basetime = tcf_gate_basetime(act); - entry->gate.cycletime = tcf_gate_cycletime(act); - entry->gate.cycletimeext = tcf_gate_cycletimeext(act); - entry->gate.num_entries = tcf_gate_num_entries(act); - err = tcf_gate_get_entries(entry, act); - if (err) - goto err_out_locked; - } else { - err = -EOPNOTSUPP; + entry->hw_index = act->tcfa_index; + index = 0; + err = tc_setup_offload_act(act, entry, &index); + if (!err) + j += index; + else goto err_out_locked; - } spin_unlock_bh(&act->tcfa_lock); - - if (!is_tcf_pedit(act)) - j++; } err_out: if (err) - tc_cleanup_flow_action(flow_action); + tc_cleanup_offload_action(flow_action); return err; err_out_locked: spin_unlock_bh(&act->tcfa_lock); goto err_out; } -EXPORT_SYMBOL(tc_setup_flow_action); + +int tc_setup_offload_action(struct flow_action *flow_action, + const struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + if (!exts) + return 0; + + return tc_setup_action(flow_action, exts->actions); +#else + return 0; +#endif +} +EXPORT_SYMBOL(tc_setup_offload_action); unsigned int tcf_exts_num_actions(struct tcf_exts *exts) { diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index aab13ba11767..c73f65738ef7 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -461,7 +461,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.rule->match.key = &f->mkey; cls_flower.classid = f->res.classid; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts); if (err) { kfree(cls_flower.rule); if (skip_sw) { @@ -473,7 +473,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw, &f->flags, &f->in_hw_count, rtnl_held); - tc_cleanup_flow_action(&cls_flower.rule->action); + tc_cleanup_offload_action(&cls_flower.rule->action); kfree(cls_flower.rule); if (err) { @@ -501,12 +501,12 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, rtnl_held); - tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, - cls_flower.stats.pkts, - cls_flower.stats.drops, - cls_flower.stats.lastused, - cls_flower.stats.used_hw_stats, - cls_flower.stats.used_hw_stats_valid); + tcf_exts_hw_stats_update(&f->exts, cls_flower.stats.bytes, + cls_flower.stats.pkts, + cls_flower.stats.drops, + cls_flower.stats.lastused, + cls_flower.stats.used_hw_stats, + cls_flower.stats.used_hw_stats_valid); } static void __fl_put(struct cls_fl_filter *f) @@ -1917,12 +1917,14 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct cls_fl_filter *f, struct fl_flow_mask *mask, unsigned long base, struct nlattr **tb, struct nlattr *est, - struct fl_flow_tmplt *tmplt, u32 flags, + struct fl_flow_tmplt *tmplt, + u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack); + err = tcf_exts_validate_ex(net, tp, tb, est, &f->exts, flags, + fl_flags, extack); if (err < 0) return err; @@ -2036,7 +2038,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], - tp->chain->tmplt_priv, flags, extack); + tp->chain->tmplt_priv, flags, fnew->flags, + extack); if (err) goto errout; @@ -2266,7 +2269,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.rule->match.mask = &f->mask->key; cls_flower.rule->match.key = &f->mkey; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts); if (err) { kfree(cls_flower.rule); if (tc_skip_sw(f->flags)) { @@ -2283,7 +2286,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_SETUP_CLSFLOWER, &cls_flower, cb_priv, &f->flags, &f->in_hw_count); - tc_cleanup_flow_action(&cls_flower.rule->action); + tc_cleanup_offload_action(&cls_flower.rule->action); kfree(cls_flower.rule); if (err) { diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 24f0046ce0b3..ca5670fd5228 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -97,7 +97,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.cookie = cookie; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts); if (err) { kfree(cls_mall.rule); mall_destroy_hw_filter(tp, head, cookie, NULL); @@ -111,7 +111,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw, &head->flags, &head->in_hw_count, true); - tc_cleanup_flow_action(&cls_mall.rule->action); + tc_cleanup_offload_action(&cls_mall.rule->action); kfree(cls_mall.rule); if (err) { @@ -163,12 +163,13 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { static int mall_set_parms(struct net *net, struct tcf_proto *tp, struct cls_mall_head *head, unsigned long base, struct nlattr **tb, - struct nlattr *est, u32 flags, + struct nlattr *est, u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &head->exts, flags, extack); + err = tcf_exts_validate_ex(net, tp, tb, est, &head->exts, flags, + fl_flags, extack); if (err < 0) return err; @@ -226,8 +227,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, goto err_alloc_percpu; } - err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], flags, - extack); + err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], + flags, new->flags, extack); if (err) goto err_set_parms; @@ -301,7 +302,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; cls_mall.cookie = (unsigned long)head; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts); if (err) { kfree(cls_mall.rule); if (add && tc_skip_sw(head->flags)) { @@ -314,7 +315,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv, &head->flags, &head->in_hw_count); - tc_cleanup_flow_action(&cls_mall.rule->action); + tc_cleanup_offload_action(&cls_mall.rule->action); kfree(cls_mall.rule); if (err) @@ -336,11 +337,11 @@ static void mall_stats_hw_filter(struct tcf_proto *tp, tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true); - tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes, - cls_mall.stats.pkts, cls_mall.stats.drops, - cls_mall.stats.lastused, - cls_mall.stats.used_hw_stats, - cls_mall.stats.used_hw_stats_valid); + tcf_exts_hw_stats_update(&head->exts, cls_mall.stats.bytes, + cls_mall.stats.pkts, cls_mall.stats.drops, + cls_mall.stats.lastused, + cls_mall.stats.used_hw_stats, + cls_mall.stats.used_hw_stats_valid); } static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4272814487f0..cf5649292ee0 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -709,12 +709,13 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { static int u32_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tc_u_knode *n, struct nlattr **tb, - struct nlattr *est, u32 flags, + struct nlattr *est, u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &n->exts, flags, extack); + err = tcf_exts_validate_ex(net, tp, tb, est, &n->exts, flags, + fl_flags, extack); if (err < 0) return err; @@ -895,7 +896,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -ENOMEM; err = u32_set_parms(net, tp, base, new, tb, - tca[TCA_RATE], flags, extack); + tca[TCA_RATE], flags, new->flags, + extack); if (err) { u32_destroy_key(new, false); @@ -1060,8 +1062,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], flags, - extack); + err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], + flags, n->flags, extack); if (err == 0) { struct tc_u_knode __rcu **ins; struct tc_u_knode *pins; diff --git a/net/sctp/input.c b/net/sctp/input.c index 1f1786021d9c..90e12bafdd48 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -746,23 +746,21 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) struct sock *sk = ep->base.sk; struct net *net = sock_net(sk); struct sctp_hashbucket *head; - struct sctp_ep_common *epb; - epb = &ep->base; - epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port); - head = &sctp_ep_hashtable[epb->hashent]; + ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port); + head = &sctp_ep_hashtable[ep->hashent]; if (sk->sk_reuseport) { bool any = sctp_is_ep_boundall(sk); - struct sctp_ep_common *epb2; + struct sctp_endpoint *ep2; struct list_head *list; int cnt = 0, err = 1; list_for_each(list, &ep->base.bind_addr.address_list) cnt++; - sctp_for_each_hentry(epb2, &head->chain) { - struct sock *sk2 = epb2->sk; + sctp_for_each_hentry(ep2, &head->chain) { + struct sock *sk2 = ep2->base.sk; if (!net_eq(sock_net(sk2), net) || sk2 == sk || !uid_eq(sock_i_uid(sk2), sock_i_uid(sk)) || @@ -789,7 +787,7 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) } write_lock(&head->lock); - hlist_add_head(&epb->node, &head->chain); + hlist_add_head(&ep->node, &head->chain); write_unlock(&head->lock); return 0; } @@ -811,19 +809,16 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) { struct sock *sk = ep->base.sk; struct sctp_hashbucket *head; - struct sctp_ep_common *epb; - epb = &ep->base; + ep->hashent = sctp_ep_hashfn(sock_net(sk), ep->base.bind_addr.port); - epb->hashent = sctp_ep_hashfn(sock_net(sk), epb->bind_addr.port); - - head = &sctp_ep_hashtable[epb->hashent]; + head = &sctp_ep_hashtable[ep->hashent]; if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); write_lock(&head->lock); - hlist_del_init(&epb->node); + hlist_del_init(&ep->node); write_unlock(&head->lock); } @@ -856,7 +851,6 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint( const union sctp_addr *paddr) { struct sctp_hashbucket *head; - struct sctp_ep_common *epb; struct sctp_endpoint *ep; struct sock *sk; __be16 lport; @@ -866,8 +860,7 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint( hash = sctp_ep_hashfn(net, ntohs(lport)); head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); - sctp_for_each_hentry(epb, &head->chain) { - ep = sctp_ep(epb); + sctp_for_each_hentry(ep, &head->chain) { if (sctp_endpoint_is_match(ep, net, laddr)) goto hit; } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 982a87b3e11f..f13d6a34f32f 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -161,7 +161,6 @@ static void *sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos) static int sctp_eps_seq_show(struct seq_file *seq, void *v) { struct sctp_hashbucket *head; - struct sctp_ep_common *epb; struct sctp_endpoint *ep; struct sock *sk; int hash = *(loff_t *)v; @@ -171,18 +170,17 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) head = &sctp_ep_hashtable[hash]; read_lock_bh(&head->lock); - sctp_for_each_hentry(epb, &head->chain) { - ep = sctp_ep(epb); - sk = epb->sk; + sctp_for_each_hentry(ep, &head->chain) { + sk = ep->base.sk; if (!net_eq(sock_net(sk), seq_file_net(seq))) continue; seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5u %5lu ", ep, sk, sctp_sk(sk)->type, sk->sk_state, hash, - epb->bind_addr.port, + ep->base.bind_addr.port, from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk)); - sctp_seq_dump_local_addrs(seq, epb); + sctp_seq_dump_local_addrs(seq, &ep->base); seq_printf(seq, "\n"); } read_unlock_bh(&head->lock); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 055a6d3ec6e2..eed3b87f51f4 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5294,14 +5294,14 @@ int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p) { int err = 0; int hash = 0; - struct sctp_ep_common *epb; + struct sctp_endpoint *ep; struct sctp_hashbucket *head; for (head = sctp_ep_hashtable; hash < sctp_ep_hashsize; hash++, head++) { read_lock_bh(&head->lock); - sctp_for_each_hentry(epb, &head->chain) { - err = cb(sctp_ep(epb), p); + sctp_for_each_hentry(ep, &head->chain) { + err = cb(ep, p); if (err) break; } diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 869c43d4414c..eb822052d344 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -245,19 +245,7 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) oper_freq - MHZ_TO_KHZ(oper_width) / 2) return false; break; - case NL80211_CHAN_WIDTH_40: - if (chandef->center_freq1 != control_freq + 10 && - chandef->center_freq1 != control_freq - 10) - return false; - if (chandef->center_freq2) - return false; - break; case NL80211_CHAN_WIDTH_80P80: - if (chandef->center_freq1 != control_freq + 30 && - chandef->center_freq1 != control_freq + 10 && - chandef->center_freq1 != control_freq - 10 && - chandef->center_freq1 != control_freq - 30) - return false; if (!chandef->center_freq2) return false; /* adjacent is not allowed -- that's a 160 MHz channel */ @@ -265,28 +253,42 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) chandef->center_freq2 - chandef->center_freq1 == 80) return false; break; - case NL80211_CHAN_WIDTH_80: - if (chandef->center_freq1 != control_freq + 30 && - chandef->center_freq1 != control_freq + 10 && - chandef->center_freq1 != control_freq - 10 && - chandef->center_freq1 != control_freq - 30) - return false; + default: if (chandef->center_freq2) return false; break; - case NL80211_CHAN_WIDTH_160: - if (chandef->center_freq1 != control_freq + 70 && - chandef->center_freq1 != control_freq + 50 && - chandef->center_freq1 != control_freq + 30 && - chandef->center_freq1 != control_freq + 10 && - chandef->center_freq1 != control_freq - 10 && - chandef->center_freq1 != control_freq - 30 && - chandef->center_freq1 != control_freq - 50 && - chandef->center_freq1 != control_freq - 70) - return false; - if (chandef->center_freq2) - return false; + } + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_1: + case NL80211_CHAN_WIDTH_2: + case NL80211_CHAN_WIDTH_4: + case NL80211_CHAN_WIDTH_8: + case NL80211_CHAN_WIDTH_16: + /* all checked above */ break; + case NL80211_CHAN_WIDTH_160: + if (chandef->center_freq1 == control_freq + 70 || + chandef->center_freq1 == control_freq + 50 || + chandef->center_freq1 == control_freq - 50 || + chandef->center_freq1 == control_freq - 70) + break; + fallthrough; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_80: + if (chandef->center_freq1 == control_freq + 30 || + chandef->center_freq1 == control_freq - 30) + break; + fallthrough; + case NL80211_CHAN_WIDTH_40: + if (chandef->center_freq1 == control_freq + 10 || + chandef->center_freq1 == control_freq - 10) + break; + fallthrough; default: return false; } @@ -712,6 +714,19 @@ static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy, return false; } +static bool +cfg80211_offchan_chain_is_active(struct cfg80211_registered_device *rdev, + struct ieee80211_channel *channel) +{ + if (!rdev->background_radar_wdev) + return false; + + if (!cfg80211_chandef_valid(&rdev->background_radar_chandef)) + return false; + + return cfg80211_is_sub_chan(&rdev->background_radar_chandef, channel); +} + bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, struct ieee80211_channel *chan) { @@ -728,6 +743,9 @@ bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, if (cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan)) return true; + + if (cfg80211_offchan_chain_is_active(rdev, chan)) + return true; } return false; diff --git a/net/wireless/core.c b/net/wireless/core.c index eb297e1015e0..3a54c8e6b6c6 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -545,6 +545,10 @@ use_default_name: INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work); INIT_WORK(&rdev->conn_work, cfg80211_conn_work); INIT_WORK(&rdev->event_work, cfg80211_event_work); + INIT_WORK(&rdev->background_cac_abort_wk, + cfg80211_background_cac_abort_wk); + INIT_DELAYED_WORK(&rdev->background_cac_done_wk, + cfg80211_background_cac_done_wk); init_waitqueue_head(&rdev->dev_wait); @@ -733,6 +737,7 @@ int wiphy_register(struct wiphy *wiphy) if (wiphy->interface_modes & ~(BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_P2P_CLIENT) | BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_MESH_POINT) | BIT(NL80211_IFTYPE_P2P_GO) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_P2P_DEVICE) | @@ -1054,11 +1059,13 @@ void wiphy_unregister(struct wiphy *wiphy) cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); + cancel_delayed_work_sync(&rdev->background_cac_done_wk); flush_work(&rdev->destroy_work); flush_work(&rdev->sched_scan_stop_wk); flush_work(&rdev->propagate_radar_detect_wk); flush_work(&rdev->propagate_cac_done_wk); flush_work(&rdev->mgmt_registrations_update_wk); + flush_work(&rdev->background_cac_abort_wk); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) @@ -1207,6 +1214,8 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, cfg80211_pmsr_wdev_down(wdev); + cfg80211_stop_background_radar_detection(wdev); + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: __cfg80211_leave_ibss(rdev, dev, true); diff --git a/net/wireless/core.h b/net/wireless/core.h index 1720abf36f92..3a7dbd63d8c6 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -84,6 +84,11 @@ struct cfg80211_registered_device { struct delayed_work dfs_update_channels_wk; + struct wireless_dev *background_radar_wdev; + struct cfg80211_chan_def background_radar_chandef; + struct delayed_work background_cac_done_wk; + struct work_struct background_cac_abort_wk; + /* netlink port which started critical protocol (0 means not started) */ u32 crit_proto_nlportid; @@ -491,6 +496,17 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev); +int +cfg80211_start_background_radar_detection(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_chan_def *chandef); + +void cfg80211_stop_background_radar_detection(struct wireless_dev *wdev); + +void cfg80211_background_cac_done_wk(struct work_struct *work); + +void cfg80211_background_cac_abort_wk(struct work_struct *work); + bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, struct ieee80211_channel *chan); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 783acd2c4211..c8155a483ec2 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -905,13 +905,13 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) } -void cfg80211_radar_event(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - gfp_t gfp) +void __cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + bool offchan, gfp_t gfp) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - trace_cfg80211_radar_event(wiphy, chandef); + trace_cfg80211_radar_event(wiphy, chandef, offchan); /* only set the chandef supplied channel to unavailable, in * case the radar is detected on only one of multiple channels @@ -919,6 +919,9 @@ void cfg80211_radar_event(struct wiphy *wiphy, */ cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE); + if (offchan) + queue_work(cfg80211_wq, &rdev->background_cac_abort_wk); + cfg80211_sched_dfs_chan_update(rdev); nl80211_radar_notify(rdev, chandef, NL80211_RADAR_DETECTED, NULL, gfp); @@ -926,7 +929,7 @@ void cfg80211_radar_event(struct wiphy *wiphy, memcpy(&rdev->radar_chandef, chandef, sizeof(struct cfg80211_chan_def)); queue_work(cfg80211_wq, &rdev->propagate_radar_detect_wk); } -EXPORT_SYMBOL(cfg80211_radar_event); +EXPORT_SYMBOL(__cfg80211_radar_event); void cfg80211_cac_event(struct net_device *netdev, const struct cfg80211_chan_def *chandef, @@ -970,3 +973,143 @@ void cfg80211_cac_event(struct net_device *netdev, nl80211_radar_notify(rdev, chandef, event, netdev, gfp); } EXPORT_SYMBOL(cfg80211_cac_event); + +static void +__cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + const struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event) +{ + struct wiphy *wiphy = &rdev->wiphy; + struct net_device *netdev; + + lockdep_assert_wiphy(&rdev->wiphy); + + if (!cfg80211_chandef_valid(chandef)) + return; + + if (!rdev->background_radar_wdev) + return; + + switch (event) { + case NL80211_RADAR_CAC_FINISHED: + cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE); + memcpy(&rdev->cac_done_chandef, chandef, sizeof(*chandef)); + queue_work(cfg80211_wq, &rdev->propagate_cac_done_wk); + cfg80211_sched_dfs_chan_update(rdev); + wdev = rdev->background_radar_wdev; + break; + case NL80211_RADAR_CAC_ABORTED: + if (!cancel_delayed_work(&rdev->background_cac_done_wk)) + return; + wdev = rdev->background_radar_wdev; + break; + case NL80211_RADAR_CAC_STARTED: + break; + default: + return; + } + + netdev = wdev ? wdev->netdev : NULL; + nl80211_radar_notify(rdev, chandef, event, netdev, GFP_KERNEL); +} + +static void +cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, + const struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event) +{ + wiphy_lock(&rdev->wiphy); + __cfg80211_background_cac_event(rdev, rdev->background_radar_wdev, + chandef, event); + wiphy_unlock(&rdev->wiphy); +} + +void cfg80211_background_cac_done_wk(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct cfg80211_registered_device *rdev; + + rdev = container_of(delayed_work, struct cfg80211_registered_device, + background_cac_done_wk); + cfg80211_background_cac_event(rdev, &rdev->background_radar_chandef, + NL80211_RADAR_CAC_FINISHED); +} + +void cfg80211_background_cac_abort_wk(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(work, struct cfg80211_registered_device, + background_cac_abort_wk); + cfg80211_background_cac_event(rdev, &rdev->background_radar_chandef, + NL80211_RADAR_CAC_ABORTED); +} + +void cfg80211_background_cac_abort(struct wiphy *wiphy) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + + queue_work(cfg80211_wq, &rdev->background_cac_abort_wk); +} +EXPORT_SYMBOL(cfg80211_background_cac_abort); + +int +cfg80211_start_background_radar_detection(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_chan_def *chandef) +{ + unsigned int cac_time_ms; + int err; + + lockdep_assert_wiphy(&rdev->wiphy); + + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_RADAR_BACKGROUND)) + return -EOPNOTSUPP; + + /* Offchannel chain already locked by another wdev */ + if (rdev->background_radar_wdev && rdev->background_radar_wdev != wdev) + return -EBUSY; + + /* CAC already in progress on the offchannel chain */ + if (rdev->background_radar_wdev == wdev && + delayed_work_pending(&rdev->background_cac_done_wk)) + return -EBUSY; + + err = rdev_set_radar_background(rdev, chandef); + if (err) + return err; + + cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, chandef); + if (!cac_time_ms) + cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; + + rdev->background_radar_chandef = *chandef; + rdev->background_radar_wdev = wdev; /* Get offchain ownership */ + + __cfg80211_background_cac_event(rdev, wdev, chandef, + NL80211_RADAR_CAC_STARTED); + queue_delayed_work(cfg80211_wq, &rdev->background_cac_done_wk, + msecs_to_jiffies(cac_time_ms)); + + return 0; +} + +void cfg80211_stop_background_radar_detection(struct wireless_dev *wdev) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + + lockdep_assert_wiphy(wiphy); + + if (wdev != rdev->background_radar_wdev) + return; + + rdev_set_radar_background(rdev, NULL); + rdev->background_radar_wdev = NULL; /* Release offchain ownership */ + + __cfg80211_background_cac_event(rdev, wdev, + &rdev->background_radar_chandef, + NL80211_RADAR_CAC_ABORTED); +} diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a27b3b5fa210..578bff9c378b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -776,6 +776,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MBSSID_CONFIG] = NLA_POLICY_NESTED(nl80211_mbssid_config_policy), [NL80211_ATTR_MBSSID_ELEMS] = { .type = NLA_NESTED }, + [NL80211_ATTR_RADAR_BACKGROUND] = { .type = NLA_FLAG }, + [NL80211_ATTR_AP_SETTINGS_FLAGS] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -3669,14 +3671,16 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_ADHOC: { - const u8 *ssid_ie; + const struct element *ssid_elem; + if (!wdev->current_bss) break; rcu_read_lock(); - ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub, - WLAN_EID_SSID); - if (ssid_ie && - nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2)) + ssid_elem = ieee80211_bss_get_elem(&wdev->current_bss->pub, + WLAN_EID_SSID); + if (ssid_elem && + nla_put(msg, NL80211_ATTR_SSID, ssid_elem->datalen, + ssid_elem->data)) goto nla_put_failure_rcu_locked; rcu_read_unlock(); break; @@ -5711,8 +5715,11 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) nl80211_calculate_ap_params(params); - if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) - params->flags |= AP_SETTINGS_EXTERNAL_AUTH_SUPPORT; + if (info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]) + params->flags = nla_get_u32( + info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]); + else if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) + params->flags |= NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT; wdev_lock(wdev); err = rdev_start_ap(rdev, dev, params); @@ -9274,38 +9281,60 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, struct cfg80211_chan_def chandef; enum nl80211_dfs_regions dfs_region; unsigned int cac_time_ms; - int err; + int err = -EINVAL; + + flush_delayed_work(&rdev->dfs_update_channels_wk); + + wiphy_lock(wiphy); dfs_region = reg_get_dfs_region(wiphy); if (dfs_region == NL80211_DFS_UNSET) - return -EINVAL; + goto unlock; err = nl80211_parse_chandef(rdev, info, &chandef); if (err) - return err; - - if (netif_carrier_ok(dev)) - return -EBUSY; - - if (wdev->cac_started) - return -EBUSY; + goto unlock; err = cfg80211_chandef_dfs_required(wiphy, &chandef, wdev->iftype); if (err < 0) - return err; + goto unlock; - if (err == 0) - return -EINVAL; + if (err == 0) { + err = -EINVAL; + goto unlock; + } - if (!cfg80211_chandef_dfs_usable(wiphy, &chandef)) - return -EINVAL; + if (!cfg80211_chandef_dfs_usable(wiphy, &chandef)) { + err = -EINVAL; + goto unlock; + } + + if (nla_get_flag(info->attrs[NL80211_ATTR_RADAR_BACKGROUND])) { + err = cfg80211_start_background_radar_detection(rdev, wdev, + &chandef); + goto unlock; + } + + if (netif_carrier_ok(dev)) { + err = -EBUSY; + goto unlock; + } + + if (wdev->cac_started) { + err = -EBUSY; + goto unlock; + } /* CAC start is offloaded to HW and can't be started manually */ - if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD)) - return -EOPNOTSUPP; + if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD)) { + err = -EOPNOTSUPP; + goto unlock; + } - if (!rdev->ops->start_radar_detection) - return -EOPNOTSUPP; + if (!rdev->ops->start_radar_detection) { + err = -EOPNOTSUPP; + goto unlock; + } cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, &chandef); if (WARN_ON(!cac_time_ms)) @@ -9318,6 +9347,9 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, wdev->cac_start_time = jiffies; wdev->cac_time_ms = cac_time_ms; } +unlock: + wiphy_unlock(wiphy); + return err; } @@ -15954,7 +15986,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_radar_detection, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NO_WIPHY_MTX, }, { .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, @@ -17035,6 +17068,44 @@ static void nl80211_send_remain_on_chan_event( nlmsg_free(msg); } +void cfg80211_assoc_comeback(struct net_device *netdev, + struct cfg80211_bss *bss, u32 timeout) +{ + struct wireless_dev *wdev = netdev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + void *hdr; + + trace_cfg80211_assoc_comeback(wdev, bss->bssid, timeout); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ASSOC_COMEBACK); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bss->bssid) || + nla_put_u32(msg, NL80211_ATTR_TIMEOUT, timeout)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_MLME, GFP_KERNEL); + return; + + nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_assoc_comeback); + void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, unsigned int duration, gfp_t gfp) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index cc1efec4b27b..439bcf52369c 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1395,4 +1395,21 @@ rdev_set_fils_aad(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_radar_background(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef) +{ + struct wiphy *wiphy = &rdev->wiphy; + int ret; + + if (!rdev->ops->set_radar_background) + return -EOPNOTSUPP; + + trace_rdev_set_radar_background(wiphy, chandef); + ret = rdev->ops->set_radar_background(wiphy, chandef); + trace_rdev_return_int(wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index f8f01a3e020b..ec25924a1c26 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2371,6 +2371,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_MESH_POINT: if (!wdev->beacon_interval) goto wdev_inactive_unlock; chandef = wdev->chandef; @@ -2409,6 +2410,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MESH_POINT: wiphy_lock(wiphy); ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); wiphy_unlock(wiphy); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 22e92be61938..b888522f133b 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -406,22 +406,20 @@ static int cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss, struct cfg80211_bss *nontrans_bss) { - const u8 *ssid; - size_t ssid_len; + const struct element *ssid_elem; struct cfg80211_bss *bss = NULL; rcu_read_lock(); - ssid = ieee80211_bss_get_ie(nontrans_bss, WLAN_EID_SSID); - if (!ssid) { + ssid_elem = ieee80211_bss_get_elem(nontrans_bss, WLAN_EID_SSID); + if (!ssid_elem) { rcu_read_unlock(); return -EINVAL; } - ssid_len = ssid[1]; - ssid = ssid + 2; /* check if nontrans_bss is in the list */ list_for_each_entry(bss, &trans_bss->nontrans_list, nontrans_list) { - if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) { + if (is_bss(bss, nontrans_bss->bssid, ssid_elem->data, + ssid_elem->datalen)) { rcu_read_unlock(); return 0; } @@ -1795,33 +1793,52 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, } int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, - enum nl80211_band band) + enum nl80211_band band, + enum cfg80211_bss_frame_type ftype) { - const u8 *tmp; - int channel_number = -1; + const struct element *tmp; + + if (band == NL80211_BAND_6GHZ) { + struct ieee80211_he_operation *he_oper; - if (band == NL80211_BAND_S1GHZ) { - tmp = cfg80211_find_ie(WLAN_EID_S1G_OPERATION, ie, ielen); - if (tmp && tmp[1] >= sizeof(struct ieee80211_s1g_oper_ie)) { - struct ieee80211_s1g_oper_ie *s1gop = (void *)(tmp + 2); + tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, + ielen); + if (tmp && tmp->datalen >= sizeof(*he_oper) && + tmp->datalen >= ieee80211_he_oper_size(&tmp->data[1])) { + const struct ieee80211_he_6ghz_oper *he_6ghz_oper; + + he_oper = (void *)&tmp->data[1]; + + he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper); + if (!he_6ghz_oper) + return -1; - channel_number = s1gop->primary_ch; + if (ftype != CFG80211_BSS_FTYPE_BEACON || + he_6ghz_oper->control & IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON) + return he_6ghz_oper->primary; + } + } else if (band == NL80211_BAND_S1GHZ) { + tmp = cfg80211_find_elem(WLAN_EID_S1G_OPERATION, ie, ielen); + if (tmp && tmp->datalen >= sizeof(struct ieee80211_s1g_oper_ie)) { + struct ieee80211_s1g_oper_ie *s1gop = (void *)tmp->data; + + return s1gop->primary_ch; } } else { - tmp = cfg80211_find_ie(WLAN_EID_DS_PARAMS, ie, ielen); - if (tmp && tmp[1] == 1) { - channel_number = tmp[2]; - } else { - tmp = cfg80211_find_ie(WLAN_EID_HT_OPERATION, ie, ielen); - if (tmp && tmp[1] >= sizeof(struct ieee80211_ht_operation)) { - struct ieee80211_ht_operation *htop = (void *)(tmp + 2); + tmp = cfg80211_find_elem(WLAN_EID_DS_PARAMS, ie, ielen); + if (tmp && tmp->datalen == 1) + return tmp->data[0]; - channel_number = htop->primary_chan; - } + tmp = cfg80211_find_elem(WLAN_EID_HT_OPERATION, ie, ielen); + if (tmp && + tmp->datalen >= sizeof(struct ieee80211_ht_operation)) { + struct ieee80211_ht_operation *htop = (void *)tmp->data; + + return htop->primary_chan; } } - return channel_number; + return -1; } EXPORT_SYMBOL(cfg80211_get_ies_channel_number); @@ -1831,18 +1848,20 @@ EXPORT_SYMBOL(cfg80211_get_ies_channel_number); * from neighboring channels and the Beacon frames use the DSSS Parameter Set * element to indicate the current (transmitting) channel, but this might also * be needed on other bands if RX frequency does not match with the actual - * operating channel of a BSS. + * operating channel of a BSS, or if the AP reports a different primary channel. */ static struct ieee80211_channel * cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, struct ieee80211_channel *channel, - enum nl80211_bss_scan_width scan_width) + enum nl80211_bss_scan_width scan_width, + enum cfg80211_bss_frame_type ftype) { u32 freq; int channel_number; struct ieee80211_channel *alt_channel; - channel_number = cfg80211_get_ies_channel_number(ie, ielen, channel->band); + channel_number = cfg80211_get_ies_channel_number(ie, ielen, + channel->band, ftype); if (channel_number < 0) { /* No channel information in frame payload */ @@ -1850,6 +1869,16 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, } freq = ieee80211_channel_to_freq_khz(channel_number, channel->band); + + /* + * In 6GHz, duplicated beacon indication is relevant for + * beacons only. + */ + if (channel->band == NL80211_BAND_6GHZ && + (freq == channel->center_freq || + abs(freq - channel->center_freq) > 80)) + return channel; + alt_channel = ieee80211_get_channel_khz(wiphy, freq); if (!alt_channel) { if (channel->band == NL80211_BAND_2GHZ) { @@ -1911,7 +1940,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, return NULL; channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan, - data->scan_width); + data->scan_width, ftype); if (!channel) return NULL; @@ -2234,7 +2263,8 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, struct ieee80211_mgmt *mgmt, size_t len) { u8 *ie, *new_ie, *pos; - const u8 *nontrans_ssid, *trans_ssid, *mbssid; + const struct element *nontrans_ssid; + const u8 *trans_ssid, *mbssid; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); size_t new_ie_len; @@ -2261,11 +2291,11 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, return; new_ie_len -= mbssid[1]; - nontrans_ssid = ieee80211_bss_get_ie(nontrans_bss, WLAN_EID_SSID); + nontrans_ssid = ieee80211_bss_get_elem(nontrans_bss, WLAN_EID_SSID); if (!nontrans_ssid) return; - new_ie_len += nontrans_ssid[1]; + new_ie_len += nontrans_ssid->datalen; /* generate new ie for nontrans BSS * 1. replace SSID with nontrans BSS' SSID @@ -2282,7 +2312,7 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, pos = new_ie; /* copy the nontransmitted SSID */ - cpy_len = nontrans_ssid[1] + 2; + cpy_len = nontrans_ssid->datalen + 2; memcpy(pos, nontrans_ssid, cpy_len); pos += cpy_len; /* copy the IEs between SSID and MBSSID */ @@ -2333,6 +2363,7 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, size_t ielen, min_hdr_len = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); int bss_type; + enum cfg80211_bss_frame_type ftype; BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) != offsetof(struct ieee80211_mgmt, u.beacon.variable)); @@ -2369,8 +2400,16 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, variable = ext->u.s1g_beacon.variable; } + if (ieee80211_is_beacon(mgmt->frame_control)) + ftype = CFG80211_BSS_FTYPE_BEACON; + else if (ieee80211_is_probe_resp(mgmt->frame_control)) + ftype = CFG80211_BSS_FTYPE_PRESP; + else + ftype = CFG80211_BSS_FTYPE_UNKNOWN; + channel = cfg80211_get_bss_channel(wiphy, variable, - ielen, data->chan, data->scan_width); + ielen, data->chan, data->scan_width, + ftype); if (!channel) return NULL; @@ -2687,7 +2726,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, struct cfg80211_registered_device *rdev; struct wiphy *wiphy; struct iw_scan_req *wreq = NULL; - struct cfg80211_scan_request *creq = NULL; + struct cfg80211_scan_request *creq; int i, err, n_channels = 0; enum nl80211_band band; @@ -2702,10 +2741,8 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); - if (rdev->scan_req || rdev->scan_msg) { - err = -EBUSY; - goto out; - } + if (rdev->scan_req || rdev->scan_msg) + return -EBUSY; wiphy = &rdev->wiphy; @@ -2718,10 +2755,8 @@ int cfg80211_wext_siwscan(struct net_device *dev, creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) + n_channels * sizeof(void *), GFP_ATOMIC); - if (!creq) { - err = -ENOMEM; - goto out; - } + if (!creq) + return -ENOMEM; creq->wiphy = wiphy; creq->wdev = dev->ieee80211_ptr; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 08a70b4f090c..ff4d48fcbfb2 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -680,7 +680,9 @@ void __cfg80211_connect_result(struct net_device *dev, bool wextev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - const u8 *country_ie; + const struct element *country_elem; + const u8 *country_data; + u8 country_datalen; #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -762,26 +764,22 @@ void __cfg80211_connect_result(struct net_device *dev, cfg80211_upload_connect_keys(wdev); rcu_read_lock(); - country_ie = ieee80211_bss_get_ie(cr->bss, WLAN_EID_COUNTRY); - if (!country_ie) { + country_elem = ieee80211_bss_get_elem(cr->bss, WLAN_EID_COUNTRY); + if (!country_elem) { rcu_read_unlock(); return; } - country_ie = kmemdup(country_ie, 2 + country_ie[1], GFP_ATOMIC); + country_datalen = country_elem->datalen; + country_data = kmemdup(country_elem->data, country_datalen, GFP_ATOMIC); rcu_read_unlock(); - if (!country_ie) + if (!country_data) return; - /* - * ieee80211_bss_get_ie() ensures we can access: - * - country_ie + 2, the start of the country ie data, and - * - and country_ie[1] which is the IE length - */ regulatory_hint_country_ie(wdev->wiphy, cr->bss->channel->band, - country_ie + 2, country_ie[1]); - kfree(country_ie); + country_data, country_datalen); + kfree(country_data); } /* Consumes bss object one way or another */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ad6c16a06bcb..228079d7690a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3053,18 +3053,21 @@ TRACE_EVENT(cfg80211_ch_switch_started_notify, ); TRACE_EVENT(cfg80211_radar_event, - TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), - TP_ARGS(wiphy, chandef), + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, + bool offchan), + TP_ARGS(wiphy, chandef, offchan), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY + __field(bool, offchan) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); + __entry->offchan = offchan; ), - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, - WIPHY_PR_ARG, CHAN_DEF_PR_ARG) + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", offchan %d", + WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->offchan) ); TRACE_EVENT(cfg80211_cac_event, @@ -3674,6 +3677,42 @@ TRACE_EVENT(cfg80211_bss_color_notify, __entry->color_bitmap) ); +TRACE_EVENT(rdev_set_radar_background, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + + TP_ARGS(wiphy, chandef), + + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef) + ), + + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + +TRACE_EVENT(cfg80211_assoc_comeback, + TP_PROTO(struct wireless_dev *wdev, const u8 *bssid, u32 timeout), + TP_ARGS(wdev, bssid, timeout), + TP_STRUCT__entry( + WDEV_ENTRY + MAC_ENTRY(bssid) + __field(u32, timeout) + ), + TP_fast_assign( + WDEV_ASSIGN; + MAC_ASSIGN(bssid, bssid); + __entry->timeout = timeout; + ), + TP_printk(WDEV_PR_FMT ", " MAC_PR_FMT ", timeout: %u TUs", + WDEV_PR_ARG, MAC_PR_ARG(bssid), __entry->timeout) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 193a18a53142..cd09a9042261 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -212,18 +212,18 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, wdev_lock(wdev); if (wdev->current_bss) { - const u8 *ie; + const struct element *ssid_elem; rcu_read_lock(); - ie = ieee80211_bss_get_ie(&wdev->current_bss->pub, - WLAN_EID_SSID); - if (ie) { + ssid_elem = ieee80211_bss_get_elem(&wdev->current_bss->pub, + WLAN_EID_SSID); + if (ssid_elem) { data->flags = 1; - data->length = ie[1]; + data->length = ssid_elem->datalen; if (data->length > IW_ESSID_MAX_SIZE) ret = -EINVAL; else - memcpy(ssid, ie + 2, data->length); + memcpy(ssid, ssid_elem->data, data->length); } rcu_read_unlock(); } else if (wdev->wext.connect.ssid && wdev->wext.connect.ssid_len) { diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh index 10e0f3dbc930..5f6eb965cfd1 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh @@ -217,9 +217,11 @@ short_payload_get() dest_mac=$(mac_get $h1) p=$(: )"08:"$( : VXLAN flags - )"01:00:00:"$( : VXLAN reserved + )"00:00:00:"$( : VXLAN reserved )"00:03:e8:"$( : VXLAN VNI : 1000 )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr ) echo $p } @@ -263,7 +265,8 @@ decap_error_test() corrupted_packet_test "Decap error: Reserved bits in use" \ "reserved_bits_payload_get" - corrupted_packet_test "Decap error: No L2 header" "short_payload_get" + corrupted_packet_test "Decap error: Too short inner packet" \ + "short_payload_get" } mc_smac_payload_get() diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh new file mode 100755 index 000000000000..f6c16cbb6cf7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh @@ -0,0 +1,342 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap tunnel drops and exceptions functionality over mlxsw. +# Check all traps to make sure they are triggered under the right +# conditions. + +# +------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 2001:db8:1::1/64 | +# +----|-------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR1 (802.1d) | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 2001:db8:3::1 | | +# | | id 1000 dstport $VXPORT | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRF2 | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + decap_error_test + overlay_smac_is_mc_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +: ${VXPORT:=4789} +export VXPORT + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + tc qdisc add dev $swp1 clsact + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link add name vx1 type vxlan id 1000 local 2001:db8:3::1 \ + dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \ + tos inherit ttl 100 + ip link set dev vx1 master br1 + ip link set dev vx1 up + + ip link set dev $rp1 up + ip address add dev $rp1 2001:db8:3::1/64 +} + +switch_destroy() +{ + ip address del dev $rp1 2001:db8:3::1/64 + ip link set dev $rp1 down + + ip link set dev vx1 down + ip link set dev vx1 nomaster + ip link del dev vx1 + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + tc qdisc del dev $swp1 clsact + + ip link set dev br1 down + ip link del dev br1 +} + +vrf2_create() +{ + simple_if_init $rp2 2001:db8:3::2/64 +} + +vrf2_destroy() +{ + simple_if_fini $rp2 2001:db8:3::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + h1_create + switch_create + vrf2_create +} + +cleanup() +{ + pre_cleanup + + vrf2_destroy + switch_destroy + h1_destroy + forwarding_restore + vrf_cleanup +} + +ecn_payload_get() +{ + local dest_mac=$(mac_get $h1) + local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + )"86:dd:"$( : ETH type + )"6"$( : IP version + )"0:0"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:08:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + )"80:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) + echo $p +} + +ecn_decap_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local ecn_desc=$1; shift + local outer_tos=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip 2001:db8:1::3 dst_ip 2001:db8:1::1 action pass + + rp1_mac=$(mac_get $rp1) + payload=$(ecn_payload_get) + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 2001:db8:3::1 -t udp \ + sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +reserved_bits_payload_get() +{ + local dest_mac=$(mac_get $h1) + local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + p=$(: + )"08:"$( : VXLAN flags + )"01:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + )"86:dd:"$( : ETH type + )"6"$( : IP version + )"0:0"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:08:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + )"80:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) + echo $p +} + +short_payload_get() +{ + dest_mac=$(mac_get $h1) + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + ) + echo $p +} + +corrupted_packet_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local payload_get=$1; shift + local mz_pid + + RET=0 + + # In case of too short packet, there is no any inner packet, + # so the matching will always succeed + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower skip_hw src_ip 2001:db8:3::1 dst_ip 2001:db8:1::1 \ + action pass + + rp1_mac=$(mac_get $rp1) + payload=$($payload_get) + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +decap_error_test() +{ + ecn_decap_test "Decap error" "ECT(1)" 01 + ecn_decap_test "Decap error" "ECT(0)" 02 + ecn_decap_test "Decap error" "CE" 03 + + corrupted_packet_test "Decap error: Reserved bits in use" \ + "reserved_bits_payload_get" + corrupted_packet_test "Decap error: Too short inner packet" \ + "short_payload_get" +} + +mc_smac_payload_get() +{ + local dest_mac=$(mac_get $h1) + local source_mac="01:02:03:04:05:06" + local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"86:dd:"$( : ETH type + )"6"$( : IP version + )"0:0"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:08:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + )"80:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) + echo $p +} + +overlay_smac_is_mc_test() +{ + local trap_name="overlay_smac_is_mc" + local mz_pid + + RET=0 + + # The matching will be checked on devlink_trap_drop_test() + # and the filter will be removed on devlink_trap_drop_cleanup() + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_mac 01:02:03:04:05:06 action pass + + rp1_mac=$(mac_get $rp1) + payload=$(mc_smac_payload_get) + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $swp1 101 + + log_test "Overlay source MAC is multicast" + + devlink_trap_drop_cleanup $mz_pid $swp1 "ipv6" 1 101 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh new file mode 100755 index 000000000000..429f7ee735cf --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh @@ -0,0 +1,322 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test VxLAN flooding. The device stores flood records in a singly linked list +# where each record stores up to four IPv6 addresses of remote VTEPs. The test +# verifies that packets are correctly flooded in various cases such as deletion +# of a record in the middle of the list. +# +# +-----------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 2001:db8:1::1/64 | +# +----|------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | + vxlan0 (vxlan) | | +# | | local 2001:db8:2::1 | | +# | | remote 2001:db8:2::{2..17} | | +# | | id 10 dstport 4789 | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:2::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | R2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../../net/forwarding + +ALL_TESTS="flooding_test" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +switch_create() +{ + # Make sure the bridge uses the MAC address of the local port and + # not that of the VxLAN's device + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 address $(mac_get $swp1) + + ip link add name vxlan0 type vxlan id 10 nolearning \ + udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \ + local 2001:db8:2::1 dstport 4789 + + ip address add 2001:db8:2::1/128 dev lo + + ip link set dev $swp1 master br0 + ip link set dev vxlan0 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev vxlan0 up +} + +switch_destroy() +{ + ip link set dev vxlan0 down + ip link set dev $swp1 down + ip link set dev br0 down + + ip link set dev vxlan0 nomaster + ip link set dev $swp1 nomaster + + ip address del 2001:db8:2::1/128 dev lo + + ip link del dev vxlan0 + + ip link del dev br0 +} + +router1_create() +{ + # This router is in the default VRF, where the VxLAN device is + # performing the L3 lookup + ip link set dev $rp1 up + ip address add 2001:db8:3::1/64 dev $rp1 + ip route add 2001:db8:2::0/64 via 2001:db8:3::2 +} + +router1_destroy() +{ + ip route del 2001:db8:2::0/64 via 2001:db8:3::2 + ip address del 2001:db8:3::1/64 dev $rp1 + ip link set dev $rp1 down +} + +router2_create() +{ + # This router is not in the default VRF, so use simple_if_init() + simple_if_init $rp2 2001:db8:3::2/64 +} + +router2_destroy() +{ + simple_if_fini $rp2 2001:db8:3::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + + h1_create + + switch_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +flooding_remotes_add() +{ + local num_remotes=$1 + local lsb + local i + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \ + dst 2001:db8:2::$lsb + done +} + +flooding_filters_add() +{ + local num_remotes=$1 + local lsb + local i + + tc qdisc add dev $rp2 clsact + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \ + flower ip_proto udp dst_ip 2001:db8:2::$lsb \ + dst_port 4789 skip_sw action drop + done +} + +flooding_filters_del() +{ + local num_remotes=$1 + local i + + for i in $(eval echo {1..$num_remotes}); do + tc filter del dev $rp2 ingress protocol ipv6 pref $i \ + handle $i flower + done + + tc qdisc del dev $rp2 clsact +} + +flooding_check_packets() +{ + local packets=("$@") + local num_remotes=${#packets[@]} + local i + + for i in $(eval echo {1..$num_remotes}); do + tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]} + check_err $? "remote $i - did not get expected number of packets" + done +} + +flooding_test() +{ + # Use 16 remote VTEPs that will be stored in 4 records. The array + # 'packets' will store how many packets are expected to be received + # by each remote VTEP at each stage of the test + declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1) + local num_remotes=16 + + RET=0 + + # Add FDB entries for remote VTEPs and corresponding tc filters on the + # ingress of the nexthop router. These filters will count how many + # packets were flooded to each remote VTEP + flooding_remotes_add $num_remotes + flooding_filters_add $num_remotes + + # Send one packet and make sure it is flooded to all the remote VTEPs + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 1 packet" + + # Delete the third record which corresponds to VTEPs with LSB 10..13 + # and check that packet is flooded correctly when we remove a record + # from the middle of the list + RET=0 + + packets=(2 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 2 packets" + + # Delete the first record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 2 3 3 3 3 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 3 packets" + + # Delete the last record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 2 4 4 4 4 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 4 packets" + + # Delete the last record, one entry at a time and make sure single + # entries are correctly removed + RET=0 + + packets=(2 2 2 2 4 5 5 5 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 5 packets" + + RET=0 + + packets=(2 2 2 2 4 5 6 6 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 6 packets" + + RET=0 + + packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 7 packets" + + RET=0 + + packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::9 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 8 packets" + + flooding_filters_del $num_remotes +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh new file mode 100755 index 000000000000..d8fd875ad527 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh @@ -0,0 +1,334 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test VxLAN flooding. The device stores flood records in a singly linked list +# where each record stores up to five IPv6 addresses of remote VTEPs. The test +# verifies that packets are correctly flooded in various cases such as deletion +# of a record in the middle of the list. +# +# +-----------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 2001:db8:1::1/64 | +# +----|------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | + vxlan0 (vxlan) | | +# | | local 2001:db8:2::1 | | +# | | remote 2001:db8:2::{2..21} | | +# | | id 10 dstport 4789 | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:2::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | R2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../../net/forwarding + +ALL_TESTS="flooding_test" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +switch_create() +{ + # Make sure the bridge uses the MAC address of the local port and + # not that of the VxLAN's device + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 address $(mac_get $swp1) + + ip link add name vxlan0 type vxlan id 10 nolearning \ + udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \ + local 2001:db8:2::1 dstport 4789 + + ip address add 2001:db8:2::1/128 dev lo + + ip link set dev $swp1 master br0 + ip link set dev vxlan0 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev vxlan0 up +} + +switch_destroy() +{ + ip link set dev vxlan0 down + ip link set dev $swp1 down + ip link set dev br0 down + + ip link set dev vxlan0 nomaster + ip link set dev $swp1 nomaster + + ip address del 2001:db8:2::1/128 dev lo + + ip link del dev vxlan0 + + ip link del dev br0 +} + +router1_create() +{ + # This router is in the default VRF, where the VxLAN device is + # performing the L3 lookup + ip link set dev $rp1 up + ip address add 2001:db8:3::1/64 dev $rp1 + ip route add 2001:db8:2::0/64 via 2001:db8:3::2 +} + +router1_destroy() +{ + ip route del 2001:db8:2::0/64 via 2001:db8:3::2 + ip address del 2001:db8:3::1/64 dev $rp1 + ip link set dev $rp1 down +} + +router2_create() +{ + # This router is not in the default VRF, so use simple_if_init() + simple_if_init $rp2 2001:db8:3::2/64 +} + +router2_destroy() +{ + simple_if_fini $rp2 2001:db8:3::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + + h1_create + + switch_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +flooding_remotes_add() +{ + local num_remotes=$1 + local lsb + local i + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \ + dst 2001:db8:2::$lsb + done +} + +flooding_filters_add() +{ + local num_remotes=$1 + local lsb + local i + + tc qdisc add dev $rp2 clsact + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \ + flower ip_proto udp dst_ip 2001:db8:2::$lsb \ + dst_port 4789 skip_sw action drop + done +} + +flooding_filters_del() +{ + local num_remotes=$1 + local i + + for i in $(eval echo {1..$num_remotes}); do + tc filter del dev $rp2 ingress protocol ipv6 pref $i \ + handle $i flower + done + + tc qdisc del dev $rp2 clsact +} + +flooding_check_packets() +{ + local packets=("$@") + local num_remotes=${#packets[@]} + local i + + for i in $(eval echo {1..$num_remotes}); do + tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]} + check_err $? "remote $i - did not get expected number of packets" + done +} + +flooding_test() +{ + # Use 20 remote VTEPs that will be stored in 4 records. The array + # 'packets' will store how many packets are expected to be received + # by each remote VTEP at each stage of the test + declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1) + local num_remotes=20 + + RET=0 + + # Add FDB entries for remote VTEPs and corresponding tc filters on the + # ingress of the nexthop router. These filters will count how many + # packets were flooded to each remote VTEP + flooding_remotes_add $num_remotes + flooding_filters_add $num_remotes + + # Send one packet and make sure it is flooded to all the remote VTEPs + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 1 packet" + + # Delete the third record which corresponds to VTEPs with LSB 12..16 + # and check that packet is flooded correctly when we remove a record + # from the middle of the list + RET=0 + + packets=(2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 2 2 2 2 2) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 2 packets" + + # Delete the first record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 2 2 3 3 3 3 3 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 3 packets" + + # Delete the last record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 2 2 4 4 4 4 4 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::18 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::19 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::20 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::21 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 4 packets" + + # Delete the last record, one entry at a time and make sure single + # entries are correctly removed + RET=0 + + packets=(2 2 2 2 2 4 5 5 5 5 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 5 packets" + + RET=0 + + packets=(2 2 2 2 2 4 5 6 6 6 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 6 packets" + + RET=0 + + packets=(2 2 2 2 2 4 5 6 7 7 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::9 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 7 packets" + + RET=0 + + packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 8 packets" + + RET=0 + + packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 9 packets" + + flooding_filters_del $num_remotes +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index 3639b89c81ba..99a332b712f0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -4,10 +4,35 @@ # Test various aspects of VxLAN offloading which are specific to mlxsw, such # as sanitization of invalid configurations and offload indication. -lib_dir=$(dirname $0)/../../../net/forwarding +: ${ADDR_FAMILY:=ipv4} +export ADDR_FAMILY + +: ${LOCAL_IP_1:=198.51.100.1} +export LOCAL_IP_1 + +: ${LOCAL_IP_2:=198.51.100.2} +export LOCAL_IP_2 + +: ${PREFIX_LEN:=32} +export PREFIX_LEN + +: ${UDPCSUM_FLAFS:=noudpcsum} +export UDPCSUM_FLAFS -ALL_TESTS="sanitization_test offload_indication_test \ - sanitization_vlan_aware_test offload_indication_vlan_aware_test" +: ${MC_IP:=239.0.0.1} +export MC_IP + +: ${IP_FLAG:=""} +export IP_FLAG + +: ${ALL_TESTS:=" + sanitization_test + offload_indication_test + sanitization_vlan_aware_test + offload_indication_vlan_aware_test +"} + +lib_dir=$(dirname $0)/../../../net/forwarding NUM_NETIFS=2 : ${TIMEOUT:=20000} # ms source $lib_dir/lib.sh @@ -63,8 +88,8 @@ sanitization_single_dev_valid_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_pass @@ -80,8 +105,8 @@ sanitization_single_dev_vlan_aware_test() ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_pass @@ -97,8 +122,8 @@ sanitization_single_dev_mcast_enabled_test() ip link add dev br0 type bridge - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_fail @@ -115,9 +140,9 @@ sanitization_single_dev_mcast_group_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add name dummy1 up type dummy - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ - dev dummy1 group 239.0.0.1 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \ + dev dummy1 group $MC_IP sanitization_single_dev_test_fail @@ -134,7 +159,7 @@ sanitization_single_dev_no_local_ip_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ ttl 20 tos inherit dstport 4789 sanitization_single_dev_test_fail @@ -145,14 +170,14 @@ sanitization_single_dev_no_local_ip_test() log_test "vxlan device with no local ip" } -sanitization_single_dev_learning_enabled_test() +sanitization_single_dev_learning_enabled_ipv4_test() { RET=0 ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 learning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_pass @@ -169,8 +194,8 @@ sanitization_single_dev_local_interface_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add name dummy1 up type dummy - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 dev dummy1 sanitization_single_dev_test_fail @@ -187,8 +212,8 @@ sanitization_single_dev_port_range_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \ srcport 4000 5000 sanitization_single_dev_test_fail @@ -205,8 +230,8 @@ sanitization_single_dev_tos_static_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos 20 local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos 20 local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_fail @@ -222,8 +247,8 @@ sanitization_single_dev_ttl_inherit_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl inherit tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl inherit tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_fail @@ -233,14 +258,14 @@ sanitization_single_dev_ttl_inherit_test() log_test "vxlan device with inherit ttl" } -sanitization_single_dev_udp_checksum_test() +sanitization_single_dev_udp_checksum_ipv4_test() { RET=0 ip link add dev br0 type bridge mcast_snooping 0 ip link add name vxlan0 up type vxlan id 10 nolearning udpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_fail @@ -259,12 +284,12 @@ sanitization_single_dev_test() sanitization_single_dev_mcast_enabled_test sanitization_single_dev_mcast_group_test sanitization_single_dev_no_local_ip_test - sanitization_single_dev_learning_enabled_test + sanitization_single_dev_learning_enabled_"$ADDR_FAMILY"_test sanitization_single_dev_local_interface_test sanitization_single_dev_port_range_test sanitization_single_dev_tos_static_test sanitization_single_dev_ttl_inherit_test - sanitization_single_dev_udp_checksum_test + sanitization_single_dev_udp_checksum_"$ADDR_FAMILY"_test } sanitization_multi_devs_test_pass() @@ -316,10 +341,10 @@ sanitization_multi_devs_valid_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add dev br1 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 - ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_multi_devs_test_pass @@ -338,10 +363,10 @@ sanitization_multi_devs_ttl_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add dev br1 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 - ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ - ttl 40 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ + ttl 40 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_multi_devs_test_fail @@ -360,10 +385,10 @@ sanitization_multi_devs_udp_dstport_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add dev br1 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 - ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 5789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 5789 sanitization_multi_devs_test_fail @@ -382,10 +407,10 @@ sanitization_multi_devs_local_ip_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add dev br1 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 - ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.2 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_2 dstport 4789 sanitization_multi_devs_test_fail @@ -425,12 +450,12 @@ offload_indication_setup_create() ip link set dev $swp1 master br0 ip link set dev $swp2 master br1 - ip address add 198.51.100.1/32 dev lo + ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 } offload_indication_setup_destroy() @@ -438,7 +463,7 @@ offload_indication_setup_destroy() ip link del dev vxlan1 ip link del dev vxlan0 - ip address del 198.51.100.1/32 dev lo + ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo ip link set dev $swp2 nomaster ip link set dev $swp1 nomaster @@ -451,7 +476,7 @@ offload_indication_fdb_flood_test() { RET=0 - bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst $LOCAL_IP_2 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \ bridge fdb show brport vxlan0 @@ -467,7 +492,7 @@ offload_indication_fdb_bridge_test() RET=0 bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \ - dst 198.51.100.2 + dst $LOCAL_IP_2 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 @@ -518,7 +543,7 @@ offload_indication_fdb_bridge_test() # marked as offloaded in both drivers RET=0 - bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2 + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst $LOCAL_IP_2 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 check_err $? @@ -542,17 +567,17 @@ offload_indication_decap_route_test() RET=0 busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link set dev vxlan0 down busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link set dev vxlan1 down busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? log_test "vxlan decap route - vxlan device down" @@ -561,26 +586,26 @@ offload_indication_decap_route_test() ip link set dev vxlan1 up busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link set dev vxlan0 up busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? log_test "vxlan decap route - vxlan device up" RET=0 - ip address delete 198.51.100.1/32 dev lo + ip address delete $LOCAL_IP_1/$PREFIX_LEN dev lo busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? - ip address add 198.51.100.1/32 dev lo + ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? log_test "vxlan decap route - add local route" @@ -589,18 +614,18 @@ offload_indication_decap_route_test() ip link set dev $swp1 nomaster busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link set dev $swp2 nomaster busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link set dev $swp1 master br0 ip link set dev $swp2 master br1 busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? log_test "vxlan decap route - local ports enslavement" @@ -609,12 +634,12 @@ offload_indication_decap_route_test() ip link del dev br0 busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link del dev br1 busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? log_test "vxlan decap route - bridge device deletion" @@ -628,25 +653,25 @@ offload_indication_decap_route_test() ip link set dev vxlan0 master br0 ip link set dev vxlan1 master br1 busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link del dev vxlan0 busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link del dev vxlan1 busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? log_test "vxlan decap route - vxlan device deletion" ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 } check_fdb_offloaded() @@ -703,10 +728,10 @@ __offload_indication_join_vxlan_first() local mac=00:11:22:33:44:55 local zmac=00:00:00:00:00:00 - bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 + bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2 ip link set dev vxlan0 master br0 - bridge fdb add dev vxlan0 $mac self master static dst 198.51.100.2 + bridge fdb add dev vxlan0 $mac self master static dst $LOCAL_IP_2 RET=0 check_vxlan_fdb_not_offloaded @@ -756,8 +781,8 @@ __offload_indication_join_vxlan_first() offload_indication_join_vxlan_first() { ip link add dev br0 up type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 __offload_indication_join_vxlan_first @@ -771,7 +796,7 @@ __offload_indication_join_vxlan_last() RET=0 - bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 + bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2 ip link set dev $swp1 master br0 @@ -791,8 +816,8 @@ __offload_indication_join_vxlan_last() offload_indication_join_vxlan_last() { ip link add dev br0 up type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 __offload_indication_join_vxlan_last @@ -819,10 +844,10 @@ sanitization_vlan_aware_test() ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 # Test that when each VNI is mapped to a different VLAN we can enslave # a port to the bridge @@ -866,20 +891,20 @@ sanitization_vlan_aware_test() # Use the offload indication of the local route to ensure the VXLAN # configuration was correctly rollbacked. - ip address add 198.51.100.1/32 dev lo + ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo ip link set dev vxlan10 type vxlan ttl 10 ip link set dev $swp1 master br0 &> /dev/null check_fail $? busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? log_test "vlan-aware - failed enslavement to bridge due to conflict" ip link set dev vxlan10 type vxlan ttl 20 - ip address del 198.51.100.1/32 dev lo + ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo ip link del dev vxlan20 ip link del dev vxlan10 @@ -898,12 +923,12 @@ offload_indication_vlan_aware_setup_create() bridge vlan add vid 10 dev $swp1 bridge vlan add vid 20 dev $swp1 - ip address add 198.51.100.1/32 dev lo + ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 bridge vlan add vid 10 dev vxlan10 pvid untagged bridge vlan add vid 20 dev vxlan20 pvid untagged @@ -917,7 +942,7 @@ offload_indication_vlan_aware_setup_destroy() ip link del dev vxlan20 ip link del dev vxlan10 - ip address del 198.51.100.1/32 dev lo + ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo bridge vlan del vid 20 dev $swp1 bridge vlan del vid 10 dev $swp1 @@ -934,7 +959,7 @@ offload_indication_vlan_aware_fdb_test() log_info "vxlan entry offload indication - vlan-aware" bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \ - dst 198.51.100.2 vlan 10 + dst $LOCAL_IP_2 vlan 10 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 @@ -985,7 +1010,7 @@ offload_indication_vlan_aware_fdb_test() # marked as offloaded in both drivers RET=0 - bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2 + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst $LOCAL_IP_2 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 check_err $? @@ -1003,7 +1028,7 @@ offload_indication_vlan_aware_decap_route_test() RET=0 busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? # Toggle PVID flag on one VxLAN device and make sure route is still @@ -1011,7 +1036,7 @@ offload_indication_vlan_aware_decap_route_test() bridge vlan add vid 10 dev vxlan10 untagged busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? # Toggle PVID flag on second VxLAN device and make sure route is no @@ -1019,14 +1044,15 @@ offload_indication_vlan_aware_decap_route_test() bridge vlan add vid 20 dev vxlan20 untagged busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? # Toggle PVID flag back and make sure route is marked as offloaded bridge vlan add vid 10 dev vxlan10 pvid untagged bridge vlan add vid 20 dev vxlan20 pvid untagged - busywait "$TIMEOUT" wait_for_offload ip route show table local 198.51.100.1 + busywait "$TIMEOUT" wait_for_offload ip $IP_FLAG route show table local \ + $LOCAL_IP_1 check_err $? log_test "vxlan decap route - vni map/unmap" @@ -1036,8 +1062,8 @@ offload_indication_vlan_aware_join_vxlan_first() { ip link add dev br0 up type bridge mcast_snooping 0 \ vlan_filtering 1 vlan_default_pvid 1 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 __offload_indication_join_vxlan_first 1 @@ -1049,8 +1075,8 @@ offload_indication_vlan_aware_join_vxlan_last() { ip link add dev br0 up type bridge mcast_snooping 0 \ vlan_filtering 1 vlan_default_pvid 1 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 __offload_indication_join_vxlan_last @@ -1067,14 +1093,14 @@ offload_indication_vlan_aware_l3vni_test() sysctl_set net.ipv6.conf.default.disable_ipv6 1 ip link add dev br0 up type bridge mcast_snooping 0 \ vlan_filtering 1 vlan_default_pvid 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 ip link set dev $swp1 master br0 # The test will use the offload indication on the FDB entry to # understand if the tunnel is offloaded or not - bridge fdb append $zmac dev vxlan0 self dst 192.0.2.1 + bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2 ip link set dev vxlan0 master br0 bridge vlan add dev vxlan0 vid 10 pvid untagged diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh index 749ba3cfda1d..38148f51877a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh @@ -4,6 +4,21 @@ # Test vetoing of FDB entries that mlxsw can not offload. This exercises several # different veto vectors to test various rollback scenarios in the vxlan driver. +: ${LOCAL_IP:=198.51.100.1} +export LOCAL_IP + +: ${REMOTE_IP_1:=198.51.100.2} +export REMOTE_IP_1 + +: ${REMOTE_IP_2:=198.51.100.3} +export REMOTE_IP_2 + +: ${UDPCSUM_FLAFS:=noudpcsum} +export UDPCSUM_FLAFS + +: ${MC_IP:=224.0.0.1} +export MC_IP + lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" @@ -26,8 +41,8 @@ setup_prepare() ip link set dev $swp1 master br0 ip link set dev $swp2 up - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP dstport 4789 ip link set dev vxlan0 master br0 } @@ -50,11 +65,11 @@ fdb_create_veto_test() RET=0 bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \ - dst 198.51.100.2 2>/dev/null + dst $REMOTE_IP_1 2>/dev/null check_fail $? "multicast MAC not rejected" bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \ - dst 198.51.100.2 2>&1 >/dev/null | grep -q mlxsw_spectrum + dst $REMOTE_IP_1 2>&1 >/dev/null | grep -q mlxsw_spectrum check_err $? "multicast MAC rejected without extack" log_test "vxlan FDB veto - create" @@ -65,15 +80,15 @@ fdb_replace_veto_test() RET=0 bridge fdb add 00:01:02:03:04:05 dev vxlan0 self static \ - dst 198.51.100.2 + dst $REMOTE_IP_1 check_err $? "valid FDB rejected" bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \ - dst 198.51.100.2 port 1234 2>/dev/null + dst $REMOTE_IP_1 port 1234 2>/dev/null check_fail $? "FDB with an explicit port not rejected" bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \ - dst 198.51.100.2 port 1234 2>&1 >/dev/null \ + dst $REMOTE_IP_1 port 1234 2>&1 >/dev/null \ | grep -q mlxsw_spectrum check_err $? "FDB with an explicit port rejected without extack" @@ -85,15 +100,15 @@ fdb_append_veto_test() RET=0 bridge fdb add 00:00:00:00:00:00 dev vxlan0 self static \ - dst 198.51.100.2 + dst $REMOTE_IP_1 check_err $? "valid FDB rejected" bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \ - dst 198.51.100.3 port 1234 2>/dev/null + dst $REMOTE_IP_2 port 1234 2>/dev/null check_fail $? "FDB with an explicit port not rejected" bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \ - dst 198.51.100.3 port 1234 2>&1 >/dev/null \ + dst $REMOTE_IP_2 port 1234 2>&1 >/dev/null \ | grep -q mlxsw_spectrum check_err $? "FDB with an explicit port rejected without extack" @@ -105,11 +120,11 @@ fdb_changelink_veto_test() RET=0 ip link set dev vxlan0 type vxlan \ - group 224.0.0.1 dev lo 2>/dev/null + group $MC_IP dev lo 2>/dev/null check_fail $? "FDB with a multicast IP not rejected" ip link set dev vxlan0 type vxlan \ - group 224.0.0.1 dev lo 2>&1 >/dev/null \ + group $MC_IP dev lo 2>&1 >/dev/null \ | grep -q mlxsw_spectrum check_err $? "FDB with a multicast IP rejected without extack" diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh new file mode 100755 index 000000000000..66c87aab86f6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN test for IPv6. + +LOCAL_IP=2001:db8:1::1 +REMOTE_IP_1=2001:db8:2::1 +REMOTE_IP_2=2001:db8:3::1 +UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx" +MC_IP=FF02::2 + +source vxlan_fdb_veto.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh new file mode 100755 index 000000000000..f2ea0163ddea --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN test for IPv6. + +ADDR_FAMILY=ipv6 +LOCAL_IP_1=2001:db8:1::1 +LOCAL_IP_2=2001:db8:1::2 +PREFIX_LEN=128 +UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx" +MC_IP=FF02::2 +IP_FLAG="-6" + +ALL_TESTS=" + sanitization_test + offload_indication_test + sanitization_vlan_aware_test + offload_indication_vlan_aware_test +" + +sanitization_single_dev_learning_enabled_ipv6_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with learning enabled" +} + +sanitization_single_dev_udp_checksum_ipv6_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning \ + noudp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \ + local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_fail + log_test "vxlan device without zero udp checksum at RX" + + ip link del dev vxlan0 + + ip link add name vxlan0 up type vxlan id 10 nolearning \ + udp6zerocsumrx noudp6zerocsumtx ttl 20 tos inherit \ + local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_fail + log_test "vxlan device without zero udp checksum at TX" + + ip link del dev vxlan0 + ip link del dev br0 + +} + +source vxlan.sh diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index dfd827b7a9f9..7da783d6f453 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -20,6 +20,7 @@ NETIF_TYPE=${NETIF_TYPE:=veth} NETIF_CREATE=${NETIF_CREATE:=yes} MCD=${MCD:=smcrouted} MC_CLI=${MC_CLI:=smcroutectl} +PING_COUNT=${PING_COUNT:=10} PING_TIMEOUT=${PING_TIMEOUT:=5} WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600} @@ -1111,7 +1112,8 @@ ping_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null + $PING $args $dip -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT &> /dev/null } ping_test() @@ -1132,7 +1134,8 @@ ping6_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING6 $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null + $PING6 $args $dip -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT &> /dev/null } ping6_test() diff --git a/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh b/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh new file mode 100755 index 000000000000..0548b2b0d416 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh @@ -0,0 +1,347 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1ad) + $swp2 | | +# | | vid 100 pvid untagged vid 100 pvid | | +# | | untagged | | +# | | + vx100 (vxlan) | | +# | | local 2001:db8:3::1 | | +# | | remote 2001:db8:4::1 2001:db8:5::1 | | +# | | id 1000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:4::0/64 via 2001:db8:3::2 | +# | 2001:db8:5::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 | +# +----|---------------------------------------|------------------+ +# | | +# +----|--------------------------------+ +----|-------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 | +# | | | | +# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 | +# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via | +# | | | 2001:db8:5::2 | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1ad) | | | | BR2 (802.1ad) | | +# | | + vx100 (vxlan) | | | | + vx100 (vxlan) | | +# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | | +# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | | +# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | | | vid 100 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 100 pvid untagged | | | | | vid 100 pvid untagged | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | VW2 (vrf) | | | | | VW2 (vrf) | | +# | | + w2 (veth) | | | | + w2 (veth) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 2001:db8:1::3/64 | | | | | 2001:db8:1::4/64 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 2001:db8:2::3/64 | | | | 2001:db8:2::4/64 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-------------------------------------+ +------------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv6 + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 2001:db8:1::1/64 + vlan_create $h1 20 v$h1 2001:db8:2::1/64 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 10 v$h2 2001:db8:1::2/64 + vlan_create $h2 20 v$h2 2001:db8:2::2/64 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 2001:db8:3::1/64 + + ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2 +} + +rp1_unset_addr() +{ + ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2 + ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + + ip address del dev $rp1 2001:db8:3::1/64 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ + vlan_default_pvid 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + + ip link add name vx100 type vxlan id 1000 \ + local 2001:db8:3::1 dstport "$VXPORT" \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx100 up + + ip link set dev vx100 master br1 + bridge vlan add vid 100 dev vx100 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 100 dev $swp1 pvid untagged + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 100 dev $swp2 pvid untagged + + bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:5::1 self +} + +switch_destroy() +{ + bridge fdb del dev vx100 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx100 00:00:00:00:00:00 dst 2001:db8:4::1 self + + bridge vlan del vid 100 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 100 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev vx100 nomaster + ip link set dev vx100 down + ip link del dev vx100 + + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 2001:db8:3::2/64 + __simple_if_init v1 v$rp2 2001:db8:4::2/64 + __simple_if_init v3 v$rp2 2001:db8:5::2/64 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 2001:db8:5::2/64 + __simple_if_fini v1 2001:db8:4::2/64 + simple_if_fini $rp2 2001:db8:3::2/64 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr1=$1; shift + local host_addr2=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/64 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ + vlan_default_pvid 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + bridge vlan add vid 100 dev w1 pvid untagged + + ip link add name vx100 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" udp6zerocsumrx + ip link set dev vx100 up + bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:3::1 self + bridge fdb append dev vx100 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx100 master br2 + tc qdisc add dev vx100 clsact + + bridge vlan add vid 100 dev vx100 pvid untagged + + simple_if_init w2 + vlan_create w2 10 vw2 $host_addr1/64 + vlan_create w2 20 vw2 $host_addr2/64 + + ip route add 2001:db8:3::0/64 nexthop via $nh_addr + ip route add $other_in_addr/128 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \ + 2001:db8:1::3 2001:db8:2::3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \ + 2001:db8:1::4 2001:db8:2::4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 ": local->local" + ping6_test $h1 2001:db8:1::3 ": local->remote 1" + ping6_test $h1 2001:db8:1::4 ": local->remote 2" +} + +test_all() +{ + echo "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh new file mode 100755 index 000000000000..f4930098974f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh @@ -0,0 +1,504 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------------------+ +-----------------------------+ +# | vrf-h1 | | vrf-h2 | +# | + $h1 | | + $h2 | +# | | 2001:db8:1::1/64 | | | 2001:db8:2::1/64 | +# | | default via 2001:db8:1::3 | | | default via 2001:db8:2::3 | +# +----|---------------------------+ +-|---------------------------+ +# | | +# +----|------------------------------------------|---------------------------+ +# | SW | | | +# | +--|------------------------------------------|-------------------------+ | +# | | + $swp1 br1 + $swp2 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx10 + vx20 | | +# | | local 2001:db8:3::1 local 2001:db8:3::1 | | +# | | remote 2001:db8:3::2 remote 2001:db8:3::2 | | +# | | id 1000 id 2000 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | | +# | | + vlan10 vlan20 + | | +# | | | 2001:db8:1::2/64 2001:db8:2::2/64 | | | +# | | | | | | +# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | | +# | | 2001:db8:1::3/64 2001:db8:2::3/64 | | +# | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | +# | | vrf-green | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 +lo | +# | | 2001:db8:4::1/64 2001:db8:3::1/128 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | vrf-spine | +# | + $rp2 | +# | 2001:db8:4::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | | +# | + v1 (veth) | +# | | 2001:db8:5::2/64 | +# +----|--------------------------------------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | + v2 (veth) +lo NS1 (netns) | +# | 2001:db8:5::1/64 2001:db8:3::2/128 | +# | | +# | +-----------------------------------------------------------------------+ | +# | | vrf-green | | +# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | | +# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | | +# | | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | | +# | | | | | | +# | | + vlan10 vlan20 + | | +# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | | +# | | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | | +# | | + vx10 + vx20 | | +# | | local 2001:db8:3::2 local 2001:db8:3::2 | | +# | | remote 2001:db8:3::1 remote 2001:db8:3::1 | | +# | | id 1000 id 2000 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + w1 (veth) + w3 (veth) | | +# | | | vid 10 pvid untagged br1 | vid 20 pvid untagged | | +# | +--|------------------------------------------|-------------------------+ | +# | | | | +# | | | | +# | +--|----------------------+ +--|-------------------------+ | +# | | | vrf-h1 | | | vrf-h2 | | +# | | + w2 (veth) | | + w4 (veth) | | +# | | 2001:db8:1::4/64 | | 2001:db8:2::4/64 | | +# | | default via | | default via | | +# | | 2001:db8:1::3/64 | | 2001:db8:2::3/64 | | +# | +-------------------------+ +----------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv6 + arp_decap +" +NUM_NETIFS=6 +source lib.sh + +require_command $ARPING + +hx_create() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + vrf_create $vrf_name + ip link set dev $if_name master $vrf_name + ip link set dev $vrf_name up + ip link set dev $if_name up + + ip address add $ip_addr/64 dev $if_name + ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \ + dev $if_name + ip route add default vrf $vrf_name nexthop via $gw_ip +} +export -f hx_create + +hx_destroy() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + ip route del default vrf $vrf_name nexthop via $gw_ip + ip neigh del $gw_ip dev $if_name + ip address del $ip_addr/64 dev $if_name + + ip link set dev $if_name down + vrf_destroy $vrf_name +} + +h1_create() +{ + hx_create "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3 +} + +h1_destroy() +{ + hx_destroy "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3 +} + +h2_create() +{ + hx_create "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3 +} + +h2_destroy() +{ + hx_destroy "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + ip address add dev $rp1 2001:db8:4::1/64 + ip route add 2001:db8:3::2/128 nexthop via 2001:db8:4::2 + + ip link add name vx10 type vxlan id 1000 \ + local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 \ + local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 pvid untagged + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 20 dev $swp2 pvid untagged + + ip address add 2001:db8:3::1/128 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 2001:db8:1::2/64 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:1::3/64 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 2001:db8:2::2/64 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:2::3/64 dev vlan20-v + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + +} + +switch_destroy() +{ + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20 + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10 + + bridge vlan del vid 20 dev br1 self + bridge vlan del vid 10 dev br1 self + + ip link del dev vlan20 + + ip link del dev vlan10 + + vrf_destroy "vrf-green" + + ip address del 2001:db8:3::1/128 dev lo + + bridge vlan del vid 20 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + ip route del 2001:db8:3::2 nexthop via 2001:db8:4::2 + ip address del dev $rp1 2001:db8:4::1/64 + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +spine_create() +{ + vrf_create "vrf-spine" + ip link set dev $rp2 master vrf-spine + ip link set dev v1 master vrf-spine + ip link set dev vrf-spine up + ip link set dev $rp2 up + ip link set dev v1 up + + ip address add 2001:db8:4::2/64 dev $rp2 + ip address add 2001:db8:5::2/64 dev v1 + + ip route add 2001:db8:3::1/128 vrf vrf-spine nexthop via \ + 2001:db8:4::1 + ip route add 2001:db8:3::2/128 vrf vrf-spine nexthop via \ + 2001:db8:5::1 +} + +spine_destroy() +{ + ip route del 2001:db8:3::2/128 vrf vrf-spine nexthop via \ + 2001:db8:5::1 + ip route del 2001:db8:3::1/128 vrf vrf-spine nexthop via \ + 2001:db8:4::1 + + ip address del 2001:db8:5::2/64 dev v1 + ip address del 2001:db8:4::2/64 dev $rp2 + + ip link set dev v1 down + ip link set dev $rp2 down + vrf_destroy "vrf-spine" +} + +ns_h1_create() +{ + hx_create "vrf-h1" w2 2001:db8:1::4 2001:db8:1::3 +} +export -f ns_h1_create + +ns_h2_create() +{ + hx_create "vrf-h2" w4 2001:db8:2::4 2001:db8:2::3 +} +export -f ns_h2_create + +ns_switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + ip link set dev br1 up + + ip link set dev v2 up + ip address add dev v2 2001:db8:5::1/64 + ip route add 2001:db8:3::1 nexthop via 2001:db8:5::2 + + ip link add name vx10 type vxlan id 1000 \ + local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 \ + local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev w1 master br1 + ip link set dev w1 up + bridge vlan add vid 10 dev w1 pvid untagged + + ip link set dev w3 master br1 + ip link set dev w3 up + bridge vlan add vid 20 dev w3 pvid untagged + + ip address add 2001:db8:3::2/128 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 2001:db8:1::3/64 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:1::3/64 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 2001:db8:2::3/64 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:2::3/64 dev vlan20-v + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 +} +export -f ns_switch_create + +ns_init() +{ + ip link add name w1 type veth peer name w2 + ip link add name w3 type veth peer name w4 + + ip link set dev lo up + + ns_h1_create + ns_h2_create + ns_switch_create +} +export -f ns_init + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 ns_init +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +macs_populate() +{ + local mac1=$1; shift + local mac2=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local dst=$1; shift + + bridge fdb add $mac1 dev vx10 self master extern_learn static \ + dst $dst vlan 10 + bridge fdb add $mac2 dev vx20 self master extern_learn static \ + dst $dst vlan 20 + + ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \ + extern_learn + ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \ + extern_learn +} +export -f macs_populate + +macs_initialize() +{ + local h1_ns_mac=$(in_ns ns1 mac_get w2) + local h2_ns_mac=$(in_ns ns1 mac_get w4) + local h1_mac=$(mac_get $h1) + local h2_mac=$(mac_get $h2) + + macs_populate $h1_ns_mac $h2_ns_mac 2001:db8:1::4 2001:db8:2::4 \ + 2001:db8:3::2 + in_ns ns1 macs_populate $h1_mac $h2_mac 2001:db8:1::1 2001:db8:2::1 \ + 2001:db8:3::1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + spine_create + ns1_create + in_ns ns1 forwarding_enable + + macs_initialize +} + +cleanup() +{ + pre_cleanup + + ns1_destroy + spine_destroy + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": local->local vid 10->vid 20" + ping6_test $h1 2001:db8:1::4 ": local->remote vid 10->vid 10" + ping6_test $h2 2001:db8:2::4 ": local->remote vid 20->vid 20" + ping6_test $h1 2001:db8:2::4 ": local->remote vid 10->vid 20" + ping6_test $h2 2001:db8:1::4 ": local->remote vid 20->vid 10" +} + +arp_decap() +{ + # Repeat the ping tests, but without populating the neighbours. This + # makes sure we correctly decapsulate ARP packets + log_info "deleting neighbours from vlan interfaces" + + ip neigh del 2001:db8:1::4 dev vlan10 + ip neigh del 2001:db8:2::4 dev vlan20 + + ping_ipv6 + + ip neigh replace 2001:db8:1::4 lladdr $(in_ns ns1 mac_get w2) \ + nud noarp dev vlan10 extern_learn + ip neigh replace 2001:db8:2::4 lladdr $(in_ns ns1 mac_get w4) \ + nud noarp dev vlan20 extern_learn +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh new file mode 100755 index 000000000000..ac97f07e5ce8 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh @@ -0,0 +1,804 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | + $h2 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 2001:db8:3::1 | | +# | | remote 2001:db8:4::1 2001:db8:5::1 | | +# | | id 1000 dstport $VXPORT | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:4::0/64 via 2001:db8:3::2 | +# | 2001:db8:5::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 | +# +----|---------------------------------------|------------------+ +# | | +# +----|--------------------------------+ +----|-------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 | +# | | | | +# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 | +# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via | +# | | | 2001:db8:5::2 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1d) | | | | BR2 (802.1d) | | +# | | + vx2 (vxlan) | | | | + vx2 (vxlan) | | +# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | | +# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | | +# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | | +# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-------------------------------------+ +------------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + ping_ipv6 + test_flood + test_unicast + test_ttl + test_tos + test_ecn_encap + test_ecn_decap + reapply_config + ping_ipv4 + ping_ipv6 + test_flood + test_unicast +"} + +NUM_NETIFS=6 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + tc qdisc add dev $h1 clsact +} + +h1_destroy() +{ + tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +rp1_set_addr() +{ + ip address add dev $rp1 2001:db8:3::1/64 + + ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2 +} + +rp1_unset_addr() +{ + ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2 + ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + + ip address del dev $rp1 2001:db8:3::1/64 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + tc qdisc add dev $rp1 clsact + + ip link add name vx1 type vxlan id 1000 local 2001:db8:3::1 \ + dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \ + tos inherit ttl 100 + ip link set dev vx1 up + + ip link set dev vx1 master br1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + tc qdisc add dev $swp1 clsact + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self +} + +switch_destroy() +{ + bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + tc qdisc del dev $swp1 clsact + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev vx1 nomaster + ip link set dev vx1 down + ip link del dev vx1 + + tc qdisc del dev $rp1 clsact + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 2001:db8:3::2/64 + __simple_if_init v1 v$rp2 2001:db8:4::2/64 + __simple_if_init v3 v$rp2 2001:db8:5::2/64 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 2001:db8:5::2/64 + __simple_if_fini v1 2001:db8:4::2/64 + simple_if_fini $rp2 2001:db8:3::2/64 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr_ipv4=$1; shift + local host_addr_ipv6=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/64 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + ip link add name vx2 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" udp6zerocsumrx + ip link set dev vx2 up + bridge fdb append dev vx2 00:00:00:00:00:00 dst 2001:db8:3::1 self + bridge fdb append dev vx2 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx2 master br2 + tc qdisc add dev vx2 clsact + + simple_if_init w2 $host_addr_ipv4/28 $host_addr_ipv6/64 + + ip route add 2001:db8:3::0/64 nexthop via $nh_addr + ip route add $other_in_addr/128 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \ + 192.0.2.3 2001:db8:1::3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \ + 192.0.2.4 2001:db8:1::4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +# For the first round of tests, vx1 is the first device to get +# attached to the bridge, and at that point the local IP is already +# configured. Try the other scenario of attaching the devices to a an +# already-offloaded bridge, and only then assign the local IP. +reapply_config() +{ + log_info "Reapplying configuration" + + bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self + ip link set dev vx1 nomaster + rp1_unset_addr + sleep 5 + + ip link set dev vx1 master br1 + bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self + sleep 1 + rp1_set_addr + sleep 5 +} + +__ping_ipv4() +{ + local vxlan_local_ip=$1; shift + local vxlan_remote_ip=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local info=$1; shift + + RET=0 + + tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \ + flower ip_proto udp src_ip $vxlan_local_ip \ + dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass + # Match ICMP-reply packets after decapsulation, so source IP is + # destination IP of the ping and destination IP is source IP of the + # ping. + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_ip $dst_ip dst_ip $src_ip \ + $TC_FLAG action pass + + # Send 100 packets and verify that at least 100 packets hit the rule, + # to overcome ARP noise. + PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100 + check_err $? "Encapsulated packets did not go through router" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 100 + check_err $? "Decapsulated packets did not go through switch" + + log_test "ping: $info" + + tc filter del dev $swp1 egress + tc filter del dev $rp1 egress +} + +ping_ipv4() +{ + RET=0 + + local local_sw_ip=2001:db8:3::1 + local remote_ns1_ip=2001:db8:4::1 + local remote_ns2_ip=2001:db8:5::1 + local h1_ip=192.0.2.1 + local w2_ns1_ip=192.0.2.3 + local w2_ns2_ip=192.0.2.4 + + ping_test $h1 192.0.2.2 ": local->local" + + __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_ip $w2_ns1_ip $h1 \ + "local->remote 1" + __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_ip $w2_ns2_ip $h1 \ + "local->remote 2" +} + +__ping_ipv6() +{ + local vxlan_local_ip=$1; shift + local vxlan_remote_ip=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local info=$1; shift + + RET=0 + + tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \ + flower ip_proto udp src_ip $vxlan_local_ip \ + dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass + # Match ICMP-reply packets after decapsulation, so source IP is + # destination IP of the ping and destination IP is source IP of the + # ping. + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip $dst_ip dst_ip $src_ip $TC_FLAG action pass + + # Send 100 packets and verify that at least 100 packets hit the rule, + # to overcome neighbor discovery noise. + PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $rp1 egress" 101 100 + check_err $? "Encapsulated packets did not go through router" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 100 + check_err $? "Decapsulated packets did not go through switch" + + log_test "ping6: $info" + + tc filter del dev $swp1 egress + tc filter del dev $rp1 egress +} + +ping_ipv6() +{ + RET=0 + + local local_sw_ip=2001:db8:3::1 + local remote_ns1_ip=2001:db8:4::1 + local remote_ns2_ip=2001:db8:5::1 + local h1_ip=2001:db8:1::1 + local w2_ns1_ip=2001:db8:1::3 + local w2_ns2_ip=2001:db8:1::4 + + ping6_test $h1 2001:db8:1::2 ": local->local" + + __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_ip $w2_ns1_ip $h1 \ + "local->remote 1" + __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_ip $w2_ns2_ip $h1 \ + "local->remote 2" +} + +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_sw if that fails. + + $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \ + proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \ + icmpv6 skip_sw action pass 2>/dev/null || \ + $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \ + proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \ + icmpv6 skip_hw action pass +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx2 ns1" "vx2 ns2") + local counter + local key + + for counter in "${counters[@]}"; do + flood_counter_install $dst $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ -6 $h1 -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp6 type=128 -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." + done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $dst $counter + done +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local what=$1; shift + + RET=0 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 2001:db8:1::100 "flood" +} + +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx1 2001:db8:4::1" + "$r2_mac vx1 2001:db8:5::1") + local target + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add $target + done + + __test_unicast $h2_mac 2001:db8:1::2 0 "local MAC unicast" + __test_unicast $r1_mac 2001:db8:1::3 1 "remote MAC 1 unicast" + __test_unicast $r2_mac 2001:db8:1::4 2 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del $target + done +} + +vxlan_ping_test() +{ + local ping_dev=$1; shift + local ping_dip=$1; shift + local ping_args=$1; shift + local capture_dev=$1; shift + local capture_dir=$1; shift + local capture_pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + ping6_do $ping_dev $ping_dip "$ping_args" + local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." +} + +test_ttl() +{ + RET=0 + + tc filter add dev v1 egress pref 77 protocol ipv6 \ + flower ip_ttl 99 action pass + vxlan_ping_test $h1 2001:db8:1::3 "" v1 egress 77 10 + tc filter del dev v1 egress pref 77 protocol ipv6 + + log_test "VXLAN: envelope TTL" +} + +test_tos() +{ + RET=0 + + tc filter add dev v1 egress pref 77 protocol ipv6 \ + flower ip_tos 0x14 action pass + vxlan_ping_test $h1 2001:db8:1::3 "-Q 0x14" v1 egress 77 10 + vxlan_ping_test $h1 2001:db8:1::3 "-Q 0x18" v1 egress 77 0 + tc filter del dev v1 egress pref 77 protocol ipv6 + + log_test "VXLAN: envelope TOS inheritance" +} + +__test_ecn_encap() +{ + local q=$1; shift + local tos=$1; shift + + RET=0 + + tc filter add dev v1 egress pref 77 protocol ipv6 \ + flower ip_tos $tos action pass + sleep 1 + vxlan_ping_test $h1 2001:db8:1::3 "-Q $q" v1 egress 77 10 + tc filter del dev v1 egress pref 77 protocol ipv6 + + log_test "VXLAN: ECN encap: $q->$tos" +} + +test_ecn_encap() +{ + # In accordance with INET_ECN_encapsulate() + __test_ecn_encap 0x00 0x00 + __test_ecn_encap 0x01 0x01 + __test_ecn_encap 0x02 0x02 + __test_ecn_encap 0x03 0x02 +} + +vxlan_encapped_ping_do() +{ + local count=$1; shift + local dev=$1; shift + local next_hop_mac=$1; shift + local dest_ip=$1; shift + local dest_mac=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + + $MZ -6 $dev -c $count -d 100msec -q \ + -b $next_hop_mac -B $dest_ip \ + -t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$(mac_get w2):"$( : ETH saddr + )"86:dd:"$( : ETH type + )"6"$( : IP version + )"$inner_tos"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:08:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + )"80:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) +} +export -f vxlan_encapped_ping_do + +vxlan_encapped_ping_test() +{ + local ping_dev=$1; shift + local nh_dev=$1; shift + local ping_dip=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + local stat_get=$1; shift + local expect=$1; shift + + local t0=$($stat_get) + + in_ns ns1 \ + vxlan_encapped_ping_do 10 $ping_dev $(mac_get $nh_dev) \ + $ping_dip $(mac_get $h1) \ + $inner_tos $outer_tos + sleep 1 + local t1=$($stat_get) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "Expected to capture $expect packets, got $delta." +} +export -f vxlan_encapped_ping_test + +__test_ecn_decap() +{ + local orig_inner_tos=$1; shift + local orig_outer_tos=$1; shift + local decapped_tos=$1; shift + + RET=0 + + tc filter add dev $h1 ingress pref 77 protocol ipv6 \ + flower src_ip 2001:db8:1::3 dst_ip 2001:db8:1::1 \ + ip_tos $decapped_tos action drop + sleep 1 + vxlan_encapped_ping_test v2 v1 2001:db8:3::1 \ + $orig_inner_tos $orig_outer_tos \ + "tc_rule_stats_get $h1 77 ingress" 10 + tc filter del dev $h1 ingress pref 77 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->$decapped_tos" +} + +test_ecn_decap_error() +{ + local orig_inner_tos="0:0" + local orig_outer_tos=03 + + RET=0 + + vxlan_encapped_ping_test v2 v1 2001:db8:3::1 \ + $orig_inner_tos $orig_outer_tos \ + "link_stats_rx_errors_get vx1" 10 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->error" +} + +test_ecn_decap() +{ + # In accordance with INET_ECN_decapsulate() + __test_ecn_decap "0:0" 00 0x00 + __test_ecn_decap "0:0" 01 0x00 + __test_ecn_decap "0:0" 02 0x00 + # 00 03 is tested in test_ecn_decap_error() + __test_ecn_decap "0:1" 00 0x01 + __test_ecn_decap "0:1" 01 0x01 + __test_ecn_decap "0:1" 02 0x01 + __test_ecn_decap "0:1" 03 0x03 + __test_ecn_decap "0:2" 00 0x02 + __test_ecn_decap "0:2" 01 0x01 + __test_ecn_decap "0:2" 02 0x02 + __test_ecn_decap "0:2" 03 0x03 + __test_ecn_decap "0:3" 00 0x03 + __test_ecn_decap "0:3" 01 0x03 + __test_ecn_decap "0:3" 02 0x03 + __test_ecn_decap "0:3" 03 0x03 + test_ecn_decap_error +} + +test_all() +{ + log_info "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh new file mode 100755 index 000000000000..00540317737a --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 + ping_ipv6 +" +source vxlan_bridge_1d_ipv6.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh index a5789721ba92..a596bbf3ed6a 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -680,26 +680,6 @@ test_pvid() log_test "VXLAN: flood after vlan re-add" } -vxlan_ping_test() -{ - local ping_dev=$1; shift - local ping_dip=$1; shift - local ping_args=$1; shift - local capture_dev=$1; shift - local capture_dir=$1; shift - local capture_pref=$1; shift - local expect=$1; shift - - local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) - ping_do $ping_dev $ping_dip "$ping_args" - local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) - local delta=$((t1 - t0)) - - # Tolerate a couple stray extra packets. - ((expect <= delta && delta <= expect + 2)) - check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." -} - __test_learning() { local -a expects=(0 0 0 0 0) diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh new file mode 100755 index 000000000000..d880df89bc8b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh @@ -0,0 +1,837 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1q) + $swp2 | | +# | | vid 10 vid 10 | | +# | | vid 20 vid 20 | | +# | | | | +# | | + vx10 (vxlan) + vx20 (vxlan) | | +# | | local: local: | | +# | | 2001:db8:3::1 2001:db8:3::1 | | +# | | remote: remote: | | +# | | 2001:db8:4::1 2001:db8:5::1 2001:db8:4::1 2001:db8:5::1 | | +# | | id 1000 dstport $VXPORT id 2000 dstport $VXPORT | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:4::0/64 via 2001:db8:3::2 | +# | 2001:db8:5::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 | +# +----|---------------------------------------|------------------+ +# | | +# +----|--------------------------------+ +----|-------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 | +# | | | | +# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 | +# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via | +# | | | 2001:db8:5::2 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1q) | | | | BR2 (802.1q) | | +# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | | +# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | | +# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | | +# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | | +# | | | | | | | | +# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | | +# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | | +# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | | +# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | | +# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 10 | | | | | vid 10 | | +# | | | vid 20 | | | | | vid 20 | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | 2001:db8:1::3/64 | | | | | 2001:db8:1::4/64 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 198.51.100.3/24 | | | | 198.51.100.4/24 | | +# | | 2001:db8:2::3/64 | | | | 2001:db8:2::4/64 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-------------------------------------+ +------------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + ping_ipv6 + test_flood + test_unicast + reapply_config + ping_ipv4 + ping_ipv6 + test_flood + test_unicast + test_pvid + ping_ipv4 + ping_ipv6 + test_flood + test_pvid +"} + +NUM_NETIFS=6 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 10 v$h2 192.0.2.2/28 2001:db8:1::2/64 + vlan_create $h2 20 v$h2 198.51.100.2/24 2001:db8:2::2/64 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 2001:db8:3::1/64 + + ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2 +} + +rp1_unset_addr() +{ + ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2 + ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + + ip address del dev $rp1 2001:db8:3::1/64 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + tc qdisc add dev $rp1 clsact + + ip link add name vx10 type vxlan id 1000 local 2001:db8:3::1 \ + dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \ + tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 local 2001:db8:3::1 \ + dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \ + tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + tc qdisc add dev $swp1 clsact + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 10 dev $swp2 + bridge vlan add vid 20 dev $swp2 + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self +} + +switch_destroy() +{ + bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self + + bridge vlan del vid 20 dev $swp2 + bridge vlan del vid 10 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + tc qdisc del dev $swp1 clsact + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + tc qdisc del dev $rp1 clsact + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 2001:db8:3::2/64 + __simple_if_init v1 v$rp2 2001:db8:4::2/64 + __simple_if_init v3 v$rp2 2001:db8:5::2/64 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 2001:db8:5::2/64 + __simple_if_fini v1 2001:db8:4::2/64 + simple_if_fini $rp2 2001:db8:3::2/64 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr1_ipv4=$1; shift + local host_addr1_ipv6=$1; shift + local host_addr2_ipv4=$1; shift + local host_addr2_ipv6=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/64 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 1 vlan_default_pvid 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + bridge vlan add vid 10 dev w1 + bridge vlan add vid 20 dev w1 + + ip link add name vx10 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" udp6zerocsumrx + ip link set dev vx10 up + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:3::1 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx10 master br2 + tc qdisc add dev vx10 clsact + + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 local $in_addr \ + dstport "$VXPORT" udp6zerocsumrx + ip link set dev vx20 up + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:3::1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx20 master br2 + tc qdisc add dev vx20 clsact + + bridge vlan add vid 20 dev vx20 pvid untagged + + simple_if_init w2 + vlan_create w2 10 vw2 $host_addr1_ipv4/28 $host_addr1_ipv6/64 + vlan_create w2 20 vw2 $host_addr2_ipv4/24 $host_addr2_ipv6/64 + + ip route add 2001:db8:3::0/64 nexthop via $nh_addr + ip route add $other_in_addr/128 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \ + 192.0.2.3 2001:db8:1::3 198.51.100.3 2001:db8:2::3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \ + 192.0.2.4 2001:db8:1::4 198.51.100.4 2001:db8:2::4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +# For the first round of tests, vx10 and vx20 were the first devices to get +# attached to the bridge, and at that point the local IP is already +# configured. Try the other scenario of attaching these devices to a bridge +# that already has local ports members, and only then assign the local IP. +reapply_config() +{ + log_info "Reapplying configuration" + + bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self + + ip link set dev vx20 nomaster + ip link set dev vx10 nomaster + + rp1_unset_addr + sleep 5 + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self + + rp1_set_addr + sleep 5 +} + +__ping_ipv4() +{ + local vxlan_local_ip=$1; shift + local vxlan_remote_ip=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local info=$1; shift + + RET=0 + + tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \ + flower ip_proto udp src_ip $vxlan_local_ip \ + dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass + # Match ICMP-reply packets after decapsulation, so source IP is + # destination IP of the ping and destination IP is source IP of the + # ping. + tc filter add dev $swp1 egress protocol 802.1q pref 1 handle 101 \ + flower vlan_ethtype ipv4 src_ip $dst_ip dst_ip $src_ip \ + $TC_FLAG action pass + + # Send 100 packets and verify that at least 100 packets hit the rule, + # to overcome ARP noise. + PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100 + check_err $? "Encapsulated packets did not go through router" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 100 + check_err $? "Decapsulated packets did not go through switch" + + log_test "ping: $info" + + tc filter del dev $swp1 egress + tc filter del dev $rp1 egress +} + +ping_ipv4() +{ + RET=0 + + local local_sw_ip=2001:db8:3::1 + local remote_ns1_ip=2001:db8:4::1 + local remote_ns2_ip=2001:db8:5::1 + local h1_10_ip=192.0.2.1 + local h1_20_ip=198.51.100.1 + local w2_10_ns1_ip=192.0.2.3 + local w2_10_ns2_ip=192.0.2.4 + local w2_20_ns1_ip=198.51.100.3 + local w2_20_ns2_ip=198.51.100.4 + + ping_test $h1.10 192.0.2.2 ": local->local vid 10" + ping_test $h1.20 198.51.100.2 ": local->local vid 20" + + __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_10_ip $w2_10_ns1_ip $h1.10 \ + "local->remote 1 vid 10" + __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_10_ip $w2_10_ns2_ip $h1.10 \ + "local->remote 2 vid 10" + __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_20_ip $w2_20_ns1_ip $h1.20 \ + "local->remote 1 vid 20" + __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_20_ip $w2_20_ns2_ip $h1.20 \ + "local->remote 2 vid 20" +} + +__ping_ipv6() +{ + local vxlan_local_ip=$1; shift + local vxlan_remote_ip=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local info=$1; shift + + RET=0 + + tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \ + flower ip_proto udp src_ip $vxlan_local_ip \ + dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass + # Match ICMP-reply packets after decapsulation, so source IP is + # destination IP of the ping and destination IP is source IP of the + # ping. + tc filter add dev $swp1 egress protocol 802.1q pref 1 handle 101 \ + flower vlan_ethtype ipv6 src_ip $dst_ip dst_ip $src_ip \ + $TC_FLAG action pass + + # Send 100 packets and verify that at least 100 packets hit the rule, + # to overcome neighbor discovery noise. + PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $rp1 egress" 101 100 + check_err $? "Encapsulated packets did not go through router" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 100 + check_err $? "Decapsulated packets did not go through switch" + + log_test "ping6: $info" + + tc filter del dev $swp1 egress + tc filter del dev $rp1 egress +} + +ping_ipv6() +{ + RET=0 + + local local_sw_ip=2001:db8:3::1 + local remote_ns1_ip=2001:db8:4::1 + local remote_ns2_ip=2001:db8:5::1 + local h1_10_ip=2001:db8:1::1 + local h1_20_ip=2001:db8:2::1 + local w2_10_ns1_ip=2001:db8:1::3 + local w2_10_ns2_ip=2001:db8:1::4 + local w2_20_ns1_ip=2001:db8:2::3 + local w2_20_ns2_ip=2001:db8:2::4 + + ping6_test $h1.10 2001:db8:1::2 ": local->local vid 10" + ping6_test $h1.20 2001:db8:2::2 ": local->local vid 20" + + __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_10_ip $w2_10_ns1_ip $h1.10 \ + "local->remote 1 vid 10" + __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_10_ip $w2_10_ns2_ip $h1.10 \ + "local->remote 2 vid 10" + __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_20_ip $w2_20_ns1_ip $h1.20 \ + "local->remote 1 vid 20" + __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_20_ip $w2_20_ns2_ip $h1.20 \ + "local->remote 2 vid 20" +} + +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_sw if that fails. + + $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \ + proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \ + icmpv6 skip_sw action pass 2>/dev/null || \ + $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \ + proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \ + icmpv6 skip_hw action pass +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx10 ns1" "vx20 ns1" "vx10 ns2" "vx20 ns2") + local counter + local key + + # Packets reach the local host tagged whereas they reach the VxLAN + # devices untagged. In order to be able to use the same filter for + # all counters, make sure the packets also reach the local host + # untagged + bridge vlan add vid $vid dev $swp2 untagged + for counter in "${counters[@]}"; do + flood_counter_install $dst $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ -6 $h1 -Q $vid -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp6 type=128 -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." + done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $dst $counter + done + bridge vlan add vid $vid dev $swp2 +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local what=$1; shift + local -a expects=("${@}") + + RET=0 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 2001:db8:1::100 10 "flood vlan 10" \ + 10 10 0 10 0 + __test_flood ca:fe:be:ef:13:37 2001:db8:2::100 20 "flood vlan 20" \ + 10 0 10 0 10 +} + +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local vid=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static vlan $vid 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local vid=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx10 2001:db8:4::1" + "$r2_mac vx10 2001:db8:5::1") + local target + + log_info "unicast vlan 10" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 10 $target + done + + __test_unicast $h2_mac 2001:db8:1::2 0 10 "local MAC unicast" + __test_unicast $r1_mac 2001:db8:1::3 1 10 "remote MAC 1 unicast" + __test_unicast $r2_mac 2001:db8:1::4 3 10 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 10 $target + done + + log_info "unicast vlan 20" + + targets=("$h2_mac $h2" "$r1_mac vx20 2001:db8:4::1" \ + "$r2_mac vx20 2001:db8:5::1") + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 20 $target + done + + __test_unicast $h2_mac 2001:db8:2::2 0 20 "local MAC unicast" + __test_unicast $r1_mac 2001:db8:2::3 2 20 "remote MAC 1 unicast" + __test_unicast $r2_mac 2001:db8:2::4 4 20 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 20 $target + done +} + +test_pvid() +{ + local -a expects=(0 0 0 0 0) + local mac=de:ad:be:ef:13:37 + local dst=2001:db8:1::100 + local vid=10 + + # Check that flooding works + RET=0 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood before pvid off" + + # Toggle PVID off and test that flood to remote hosts does not work + RET=0 + + bridge vlan add vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid off" + + # Toggle PVID on and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid on" + + # Add a new VLAN and test that it does not affect flooding + RET=0 + + bridge vlan add vid 30 dev vx10 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + bridge vlan del vid 30 dev vx10 + + log_test "VXLAN: flood after vlan add" + + # Remove currently mapped VLAN and test that flood to remote hosts does + # not work + RET=0 + + bridge vlan del vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan delete" + + # Re-add the VLAN and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan re-add" +} + +test_all() +{ + log_info "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh new file mode 100755 index 000000000000..344f43ccb755 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 + ping_ipv6 +" +source vxlan_bridge_1q_ipv6.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh new file mode 100755 index 000000000000..904633427fd0 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh @@ -0,0 +1,563 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + + +# +--------------------------------+ +-----------------------------+ +# | vrf-h1 | | vrf-h2 | +# | + $h1 | | + $h2 | +# | | 2001:db8:1::1/64 | | | 2001:db8:2::1/64 | +# | | default via 2001:db8:1::3 | | | default via 2001:db8:2::3 | +# +----|---------------------------+ +-|---------------------------+ +# | | +# +----|------------------------------------------|---------------------------+ +# | SW | | | +# | +--|------------------------------------------|-------------------------+ | +# | | + $swp1 br1 + $swp2 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx10 + vx20 | | +# | | local 2001:db8:3::1 local 2001:db8:3::1 | | +# | | remote 2001:db8:3::2 remote 2001:db8:3::2 | | +# | | id 1010 id 1020 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx4001 | | +# | | local 2001:db8:3::1 | | +# | | remote 2001:db8:3::2 | | +# | | id 104001 | | +# | | dstport 4789 | | +# | | vid 4001 pvid untagged | | +# | | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | | | +# | | + vlan10 | vlan20 + | | +# | | | 2001:db8:1::2/64 | 2001:db8:2::2/64 | | | +# | | | | | | | +# | | + vlan10-v (macvlan) + vlan20-v (macvlan) + | | +# | | 2001:db8:1::3/64 vlan4001 2001:db8:2::3/64 | | +# | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | +# | | vrf-green | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 +lo | +# | | 2001:db8:4::1/64 2001:db8:3::1 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | vrf-spine | +# | + $rp2 | +# | 2001:db8:4::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | | +# | + v1 (veth) | +# | | 2001:db8:5::2/64 | +# +----|--------------------------------------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | + v2 (veth) +lo NS1 (netns) | +# | 2001:db8:5::1/64 2001:db8:3::2/128 | +# | | +# | +-----------------------------------------------------------------------+ | +# | | vrf-green | | +# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | | +# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | | +# | | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | | +# | | | vlan4001 | | | +# | | + vlan10 + vlan20 + | | +# | | | 2001:db8:1::3/64 | 2001:db8:2::3/64 | | | +# | | | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | | +# | | + vx10 + vx20 | | +# | | local 2001:db8:3::2 local 2001:db8:3::2 | | +# | | remote 2001:db8:3::1 remote 2001:db8:3::1 | | +# | | id 1010 id 1020 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx4001 | | +# | | local 2001:db8:3::2 | | +# | | remote 2001:db8:3::1 | | +# | | id 104001 | | +# | | dstport 4789 | | +# | | vid 4001 pvid untagged | | +# | | | | +# | | + w1 (veth) + w3 (veth) | | +# | | | vid 10 pvid untagged br1 | vid 20 pvid untagged | | +# | +--|------------------------------------------|-------------------------+ | +# | | | | +# | | | | +# | +--|----------------------+ +--|-------------------------+ | +# | | | vrf-h1 | | | vrf-h2 | | +# | | + w2 (veth) | | + w4 (veth) | | +# | | 2001:db8:1::4/64 | | 2001:db8:2::4/64 | | +# | | default via | | default via | | +# | | 2001:db8:1::3/64 | | 2001:db8:2::3/64 | | +# | +-------------------------+ +----------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv6 +" +NUM_NETIFS=6 +source lib.sh + +hx_create() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + vrf_create $vrf_name + ip link set dev $if_name master $vrf_name + ip link set dev $vrf_name up + ip link set dev $if_name up + + ip address add $ip_addr/64 dev $if_name + ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \ + dev $if_name + ip route add default vrf $vrf_name nexthop via $gw_ip +} +export -f hx_create + +hx_destroy() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + ip route del default vrf $vrf_name nexthop via $gw_ip + ip neigh del $gw_ip dev $if_name + ip address del $ip_addr/64 dev $if_name + + ip link set dev $if_name down + vrf_destroy $vrf_name +} + +h1_create() +{ + hx_create "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3 +} + +h1_destroy() +{ + hx_destroy "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3 +} + +h2_create() +{ + hx_create "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3 +} + +h2_destroy() +{ + hx_destroy "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + ip address add dev $rp1 2001:db8:4::1/64 + ip route add 2001:db8:3::2/128 nexthop via 2001:db8:4::2 + + ip link add name vx10 type vxlan id 1010 \ + local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 1020 \ + local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + ip link add name vx4001 type vxlan id 104001 \ + local 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx4001 up + + ip link set dev vx4001 master br1 + bridge vlan add vid 4001 dev vx4001 pvid untagged + + ip address add 2001:db8:3::1/128 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 2001:db8:1::2/64 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:1::3/64 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 2001:db8:2::2/64 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:2::3/64 dev vlan20-v + + ip link add link br1 name vlan4001 up master vrf-green \ + type vlan id 4001 + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + bridge vlan add vid 4001 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + + bridge vlan add vid 10 dev $swp1 pvid untagged + bridge vlan add vid 20 dev $swp2 pvid untagged +} + +switch_destroy() +{ + bridge vlan del vid 20 dev br1 self + bridge vlan del vid 10 dev br1 self + + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20 + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10 + + bridge vlan del vid 4001 dev br1 self + ip link del dev vlan4001 + + ip link del dev vlan20 + + ip link del dev vlan10 + + vrf_destroy "vrf-green" + + ip address del 2001:db8:3::1/128 dev lo + + bridge vlan del vid 20 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 4001 dev vx4001 + ip link set dev vx4001 nomaster + + ip link set dev vx4001 down + ip link del dev vx4001 + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + ip route del 2001:db8:3::2 nexthop via 2001:db8:4::2 + ip address del dev $rp1 2001:db8:4::1/64 + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +spine_create() +{ + vrf_create "vrf-spine" + ip link set dev $rp2 master vrf-spine + ip link set dev v1 master vrf-spine + ip link set dev vrf-spine up + ip link set dev $rp2 up + ip link set dev v1 up + + ip address add 2001:db8:4::2/64 dev $rp2 + ip address add 2001:db8:5::2/64 dev v1 + + ip route add 2001:db8:3::1/128 vrf vrf-spine nexthop via \ + 2001:db8:4::1 + ip route add 2001:db8:3::2/128 vrf vrf-spine nexthop via \ + 2001:db8:5::1 +} + +spine_destroy() +{ + ip route del 2001:db8:3::2/128 vrf vrf-spine nexthop via \ + 2001:db8:5::1 + ip route del 2001:db8:3::1/128 vrf vrf-spine nexthop via \ + 2001:db8:4::1 + + ip address del 2001:db8:5::2/64 dev v1 + ip address del 2001:db8:4::2/64 dev $rp2 + + ip link set dev v1 down + ip link set dev $rp2 down + vrf_destroy "vrf-spine" +} + +ns_h1_create() +{ + hx_create "vrf-h1" w2 2001:db8:1::4 2001:db8:1::3 +} +export -f ns_h1_create + +ns_h2_create() +{ + hx_create "vrf-h2" w4 2001:db8:2::4 2001:db8:2::3 +} +export -f ns_h2_create + +ns_switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + ip link set dev br1 up + + ip link set dev v2 up + ip address add dev v2 2001:db8:5::1/64 + ip route add 2001:db8:3::1 nexthop via 2001:db8:5::2 + + ip link add name vx10 type vxlan id 1010 \ + local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 1020 \ + local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link add name vx4001 type vxlan id 104001 \ + local 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx4001 up + + ip link set dev vx4001 master br1 + bridge vlan add vid 4001 dev vx4001 pvid untagged + + ip link set dev w1 master br1 + ip link set dev w1 up + bridge vlan add vid 10 dev w1 pvid untagged + + ip link set dev w3 master br1 + ip link set dev w3 up + bridge vlan add vid 20 dev w3 pvid untagged + + ip address add 2001:db8:3::2/128 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 2001:db8:1::3/64 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:1::3/64 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 2001:db8:2::3/64 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:2::3/64 dev vlan20-v + + ip link add link br1 name vlan4001 up master vrf-green \ + type vlan id 4001 + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + bridge vlan add vid 4001 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 +} +export -f ns_switch_create + +ns_init() +{ + ip link add name w1 type veth peer name w2 + ip link add name w3 type veth peer name w4 + + ip link set dev lo up + + ns_h1_create + ns_h2_create + ns_switch_create +} +export -f ns_init + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 ns_init +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +__l2_vni_init() +{ + local mac1=$1; shift + local mac2=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local dst=$1; shift + + bridge fdb add $mac1 dev vx10 self master extern_learn static \ + dst $dst vlan 10 + bridge fdb add $mac2 dev vx20 self master extern_learn static \ + dst $dst vlan 20 + + ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \ + extern_learn + ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \ + extern_learn +} +export -f __l2_vni_init + +l2_vni_init() +{ + local h1_ns_mac=$(in_ns ns1 mac_get w2) + local h2_ns_mac=$(in_ns ns1 mac_get w4) + local h1_mac=$(mac_get $h1) + local h2_mac=$(mac_get $h2) + + __l2_vni_init $h1_ns_mac $h2_ns_mac 2001:db8:1::4 2001:db8:2::4 \ + 2001:db8:3::2 + in_ns ns1 __l2_vni_init $h1_mac $h2_mac 2001:db8:1::1 2001:db8:2::1 \ + 2001:db8:3::1 +} + +__l3_vni_init() +{ + local mac=$1; shift + local vtep_ip=$1; shift + local host1_ip=$1; shift + local host2_ip=$1; shift + + bridge fdb add $mac dev vx4001 self master extern_learn static \ + dst $vtep_ip vlan 4001 + + ip neigh add $vtep_ip lladdr $mac nud noarp dev vlan4001 extern_learn + + ip route add $host1_ip/128 vrf vrf-green nexthop via $vtep_ip \ + dev vlan4001 onlink + ip route add $host2_ip/128 vrf vrf-green nexthop via $vtep_ip \ + dev vlan4001 onlink +} +export -f __l3_vni_init + +l3_vni_init() +{ + local vlan4001_ns_mac=$(in_ns ns1 mac_get vlan4001) + local vlan4001_mac=$(mac_get vlan4001) + + __l3_vni_init $vlan4001_ns_mac 2001:db8:3::2 2001:db8:1::4 \ + 2001:db8:2::4 + in_ns ns1 __l3_vni_init $vlan4001_mac 2001:db8:3::1 2001:db8:1::1 \ + 2001:db8:2::1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + spine_create + ns1_create + in_ns ns1 forwarding_enable + + l2_vni_init + l3_vni_init +} + +cleanup() +{ + pre_cleanup + + ns1_destroy + spine_destroy + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": local->local vid 10->vid 20" + ping6_test $h1 2001:db8:1::4 ": local->remote vid 10->vid 10" + ping6_test $h2 2001:db8:2::4 ": local->remote vid 20->vid 20" + ping6_test $h1 2001:db8:2::4 ": local->remote vid 10->vid 20" + ping6_test $h2 2001:db8:1::4 ": local->remote vid 20->vid 10" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index 8e45792703ed..b7205a069534 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -812,5 +812,29 @@ "teardown": [ "$TC actions flush action police" ] + }, + { + "id": "7d64", + "name": "Add police action with skip_hw option", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k index 100 skip_hw", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action police | grep skip_hw", + "matchPattern": "skip_hw", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json index 51799874a972..2df68017dfb8 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json @@ -387,5 +387,77 @@ "$TC qdisc del dev $DUMMY ingress", "$IP link del dev $DUMMY type dummy" ] + }, + { + "id": "3329", + "name": "Validate flags of the matchall filter with skip_sw and police action with skip_hw", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions flush action police", + "$TC actions add action police rate 1mbit burst 100k index 199 skip_hw" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 matchall skip_sw action police index 199", + "expExitCode": "2", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv4 matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions del action police index 199" + ] + }, + { + "id": "0eeb", + "name": "Validate flags of the matchall filter with skip_hw and police action", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions flush action police", + "$TC actions add action police rate 1mbit burst 100k index 199" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 matchall skip_hw action police index 199", + "expExitCode": "2", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv4 matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions del action police index 199" + ] + }, + { + "id": "eee4", + "name": "Validate flags of the matchall filter with skip_sw and police action", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions flush action police", + "$TC actions add action police rate 1mbit burst 100k index 199" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 matchall skip_sw action police index 199", + "expExitCode": "2", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv4 matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions del action police index 199" + ] } ] |