diff options
Diffstat (limited to 'drivers/net/ethernet')
435 files changed, 31073 insertions, 5609 deletions
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 8aec5d9fbfef..ad57209007e1 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -138,11 +138,6 @@ MODULE_PARM_DESC(use_mmio, "Use MMIO (1) or PIO(0) to access the NIC. " module_param(rx_copybreak, int, 0); module_param(use_mmio, int, 0); -#if defined(NETIF_F_TSO) && MAX_SKB_FRAGS > 32 -#warning Typhoon only supports 32 entries in its SG list for TSO, disabling TSO -#undef NETIF_F_TSO -#endif - #if TXLO_ENTRIES <= (2 * MAX_SKB_FRAGS) #error TX ring too small! #endif @@ -2261,9 +2256,28 @@ out: return mode; } +#if MAX_SKB_FRAGS > 32 + +#include <net/vxlan.h> + +static netdev_features_t typhoon_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + if (skb_shinfo(skb)->nr_frags > 32 && skb_is_gso(skb)) + features &= ~NETIF_F_GSO_MASK; + + features = vlan_features_check(skb, features); + return vxlan_features_check(skb, features); +} +#endif + static const struct net_device_ops typhoon_netdev_ops = { .ndo_open = typhoon_open, .ndo_stop = typhoon_close, +#if MAX_SKB_FRAGS > 32 + .ndo_features_check = typhoon_features_check, +#endif .ndo_start_xmit = typhoon_start_tx, .ndo_set_rx_mode = typhoon_set_rx_mode, .ndo_tx_timeout = typhoon_tx_timeout, diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index db3ec4768159..bd4cb9d7c35d 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -78,6 +78,7 @@ source "drivers/net/ethernet/ezchip/Kconfig" source "drivers/net/ethernet/faraday/Kconfig" source "drivers/net/ethernet/freescale/Kconfig" source "drivers/net/ethernet/fujitsu/Kconfig" +source "drivers/net/ethernet/fungible/Kconfig" source "drivers/net/ethernet/google/Kconfig" source "drivers/net/ethernet/hisilicon/Kconfig" source "drivers/net/ethernet/huawei/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 8a87c1083d1d..8ef43e0c33c0 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_NET_VENDOR_EZCHIP) += ezchip/ obj-$(CONFIG_NET_VENDOR_FARADAY) += faraday/ obj-$(CONFIG_NET_VENDOR_FREESCALE) += freescale/ obj-$(CONFIG_NET_VENDOR_FUJITSU) += fujitsu/ +obj-$(CONFIG_NET_VENDOR_FUNGIBLE) += fungible/ obj-$(CONFIG_NET_VENDOR_GOOGLE) += google/ obj-$(CONFIG_NET_VENDOR_HISILICON) += hisilicon/ obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/ diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 537e6a85e18d..fbf4588994ac 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -2413,11 +2413,13 @@ static void et131x_tx_dma_memory_free(struct et131x_adapter *adapter) kfree(tx_ring->tcb_ring); } +#define MAX_TX_DESC_PER_PKT 24 + /* nic_send_packet - NIC specific send handler for version B silicon. */ static int nic_send_packet(struct et131x_adapter *adapter, struct tcb *tcb) { u32 i; - struct tx_desc desc[24]; + struct tx_desc desc[MAX_TX_DESC_PER_PKT]; u32 frag = 0; u32 thiscopy, remainder; struct sk_buff *skb = tcb->skb; @@ -2432,9 +2434,6 @@ static int nic_send_packet(struct et131x_adapter *adapter, struct tcb *tcb) * more than 5 fragments. */ - /* nr_frags should be no more than 18. */ - BUILD_BUG_ON(MAX_SKB_FRAGS + 1 > 23); - memset(desc, 0, sizeof(struct tx_desc) * (nr_frags + 1)); for (i = 0; i < nr_frags; i++) { @@ -3762,6 +3761,13 @@ static netdev_tx_t et131x_tx(struct sk_buff *skb, struct net_device *netdev) struct et131x_adapter *adapter = netdev_priv(netdev); struct tx_ring *tx_ring = &adapter->tx_ring; + /* This driver does not support TSO, it is very unlikely + * this condition is true. + */ + if (unlikely(skb_shinfo(skb)->nr_frags > MAX_TX_DESC_PER_PKT - 2)) { + if (skb_linearize(skb)) + goto drop_err; + } /* stop the queue if it's getting full */ if (tx_ring->used >= NUM_TCB - 1 && !netif_queue_stopped(netdev)) netif_stop_queue(netdev); diff --git a/drivers/net/ethernet/altera/altera_sgdma.c b/drivers/net/ethernet/altera/altera_sgdma.c index db97170da8c7..7f247ccbe6ba 100644 --- a/drivers/net/ethernet/altera/altera_sgdma.c +++ b/drivers/net/ethernet/altera/altera_sgdma.c @@ -513,7 +513,7 @@ static int sgdma_txbusy(struct altera_tse_private *priv) { int delay = 0; - /* if DMA is busy, wait for current transactino to finish */ + /* if DMA is busy, wait for current transaction to finish */ while ((csrrd32(priv->tx_dma_csr, sgdma_csroffs(status)) & SGDMA_STSREG_BUSY) && (delay++ < 100)) udelay(1); diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 993b2fb42961..a3816264c35c 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -72,7 +72,7 @@ MODULE_PARM_DESC(dma_tx_num, "Number of descriptors in the TX list"); */ #define ALTERA_RXDMABUFFER_SIZE 2048 -/* Allow network stack to resume queueing packets after we've +/* Allow network stack to resume queuing packets after we've * finished transmitting at least 1/4 of the packets in the queue. */ #define TSE_TX_THRESH(x) (x->tx_ring_size / 4) @@ -390,7 +390,7 @@ static int tse_rx(struct altera_tse_private *priv, int limit) "RCV pktstatus %08X pktlength %08X\n", pktstatus, pktlength); - /* DMA trasfer from TSE starts with 2 aditional bytes for + /* DMA transfer from TSE starts with 2 additional bytes for * IP payload alignment. Status returned by get_rx_status() * contains DMA transfer length. Packet is 2 bytes shorter. */ @@ -1044,7 +1044,7 @@ static void altera_tse_set_mcfilterall(struct net_device *dev) csrwr32(1, priv->mac_dev, tse_csroffs(hash_table) + i * 4); } -/* Set or clear the multicast filter for this adaptor +/* Set or clear the multicast filter for this adapter */ static void tse_set_rx_mode_hashfilter(struct net_device *dev) { @@ -1064,7 +1064,7 @@ static void tse_set_rx_mode_hashfilter(struct net_device *dev) spin_unlock(&priv->mac_cfg_lock); } -/* Set or clear the multicast filter for this adaptor +/* Set or clear the multicast filter for this adapter */ static void tse_set_rx_mode(struct net_device *dev) { diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 53080fd143dc..07444aead3fd 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1400,10 +1400,9 @@ static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag) struct sk_buff *skb; if (!first_frag) - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_copybreak); + skb = napi_alloc_skb(rx_ring->napi, rx_ring->rx_copybreak); else - skb = build_skb(first_frag, ENA_PAGE_SIZE); + skb = napi_build_skb(first_frag, ENA_PAGE_SIZE); if (unlikely(!skb)) { ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1, diff --git a/drivers/net/ethernet/asix/ax88796c_main.c b/drivers/net/ethernet/asix/ax88796c_main.c index e7a9f9863258..6ba5b024a7be 100644 --- a/drivers/net/ethernet/asix/ax88796c_main.c +++ b/drivers/net/ethernet/asix/ax88796c_main.c @@ -433,7 +433,7 @@ ax88796c_skb_return(struct ax88796c_device *ax_local, netif_info(ax_local, rx_status, ndev, "< rx, len %zu, type 0x%x\n", skb->len + sizeof(struct ethhdr), skb->protocol); - status = netif_rx_ni(skb); + status = netif_rx(skb); if (status != NET_RX_SUCCESS && net_ratelimit()) netif_info(ax_local, rx_err, ndev, "netif_rx status %d\n", status); @@ -1102,7 +1102,7 @@ err: return ret; } -static int ax88796c_remove(struct spi_device *spi) +static void ax88796c_remove(struct spi_device *spi) { struct ax88796c_device *ax_local = dev_get_drvdata(&spi->dev); struct net_device *ndev = ax_local->ndev; @@ -1112,8 +1112,6 @@ static int ax88796c_remove(struct spi_device *spi) netif_info(ax_local, probe, ndev, "removing network device %s %s\n", dev_driver_string(&spi->dev), dev_name(&spi->dev)); - - return 0; } #ifdef CONFIG_OF diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index b04e423c446a..c1b97e8c55ef 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1716,17 +1716,17 @@ static int bcm_enet_probe(struct platform_device *pdev) struct bcm_enet_priv *priv; struct net_device *dev; struct bcm63xx_enet_platform_data *pd; - struct resource *res_irq, *res_irq_rx, *res_irq_tx; + int irq, irq_rx, irq_tx; struct mii_bus *bus; int i, ret; if (!bcm_enet_shared_base[0]) return -EPROBE_DEFER; - res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - res_irq_rx = platform_get_resource(pdev, IORESOURCE_IRQ, 1); - res_irq_tx = platform_get_resource(pdev, IORESOURCE_IRQ, 2); - if (!res_irq || !res_irq_rx || !res_irq_tx) + irq = platform_get_irq(pdev, 0); + irq_rx = platform_get_irq(pdev, 1); + irq_tx = platform_get_irq(pdev, 2); + if (irq < 0 || irq_rx < 0 || irq_tx < 0) return -ENODEV; dev = alloc_etherdev(sizeof(*priv)); @@ -1748,9 +1748,9 @@ static int bcm_enet_probe(struct platform_device *pdev) goto out; } - dev->irq = priv->irq = res_irq->start; - priv->irq_rx = res_irq_rx->start; - priv->irq_tx = res_irq_tx->start; + dev->irq = priv->irq = irq; + priv->irq_rx = irq_rx; + priv->irq_tx = irq_tx; priv->mac_clk = devm_clk_get(&pdev->dev, "enet"); if (IS_ERR(priv->mac_clk)) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index a19dd6797070..447a75ea0cc1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1271,7 +1271,7 @@ struct bnx2x_fw_stats_data { struct per_port_stats port; struct per_pf_stats pf; struct fcoe_statistics_params fcoe; - struct per_queue_stats queue_stats[1]; + struct per_queue_stats queue_stats[]; }; /* Public slow path states */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b1c98d1408b8..2de02950086f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -233,6 +233,7 @@ static const u16 bnxt_async_events_arr[] = { ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST, ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP, ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT, + ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE, }; static struct workqueue_struct *bnxt_pf_wq; @@ -2079,6 +2080,16 @@ static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2) (BNXT_EVENT_RING_TYPE(data2) == \ ASYNC_EVENT_CMPL_RING_MONITOR_MSG_EVENT_DATA2_DISABLE_RING_TYPE_RX) +#define BNXT_EVENT_PHC_EVENT_TYPE(data1) \ + (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK) >>\ + ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT) + +#define BNXT_EVENT_PHC_RTC_UPDATE(data1) \ + (((data1) & ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK) >>\ + ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT) + +#define BNXT_PHC_BITS 48 + static int bnxt_async_event_process(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl) { @@ -2258,6 +2269,24 @@ static int bnxt_async_event_process(struct bnxt *bp, bnxt_event_error_report(bp, data1, data2); goto async_event_process_exit; } + case ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE: { + switch (BNXT_EVENT_PHC_EVENT_TYPE(data1)) { + case ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE: + if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) { + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + u64 ns; + + spin_lock_bh(&ptp->ptp_lock); + bnxt_ptp_update_current_time(bp); + ns = (((u64)BNXT_EVENT_PHC_RTC_UPDATE(data1) << + BNXT_PHC_BITS) | ptp->current_time); + bnxt_ptp_rtc_timecounter_init(ptp, ns); + spin_unlock_bh(&ptp->ptp_lock); + } + break; + } + goto async_event_process_exit; + } case ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE: { u16 seq_id = le32_to_cpu(cmpl->event_data2) & 0xffff; @@ -7416,6 +7445,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) struct hwrm_port_mac_ptp_qcfg_output *resp; struct hwrm_port_mac_ptp_qcfg_input *req; struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + bool phc_cfg; u8 flags; int rc; @@ -7458,7 +7488,8 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) rc = -ENODEV; goto exit; } - rc = bnxt_ptp_init(bp); + phc_cfg = (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED) != 0; + rc = bnxt_ptp_init(bp, phc_cfg); if (rc) netdev_warn(bp->dev, "PTP initialization failed.\n"); exit: @@ -7516,6 +7547,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->fw_cap |= BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED; if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED)) bp->fw_cap |= BNXT_FW_CAP_PTP_PPS; + if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED) + bp->fw_cap |= BNXT_FW_CAP_PTP_RTC; if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_HOT_RESET_IF_SUPPORT)) bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF; if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED)) @@ -9267,7 +9300,7 @@ void bnxt_tx_enable(struct bnxt *bp) /* Make sure napi polls see @dev_state change */ synchronize_net(); netif_tx_wake_all_queues(bp->dev); - if (bp->link_info.link_up) + if (BNXT_LINK_IS_UP(bp)) netif_carrier_on(bp->dev); } @@ -9297,7 +9330,7 @@ static char *bnxt_report_fec(struct bnxt_link_info *link_info) void bnxt_report_link(struct bnxt *bp) { - if (bp->link_info.link_up) { + if (BNXT_LINK_IS_UP(bp)) { const char *signal = ""; const char *flow_ctrl; const char *duplex; @@ -9383,7 +9416,7 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp) if (rc) goto hwrm_phy_qcaps_exit; - bp->phy_flags = resp->flags; + bp->phy_flags = resp->flags | (le16_to_cpu(resp->flags2) << 8); if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED) { struct ethtool_eee *eee = &bp->eee; u16 fw_speeds = le16_to_cpu(resp->supported_speeds_eee_mode); @@ -9433,7 +9466,7 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state) struct bnxt_link_info *link_info = &bp->link_info; struct hwrm_port_phy_qcfg_output *resp; struct hwrm_port_phy_qcfg_input *req; - u8 link_up = link_info->link_up; + u8 link_state = link_info->link_state; bool support_changed = false; int rc; @@ -9534,14 +9567,14 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state) /* TODO: need to add more logic to report VF link */ if (chng_link_state) { if (link_info->phy_link_status == BNXT_LINK_LINK) - link_info->link_up = 1; + link_info->link_state = BNXT_LINK_STATE_UP; else - link_info->link_up = 0; - if (link_up != link_info->link_up) + link_info->link_state = BNXT_LINK_STATE_DOWN; + if (link_state != link_info->link_state) bnxt_report_link(bp); } else { - /* alwasy link down if not require to update link state */ - link_info->link_up = 0; + /* always link down if not require to update link state */ + link_info->link_state = BNXT_LINK_STATE_DOWN; } hwrm_req_drop(bp, req); @@ -9741,7 +9774,18 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp) return rc; req->flags = cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DWN); - return hwrm_req_send(bp, req); + rc = hwrm_req_send(bp, req); + if (!rc) { + mutex_lock(&bp->link_lock); + /* Device is not obliged link down in certain scenarios, even + * when forced. Setting the state unknown is consistent with + * driver startup and will force link state to be reported + * during subsequent open based on PORT_PHY_QCFG. + */ + bp->link_info.link_state = BNXT_LINK_STATE_UNKNOWN; + mutex_unlock(&bp->link_lock); + } + return rc; } static int bnxt_fw_reset_via_optee(struct bnxt *bp) @@ -10172,7 +10216,7 @@ static int bnxt_update_phy_setting(struct bnxt *bp) /* The last close may have shutdown the link, so need to call * PHY_CFG to bring it back up. */ - if (!bp->link_info.link_up) + if (!BNXT_LINK_IS_UP(bp)) update_link = true; if (!bnxt_eee_config_ok(bp)) @@ -10307,6 +10351,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) /* VF-reps may need to be re-opened after the PF is re-opened */ if (BNXT_PF(bp)) bnxt_vf_reps_open(bp); + bnxt_ptp_init_rtc(bp, true); return 0; open_err_irq: @@ -11403,7 +11448,7 @@ static void bnxt_timer(struct timer_list *t) if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) bnxt_fw_health_check(bp); - if (bp->link_info.link_up && bp->stats_coal_ticks) { + if (BNXT_LINK_IS_UP(bp) && bp->stats_coal_ticks) { set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event); bnxt_queue_sp_work(bp); } @@ -12104,11 +12149,6 @@ int bnxt_fw_init_one(struct bnxt *bp) if (rc) return rc; - /* In case fw capabilities have changed, destroy the unneeded - * reporters and create newly capable ones. - */ - bnxt_dl_fw_reporters_destroy(bp, false); - bnxt_dl_fw_reporters_create(bp); bnxt_fw_init_one_p3(bp); return 0; } @@ -12937,7 +12977,7 @@ static void bnxt_remove_one(struct pci_dev *pdev) cancel_delayed_work_sync(&bp->fw_reset_task); bp->sp_event = 0; - bnxt_dl_fw_reporters_destroy(bp, true); + bnxt_dl_fw_reporters_destroy(bp); bnxt_dl_unregister(bp); bnxt_shutdown_tc(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 666fc1e7a7d2..447a9406b8a2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1175,7 +1175,11 @@ struct bnxt_link_info { #define BNXT_PHY_STATE_ENABLED 0 #define BNXT_PHY_STATE_DISABLED 1 - u8 link_up; + u8 link_state; +#define BNXT_LINK_STATE_UNKNOWN 0 +#define BNXT_LINK_STATE_DOWN 1 +#define BNXT_LINK_STATE_UP 2 +#define BNXT_LINK_IS_UP(bp) ((bp)->link_info.link_state == BNXT_LINK_STATE_UP) u8 duplex; #define BNXT_LINK_DUPLEX_HALF PORT_PHY_QCFG_RESP_DUPLEX_STATE_HALF #define BNXT_LINK_DUPLEX_FULL PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL @@ -1958,6 +1962,7 @@ struct bnxt { #define BNXT_FW_CAP_EXT_STATS_SUPPORTED 0x00040000 #define BNXT_FW_CAP_ERR_RECOVER_RELOAD 0x00100000 #define BNXT_FW_CAP_HOT_RESET 0x00200000 + #define BNXT_FW_CAP_PTP_RTC 0x00400000 #define BNXT_FW_CAP_VLAN_RX_STRIP 0x01000000 #define BNXT_FW_CAP_VLAN_TX_INSERT 0x02000000 #define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED 0x04000000 @@ -2099,8 +2104,8 @@ struct bnxt { u32 lpi_tmr_lo; u32 lpi_tmr_hi; - /* copied from flags in hwrm_port_phy_qcaps_output */ - u8 phy_flags; + /* copied from flags and flags2 in hwrm_port_phy_qcaps_output */ + u32 phy_flags; #define BNXT_PHY_FL_EEE_CAP PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED #define BNXT_PHY_FL_EXT_LPBK PORT_PHY_QCAPS_RESP_FLAGS_EXTERNAL_LPBK_SUPPORTED #define BNXT_PHY_FL_AN_PHY_LPBK PORT_PHY_QCAPS_RESP_FLAGS_AUTONEG_LPBK_SUPPORTED @@ -2109,6 +2114,8 @@ struct bnxt { #define BNXT_PHY_FL_NO_PHY_LPBK PORT_PHY_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED #define BNXT_PHY_FL_FW_MANAGED_LKDN PORT_PHY_QCAPS_RESP_FLAGS_FW_MANAGED_LINK_DOWN #define BNXT_PHY_FL_NO_FCS PORT_PHY_QCAPS_RESP_FLAGS_NO_FCS +#define BNXT_PHY_FL_NO_PAUSE (PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED << 8) +#define BNXT_PHY_FL_NO_PFC (PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED << 8) u8 num_tests; struct bnxt_test_info *test_info; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index 217ff597cdf2..caab3d626a2a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -627,7 +627,8 @@ static int bnxt_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) int rc; if (!(bp->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || - !(bp->dcbx_cap & DCB_CAP_DCBX_HOST)) + !(bp->dcbx_cap & DCB_CAP_DCBX_HOST) || + (bp->phy_flags & BNXT_PHY_FL_NO_PAUSE)) return -EINVAL; if (!my_pfc) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index f6e21fac0e69..0c17f90d44a2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -241,37 +241,37 @@ static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = { .recover = bnxt_fw_recover, }; -void bnxt_dl_fw_reporters_create(struct bnxt *bp) +static struct devlink_health_reporter * +__bnxt_dl_reporter_create(struct bnxt *bp, + const struct devlink_health_reporter_ops *ops) { - struct bnxt_fw_health *health = bp->fw_health; - - if (!health || health->fw_reporter) - return; + struct devlink_health_reporter *reporter; - health->fw_reporter = - devlink_health_reporter_create(bp->dl, &bnxt_dl_fw_reporter_ops, - 0, bp); - if (IS_ERR(health->fw_reporter)) { - netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n", - PTR_ERR(health->fw_reporter)); - health->fw_reporter = NULL; - bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY; + reporter = devlink_health_reporter_create(bp->dl, ops, 0, bp); + if (IS_ERR(reporter)) { + netdev_warn(bp->dev, "Failed to create %s health reporter, rc = %ld\n", + ops->name, PTR_ERR(reporter)); + return NULL; } + + return reporter; } -void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all) +void bnxt_dl_fw_reporters_create(struct bnxt *bp) { - struct bnxt_fw_health *health = bp->fw_health; + struct bnxt_fw_health *fw_health = bp->fw_health; - if (!health) - return; + if (fw_health && !fw_health->fw_reporter) + fw_health->fw_reporter = __bnxt_dl_reporter_create(bp, &bnxt_dl_fw_reporter_ops); +} - if ((bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) && !all) - return; +void bnxt_dl_fw_reporters_destroy(struct bnxt *bp) +{ + struct bnxt_fw_health *fw_health = bp->fw_health; - if (health->fw_reporter) { - devlink_health_reporter_destroy(health->fw_reporter); - health->fw_reporter = NULL; + if (fw_health && fw_health->fw_reporter) { + devlink_health_reporter_destroy(fw_health->fw_reporter); + fw_health->fw_reporter = NULL; } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h index a715458abc30..b8105065367b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -75,7 +75,7 @@ void bnxt_devlink_health_fw_report(struct bnxt *bp); void bnxt_dl_health_fw_status_update(struct bnxt *bp, bool healthy); void bnxt_dl_health_fw_recovery_done(struct bnxt *bp); void bnxt_dl_fw_reporters_create(struct bnxt *bp); -void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all); +void bnxt_dl_fw_reporters_destroy(struct bnxt *bp); int bnxt_dl_register(struct bnxt *bp); void bnxt_dl_unregister(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 8aaa2335f848..22e965e18fbc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -11,6 +11,7 @@ #include <linux/ctype.h> #include <linux/stringify.h> #include <linux/ethtool.h> +#include <linux/ethtool_netlink.h> #include <linux/linkmode.h> #include <linux/interrupt.h> #include <linux/pci.h> @@ -803,9 +804,11 @@ static void bnxt_get_ringparam(struct net_device *dev, if (bp->flags & BNXT_FLAG_AGG_RINGS) { ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT_JUM_ENA; ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT; + kernel_ering->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; } else { ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT; ering->rx_jumbo_max_pending = 0; + kernel_ering->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED; } ering->tx_max_pending = BNXT_MAX_TX_DESC_CNT; @@ -1659,15 +1662,19 @@ static void bnxt_fw_to_ethtool_support_fec(struct bnxt_link_info *link_info, static void bnxt_fw_to_ethtool_support_spds(struct bnxt_link_info *link_info, struct ethtool_link_ksettings *lk_ksettings) { + struct bnxt *bp = container_of(link_info, struct bnxt, link_info); u16 fw_speeds = link_info->support_speeds; BNXT_FW_TO_ETHTOOL_SPDS(fw_speeds, 0, lk_ksettings, supported); fw_speeds = link_info->support_pam4_speeds; BNXT_FW_TO_ETHTOOL_PAM4_SPDS(fw_speeds, lk_ksettings, supported); - ethtool_link_ksettings_add_link_mode(lk_ksettings, supported, Pause); - ethtool_link_ksettings_add_link_mode(lk_ksettings, supported, - Asym_Pause); + if (!(bp->phy_flags & BNXT_PHY_FL_NO_PAUSE)) { + ethtool_link_ksettings_add_link_mode(lk_ksettings, supported, + Pause); + ethtool_link_ksettings_add_link_mode(lk_ksettings, supported, + Asym_Pause); + } if (link_info->support_auto_speeds || link_info->support_pam4_auto_speeds) @@ -1898,7 +1905,8 @@ static int bnxt_set_link_ksettings(struct net_device *dev, /* any change to autoneg will cause link change, therefore the * driver should put back the original pause setting in autoneg */ - set_pause = true; + if (!(bp->phy_flags & BNXT_PHY_FL_NO_PAUSE)) + set_pause = true; } else { u8 phy_type = link_info->phy_type; @@ -2090,7 +2098,7 @@ static int bnxt_set_pauseparam(struct net_device *dev, struct bnxt *bp = netdev_priv(dev); struct bnxt_link_info *link_info = &bp->link_info; - if (!BNXT_PHY_CFG_ABLE(bp)) + if (!BNXT_PHY_CFG_ABLE(bp) || (bp->phy_flags & BNXT_PHY_FL_NO_PAUSE)) return -EOPNOTSUPP; mutex_lock(&bp->link_lock); @@ -2101,9 +2109,7 @@ static int bnxt_set_pauseparam(struct net_device *dev, } link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL; - if (bp->hwrm_spec_code >= 0x10201) - link_info->req_flow_ctrl = - PORT_PHY_CFG_REQ_AUTO_PAUSE_AUTONEG_PAUSE; + link_info->req_flow_ctrl = 0; } else { /* when transition from auto pause to force pause, * force a link change @@ -2132,7 +2138,7 @@ static u32 bnxt_get_link(struct net_device *dev) struct bnxt *bp = netdev_priv(dev); /* TODO: handle MF, VF, driver close case */ - return bp->link_info.link_up; + return BNXT_LINK_IS_UP(bp); } int bnxt_hwrm_nvm_get_dev_info(struct bnxt *bp, @@ -2509,6 +2515,7 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware u8 *kmem = NULL; u32 modify_len; u32 item_len; + u8 cmd_err; u16 index; int rc; @@ -2592,6 +2599,8 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware } rc = hwrm_req_send_silent(bp, install); + if (!rc) + break; if (defrag_attempted) { /* We have tried to defragment already in the previous @@ -2600,15 +2609,24 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware break; } - if (rc && ((struct hwrm_err_output *)resp)->cmd_err == - NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { + cmd_err = ((struct hwrm_err_output *)resp)->cmd_err; + + switch (cmd_err) { + case NVM_INSTALL_UPDATE_CMD_ERR_CODE_ANTI_ROLLBACK: + netdev_err(dev, "HWRM_NVM_INSTALL_UPDATE failure Anti-rollback detected\n"); + rc = -EALREADY; + break; + case NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR: install->flags = cpu_to_le16(NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG); rc = hwrm_req_send_silent(bp, install); + if (!rc) + break; + + cmd_err = ((struct hwrm_err_output *)resp)->cmd_err; - if (rc && ((struct hwrm_err_output *)resp)->cmd_err == - NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_SPACE) { + if (cmd_err == NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_SPACE) { /* FW has cleared NVM area, driver will create * UPDATE directory and try the flash again */ @@ -2618,11 +2636,13 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware BNX_DIR_TYPE_UPDATE, BNX_DIR_ORDINAL_FIRST, 0, 0, item_len, NULL, 0); - } else if (rc) { - netdev_err(dev, "HWRM_NVM_INSTALL_UPDATE failure rc :%x\n", rc); + if (!rc) + break; } - } else if (rc) { - netdev_err(dev, "HWRM_NVM_INSTALL_UPDATE failure rc :%x\n", rc); + fallthrough; + default: + netdev_err(dev, "HWRM_NVM_INSTALL_UPDATE failure rc :%x cmd_err :%x\n", + rc, cmd_err); } } while (defrag_attempted && !rc); @@ -3324,7 +3344,7 @@ static int bnxt_disable_an_for_lpbk(struct bnxt *bp, return rc; fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB; - if (bp->link_info.link_up) + if (BNXT_LINK_IS_UP(bp)) fw_speed = bp->link_info.link_speed; else if (fw_advertising & BNXT_LINK_SPEED_MSK_10GB) fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index ea86c54247c7..b7100edbd6dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -369,6 +369,12 @@ struct cmd_nums { #define HWRM_FUNC_PTP_EXT_CFG 0x1a0UL #define HWRM_FUNC_PTP_EXT_QCFG 0x1a1UL #define HWRM_FUNC_KEY_CTX_ALLOC 0x1a2UL + #define HWRM_FUNC_BACKING_STORE_CFG_V2 0x1a3UL + #define HWRM_FUNC_BACKING_STORE_QCFG_V2 0x1a4UL + #define HWRM_FUNC_DBR_PACING_CFG 0x1a5UL + #define HWRM_FUNC_DBR_PACING_QCFG 0x1a6UL + #define HWRM_FUNC_DBR_PACING_BROADCAST_EVENT 0x1a7UL + #define HWRM_FUNC_BACKING_STORE_QCAPS_V2 0x1a8UL #define HWRM_SELFTEST_QLIST 0x200UL #define HWRM_SELFTEST_EXEC 0x201UL #define HWRM_SELFTEST_IRQ 0x202UL @@ -390,6 +396,9 @@ struct cmd_nums { #define HWRM_MFG_PRVSN_IMPORT_CERT 0x212UL #define HWRM_MFG_PRVSN_GET_STATE 0x213UL #define HWRM_MFG_GET_NVM_MEASUREMENT 0x214UL + #define HWRM_MFG_PSOC_QSTATUS 0x215UL + #define HWRM_MFG_SELFTEST_QLIST 0x216UL + #define HWRM_MFG_SELFTEST_EXEC 0x217UL #define HWRM_TF 0x2bcUL #define HWRM_TF_VERSION_GET 0x2bdUL #define HWRM_TF_SESSION_OPEN 0x2c6UL @@ -532,8 +541,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 2 -#define HWRM_VERSION_RSVD 63 -#define HWRM_VERSION_STR "1.10.2.63" +#define HWRM_VERSION_RSVD 73 +#define HWRM_VERSION_STR "1.10.2.73" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -757,10 +766,11 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE 0x40UL #define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE 0x41UL #define ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST 0x42UL - #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_MASTER 0x43UL + #define ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE 0x43UL #define ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP 0x44UL #define ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT 0x45UL - #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x46UL + #define ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD 0x46UL + #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x47UL #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR @@ -1112,34 +1122,37 @@ struct hwrm_async_event_cmpl_echo_request { __le32 event_data1; }; -/* hwrm_async_event_cmpl_phc_master (size:128b/16B) */ -struct hwrm_async_event_cmpl_phc_master { +/* hwrm_async_event_cmpl_phc_update (size:128b/16B) */ +struct hwrm_async_event_cmpl_phc_update { __le16 type; - #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_MASK 0x3fUL - #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_SFT 0 - #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT 0x2eUL - #define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_LAST ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT + #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_TYPE_HWRM_ASYNC_EVENT __le16 event_id; - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER 0x43UL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_LAST ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE 0x43UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_ID_PHC_UPDATE __le32 event_data2; - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_SFT 0 - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_MASK 0xffff0000UL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_SFT 16 + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_MASTER_FID_SFT 0 + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_MASK 0xffff0000UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA2_PHC_SEC_FID_SFT 16 u8 opaque_v; - #define ASYNC_EVENT_CMPL_PHC_MASTER_V 0x1UL - #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_MASK 0xfeUL - #define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_SFT 1 + #define ASYNC_EVENT_CMPL_PHC_UPDATE_V 0x1UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_OPAQUE_SFT 1 u8 timestamp_lo; __le16 timestamp_hi; __le32 event_data1; - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_MASK 0xfUL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_SFT 0 - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_MASTER 0x1UL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_SECONDARY 0x2UL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER 0x3UL - #define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_LAST ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_MASK 0xfUL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_SFT 0 + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_MASTER 0x1UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_SECONDARY 0x2UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_FAILOVER 0x3UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE 0x4UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_LAST ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_MASK 0xffff0UL + #define ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_PHC_TIME_MSB_SFT 4 }; /* hwrm_async_event_cmpl_pps_timestamp (size:128b/16B) */ @@ -1330,6 +1343,30 @@ struct hwrm_async_event_cmpl_error_report_nvm { #define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_ERASE }; +/* hwrm_async_event_cmpl_error_report_doorbell_drop_threshold (size:128b/16B) */ +struct hwrm_async_event_cmpl_error_report_doorbell_drop_threshold { + __le16 type; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_MASK 0x3fUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT 0x2eUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_TYPE_HWRM_ASYNC_EVENT + __le16 event_id; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT 0x45UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_ID_ERROR_REPORT + __le32 event_data2; + u8 opaque_v; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_V 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_MASK 0xfeUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_OPAQUE_SFT 1 + u8 timestamp_lo; + __le16 timestamp_hi; + __le32 event_data1; + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD +}; + /* hwrm_func_reset_input (size:192b/24B) */ struct hwrm_func_reset_input { __le16 req_type; @@ -1589,6 +1626,10 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT_EP_RATE_CONTROL 0x800000UL #define FUNC_QCAPS_RESP_FLAGS_EXT_MIN_BW_SUPPORTED 0x1000000UL #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_COAL_CMPL_CAP 0x2000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED 0x4000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_REQUIRED 0x8000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_PTP_64BIT_RTC_SUPPORTED 0x10000000UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_DBR_PACING_SUPPORTED 0x20000000UL u8 max_schqs; u8 mpc_chnls_cap; #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE 0x1UL @@ -2455,7 +2496,7 @@ struct hwrm_func_backing_store_qcaps_output { __le16 rkc_entry_size; __le32 tkc_max_entries; __le32 rkc_max_entries; - u8 rsvd[7]; + u8 rsvd1[7]; u8 valid; }; @@ -3164,7 +3205,7 @@ struct hwrm_func_ptp_pin_cfg_output { u8 valid; }; -/* hwrm_func_ptp_cfg_input (size:320b/40B) */ +/* hwrm_func_ptp_cfg_input (size:384b/48B) */ struct hwrm_func_ptp_cfg_input { __le16 req_type; __le16 cmpl_ring; @@ -3178,6 +3219,7 @@ struct hwrm_func_ptp_cfg_input { #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PERIOD 0x8UL #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_UP 0x10UL #define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PHASE 0x20UL + #define FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME 0x40UL u8 ptp_pps_event; #define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_INTERNAL 0x1UL #define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_EXTERNAL 0x2UL @@ -3204,6 +3246,7 @@ struct hwrm_func_ptp_cfg_input { __le32 ptp_freq_adj_ext_up; __le32 ptp_freq_adj_ext_phase_lower; __le32 ptp_freq_adj_ext_phase_upper; + __le64 ptp_set_time; }; /* hwrm_func_ptp_cfg_output (size:128b/16B) */ @@ -3243,6 +3286,308 @@ struct hwrm_func_ptp_ts_query_output { u8 valid; }; +/* hwrm_func_ptp_ext_cfg_input (size:256b/32B) */ +struct hwrm_func_ptp_ext_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 enables; + #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_MASTER_FID 0x1UL + #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_FID 0x2UL + #define FUNC_PTP_EXT_CFG_REQ_ENABLES_PHC_SEC_MODE 0x4UL + #define FUNC_PTP_EXT_CFG_REQ_ENABLES_FAILOVER_TIMER 0x8UL + __le16 phc_master_fid; + __le16 phc_sec_fid; + u8 phc_sec_mode; + #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_SWITCH 0x0UL + #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_ALL 0x1UL + #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY 0x2UL + #define FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_LAST FUNC_PTP_EXT_CFG_REQ_PHC_SEC_MODE_PF_ONLY + u8 unused_0; + __le32 failover_timer; + u8 unused_1[4]; +}; + +/* hwrm_func_ptp_ext_cfg_output (size:128b/16B) */ +struct hwrm_func_ptp_ext_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_ptp_ext_qcfg_input (size:192b/24B) */ +struct hwrm_func_ptp_ext_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + u8 unused_0[8]; +}; + +/* hwrm_func_ptp_ext_qcfg_output (size:256b/32B) */ +struct hwrm_func_ptp_ext_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 phc_master_fid; + __le16 phc_sec_fid; + __le16 phc_active_fid0; + __le16 phc_active_fid1; + __le32 last_failover_event; + __le16 from_fid; + __le16 to_fid; + u8 unused_0[7]; + u8 valid; +}; + +/* hwrm_func_backing_store_cfg_v2_input (size:448b/56B) */ +struct hwrm_func_backing_store_cfg_v2_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 type; + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_TKC 0x13UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RKC 0x14UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID + __le16 instance; + __le32 flags; + #define FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_PREBOOT_MODE 0x1UL + __le64 page_dir; + __le32 num_entries; + __le16 entry_size; + u8 page_size_pbl_level; + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_SFT 0 + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LAST FUNC_BACKING_STORE_CFG_V2_REQ_PBL_LEVEL_LVL_2 + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_PAGE_SIZE_PG_1G + u8 subtype_valid_cnt; + __le32 split_entry_0; + __le32 split_entry_1; + __le32 split_entry_2; + __le32 split_entry_3; +}; + +/* hwrm_func_backing_store_cfg_v2_output (size:128b/16B) */ +struct hwrm_func_backing_store_cfg_v2_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 rsvd0[7]; + u8 valid; +}; + +/* hwrm_func_backing_store_qcfg_v2_input (size:192b/24B) */ +struct hwrm_func_backing_store_qcfg_v2_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 type; + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_TKC 0x13UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RKC 0x14UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID + __le16 instance; + u8 rsvd[4]; +}; + +/* hwrm_func_backing_store_qcfg_v2_output (size:448b/56B) */ +struct hwrm_func_backing_store_qcfg_v2_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 type; + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_TKC 0x13UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RKC 0x14UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID + __le16 instance; + __le32 flags; + __le64 page_dir; + __le32 num_entries; + u8 page_size_pbl_level; + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_MASK 0xfUL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_SFT 0 + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_PBL_LEVEL_LVL_2 + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_SFT 4 + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_PAGE_SIZE_PG_1G + u8 subtype_valid_cnt; + u8 rsvd[2]; + __le32 split_entry_0; + __le32 split_entry_1; + __le32 split_entry_2; + __le32 split_entry_3; + u8 rsvd2[7]; + u8 valid; +}; + +/* qpc_split_entries (size:128b/16B) */ +struct qpc_split_entries { + __le32 qp_num_l2_entries; + __le32 qp_num_qp1_entries; + __le32 rsvd[2]; +}; + +/* srq_split_entries (size:128b/16B) */ +struct srq_split_entries { + __le32 srq_num_l2_entries; + __le32 rsvd; + __le32 rsvd2[2]; +}; + +/* cq_split_entries (size:128b/16B) */ +struct cq_split_entries { + __le32 cq_num_l2_entries; + __le32 rsvd; + __le32 rsvd2[2]; +}; + +/* vnic_split_entries (size:128b/16B) */ +struct vnic_split_entries { + __le32 vnic_num_vnic_entries; + __le32 rsvd; + __le32 rsvd2[2]; +}; + +/* mrav_split_entries (size:128b/16B) */ +struct mrav_split_entries { + __le32 mrav_num_av_entries; + __le32 rsvd; + __le32 rsvd2[2]; +}; + +/* hwrm_func_backing_store_qcaps_v2_input (size:192b/24B) */ +struct hwrm_func_backing_store_qcaps_v2_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 type; + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TKC 0x13UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RKC 0x14UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID + u8 rsvd[6]; +}; + +/* hwrm_func_backing_store_qcaps_v2_output (size:448b/56B) */ +struct hwrm_func_backing_store_qcaps_v2_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 type; + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_QP 0x0UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SRQ 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CQ 0x2UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_VNIC 0x3UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_STAT 0x4UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_SP_TQM_RING 0x5UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_FP_TQM_RING 0x6UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MRAV 0xeUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TIM 0xfUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_TKC 0x13UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RKC 0x14UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_MP_TQM_RING 0x15UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID + __le16 entry_size; + __le32 flags; + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT 0x1UL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID 0x2UL + __le32 instance_bit_map; + u8 ctx_init_value; + u8 ctx_init_offset; + u8 entry_multiple; + u8 rsvd; + __le32 max_num_entries; + __le32 min_num_entries; + __le16 next_valid_type; + u8 subtype_valid_cnt; + u8 rsvd2; + __le32 split_entry_0; + __le32 split_entry_1; + __le32 split_entry_2; + __le32 split_entry_3; + u8 rsvd3[3]; + u8 valid; +}; + /* hwrm_func_drv_if_change_input (size:192b/24B) */ struct hwrm_func_drv_if_change_input { __le16 req_type; @@ -3741,7 +4086,7 @@ struct hwrm_port_phy_qcfg_output { u8 valid; }; -/* hwrm_port_mac_cfg_input (size:384b/48B) */ +/* hwrm_port_mac_cfg_input (size:448b/56B) */ struct hwrm_port_mac_cfg_input { __le16 req_type; __le16 cmpl_ring; @@ -3807,7 +4152,8 @@ struct hwrm_port_mac_cfg_input { #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT 5 u8 unused_0[3]; __le32 ptp_freq_adj_ppb; - __le32 ptp_adj_phase; + u8 unused_1[4]; + __le64 ptp_adj_phase; }; /* hwrm_port_mac_cfg_output (size:128b/16B) */ @@ -3850,6 +4196,7 @@ struct hwrm_port_mac_ptp_qcfg_output { #define PORT_MAC_PTP_QCFG_RESP_FLAGS_ONE_STEP_TX_TS 0x4UL #define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS 0x8UL #define PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK 0x10UL + #define PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED 0x20UL u8 unused_0[3]; __le32 rx_ts_reg_off_lower; __le32 rx_ts_reg_off_upper; @@ -4339,7 +4686,8 @@ struct hwrm_port_phy_qcaps_output { #define PORT_PHY_QCAPS_RESP_PORT_CNT_2 0x2UL #define PORT_PHY_QCAPS_RESP_PORT_CNT_3 0x3UL #define PORT_PHY_QCAPS_RESP_PORT_CNT_4 0x4UL - #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST PORT_PHY_QCAPS_RESP_PORT_CNT_4 + #define PORT_PHY_QCAPS_RESP_PORT_CNT_12 0xcUL + #define PORT_PHY_QCAPS_RESP_PORT_CNT_LAST PORT_PHY_QCAPS_RESP_PORT_CNT_12 __le16 supported_speeds_force_mode; #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MBHD 0x1UL #define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MB 0x2UL @@ -4399,7 +4747,7 @@ struct hwrm_port_phy_qcaps_output { __le16 flags2; #define PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED 0x1UL #define PORT_PHY_QCAPS_RESP_FLAGS2_PFC_UNSUPPORTED 0x2UL - u8 unused_0[1]; + u8 internal_port_cnt; u8 valid; }; @@ -6221,12 +6569,13 @@ struct hwrm_vnic_rss_cfg_input { __le16 target_id; __le64 resp_addr; __le32 hash_type; - #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 0x1UL - #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 0x2UL - #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 0x4UL - #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 0x8UL - #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6 0x10UL - #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6 0x20UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 0x1UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 0x2UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 0x4UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 0x8UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6 0x10UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6 0x20UL + #define VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL 0x40UL __le16 vnic_id; u8 ring_table_pair_index; u8 hash_mode_flags; @@ -7898,6 +8247,7 @@ struct hwrm_cfa_adv_flow_mgnt_qcaps_output { u8 valid; }; +/* hwrm_tunnel_dst_port_query_input (size:192b/24B) */ struct hwrm_tunnel_dst_port_query_input { __le16 req_type; __le16 cmpl_ring; @@ -8909,6 +9259,50 @@ struct hwrm_dbg_qcfg_output { u8 valid; }; +/* hwrm_dbg_crashdump_medium_cfg_input (size:320b/40B) */ +struct hwrm_dbg_crashdump_medium_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 output_dest_flags; + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_TYPE_DDR 0x1UL + __le16 pg_size_lvl; + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_MASK 0x3UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_SFT 0 + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_0 0x0UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_1 0x1UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2 0x2UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LAST DBG_CRASHDUMP_MEDIUM_CFG_REQ_LVL_LVL_2 + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_MASK 0x1cUL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_SFT 2 + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_4K (0x0UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8K (0x1UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_64K (0x2UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_2M (0x3UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8M (0x4UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G (0x5UL << 2) + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_LAST DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_1G + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_MASK 0xffe0UL + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_UNUSED11_SFT 5 + __le32 size; + __le32 coredump_component_disable_flags; + #define DBG_CRASHDUMP_MEDIUM_CFG_REQ_NVRAM 0x1UL + __le32 unused_0; + __le64 pbl; +}; + +/* hwrm_dbg_crashdump_medium_cfg_output (size:128b/16B) */ +struct hwrm_dbg_crashdump_medium_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_1[7]; + u8 valid; +}; + /* coredump_segment_record (size:128b/16B) */ struct coredump_segment_record { __le16 component_id; @@ -9372,8 +9766,35 @@ struct hwrm_nvm_install_update_output { __le16 resp_len; __le64 installed_items; u8 result; - #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL - #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS + #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_FAILURE 0xffUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_MALLOC_FAILURE 0xfdUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_INDEX_PARAMETER 0xfbUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TYPE_PARAMETER 0xf3UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PREREQUISITE 0xf2UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_FILE_HEADER 0xecUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_SIGNATURE 0xebUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_STREAM 0xeaUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_PROP_LENGTH 0xe9UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_MANIFEST 0xe8UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_TRAILER 0xe7UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_CHECKSUM 0xe6UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_ITEM_CHECKSUM 0xe5UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DATA_LENGTH 0xe4UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INVALID_DIRECTIVE 0xe1UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_CHIP_REV 0xceUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_DEVICE_ID 0xcdUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_VENDOR 0xccUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_SUBSYS_ID 0xcbUL + #define NVM_INSTALL_UPDATE_RESP_RESULT_UNSUPPORTED_PLATFORM 0xc5UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_DUPLICATE_ITEM 0xc4UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_ZERO_LENGTH_ITEM 0xc3UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_CHECKSUM_ERROR 0xb9UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_DATA_ERROR 0xb8UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_INSTALL_AUTHENTICATION_ERROR 0xb7UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_NOT_FOUND 0xb0UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED 0xa7UL + #define NVM_INSTALL_UPDATE_RESP_RESULT_LAST NVM_INSTALL_UPDATE_RESP_RESULT_ITEM_LOCKED u8 problem_item; #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_NONE 0x0UL #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE 0xffUL diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 48520967746f..a0b321a19361 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -19,6 +19,20 @@ #include "bnxt_hwrm.h" #include "bnxt_ptp.h" +static int bnxt_ptp_cfg_settime(struct bnxt *bp, u64 time) +{ + struct hwrm_func_ptp_cfg_input *req; + int rc; + + rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG); + if (rc) + return rc; + + req->enables = cpu_to_le16(FUNC_PTP_CFG_REQ_ENABLES_PTP_SET_TIME); + req->ptp_set_time = cpu_to_le64(time); + return hwrm_req_send(bp, req); +} + int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off) { unsigned int ptp_class; @@ -48,6 +62,9 @@ static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info, ptp_info); u64 ns = timespec64_to_ns(ts); + if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC) + return bnxt_ptp_cfg_settime(ptp->bp, ns); + spin_lock_bh(&ptp->ptp_lock); timecounter_init(&ptp->tc, &ptp->cc, ns); spin_unlock_bh(&ptp->ptp_lock); @@ -131,11 +148,47 @@ static int bnxt_ptp_gettimex(struct ptp_clock_info *ptp_info, return 0; } +/* Caller holds ptp_lock */ +void bnxt_ptp_update_current_time(struct bnxt *bp) +{ + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + + bnxt_refclk_read(ptp->bp, NULL, &ptp->current_time); + WRITE_ONCE(ptp->old_time, ptp->current_time); +} + +static int bnxt_ptp_adjphc(struct bnxt_ptp_cfg *ptp, s64 delta) +{ + struct hwrm_port_mac_cfg_input *req; + int rc; + + rc = hwrm_req_init(ptp->bp, req, HWRM_PORT_MAC_CFG); + if (rc) + return rc; + + req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_ADJ_PHASE); + req->ptp_adj_phase = cpu_to_le64(delta); + + rc = hwrm_req_send(ptp->bp, req); + if (rc) { + netdev_err(ptp->bp->dev, "ptp adjphc failed. rc = %x\n", rc); + } else { + spin_lock_bh(&ptp->ptp_lock); + bnxt_ptp_update_current_time(ptp->bp); + spin_unlock_bh(&ptp->ptp_lock); + } + + return rc; +} + static int bnxt_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta) { struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg, ptp_info); + if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC) + return bnxt_ptp_adjphc(ptp, delta); + spin_lock_bh(&ptp->ptp_lock); timecounter_adjtime(&ptp->tc, delta); spin_unlock_bh(&ptp->ptp_lock); @@ -714,7 +767,70 @@ static bool bnxt_pps_config_ok(struct bnxt *bp) return !(bp->fw_cap & BNXT_FW_CAP_PTP_PPS) == !ptp->ptp_info.pin_config; } -int bnxt_ptp_init(struct bnxt *bp) +static void bnxt_ptp_timecounter_init(struct bnxt *bp, bool init_tc) +{ + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + + if (!ptp->ptp_clock) { + memset(&ptp->cc, 0, sizeof(ptp->cc)); + ptp->cc.read = bnxt_cc_read; + ptp->cc.mask = CYCLECOUNTER_MASK(48); + ptp->cc.shift = 0; + ptp->cc.mult = 1; + ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD; + } + if (init_tc) + timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real())); +} + +/* Caller holds ptp_lock */ +void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns) +{ + timecounter_init(&ptp->tc, &ptp->cc, ns); + /* For RTC, cycle_last must be in sync with the timecounter value. */ + ptp->tc.cycle_last = ns & ptp->cc.mask; +} + +int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg) +{ + struct timespec64 tsp; + u64 ns; + int rc; + + if (!bp->ptp_cfg || !(bp->fw_cap & BNXT_FW_CAP_PTP_RTC)) + return -ENODEV; + + if (!phc_cfg) { + ktime_get_real_ts64(&tsp); + ns = timespec64_to_ns(&tsp); + rc = bnxt_ptp_cfg_settime(bp, ns); + if (rc) + return rc; + } else { + rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME, &ns); + if (rc) + return rc; + } + spin_lock_bh(&bp->ptp_cfg->ptp_lock); + bnxt_ptp_rtc_timecounter_init(bp->ptp_cfg, ns); + spin_unlock_bh(&bp->ptp_cfg->ptp_lock); + + return 0; +} + +static void bnxt_ptp_free(struct bnxt *bp) +{ + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + + if (ptp->ptp_clock) { + ptp_clock_unregister(ptp->ptp_clock); + ptp->ptp_clock = NULL; + kfree(ptp->ptp_info.pin_config); + ptp->ptp_info.pin_config = NULL; + } +} + +int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; int rc; @@ -726,26 +842,23 @@ int bnxt_ptp_init(struct bnxt *bp) if (rc) return rc; + if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) { + bnxt_ptp_timecounter_init(bp, false); + rc = bnxt_ptp_init_rtc(bp, phc_cfg); + if (rc) + goto out; + } + if (ptp->ptp_clock && bnxt_pps_config_ok(bp)) return 0; - if (ptp->ptp_clock) { - ptp_clock_unregister(ptp->ptp_clock); - ptp->ptp_clock = NULL; - kfree(ptp->ptp_info.pin_config); - ptp->ptp_info.pin_config = NULL; - } + bnxt_ptp_free(bp); + atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS); spin_lock_init(&ptp->ptp_lock); - memset(&ptp->cc, 0, sizeof(ptp->cc)); - ptp->cc.read = bnxt_cc_read; - ptp->cc.mask = CYCLECOUNTER_MASK(48); - ptp->cc.shift = 0; - ptp->cc.mult = 1; - - ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD; - timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real())); + if (!(bp->fw_cap & BNXT_FW_CAP_PTP_RTC)) + bnxt_ptp_timecounter_init(bp, true); ptp->ptp_info = bnxt_ptp_caps; if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) { @@ -757,8 +870,8 @@ int bnxt_ptp_init(struct bnxt *bp) int err = PTR_ERR(ptp->ptp_clock); ptp->ptp_clock = NULL; - bnxt_unmap_ptp_regs(bp); - return err; + rc = err; + goto out; } if (bp->flags & BNXT_FLAG_CHIP_P5) { spin_lock_bh(&ptp->ptp_lock); @@ -768,6 +881,11 @@ int bnxt_ptp_init(struct bnxt *bp) ptp_schedule_worker(ptp->ptp_clock, 0); } return 0; + +out: + bnxt_ptp_free(bp); + bnxt_unmap_ptp_regs(bp); + return rc; } void bnxt_ptp_clear(struct bnxt *bp) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 7c528e1f8713..373baf45884b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -131,12 +131,15 @@ do { \ #endif int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off); +void bnxt_ptp_update_current_time(struct bnxt *bp); void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2); void bnxt_ptp_reapply_pps(struct bnxt *bp); int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb); int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts); -int bnxt_ptp_init(struct bnxt *bp); +void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns); +int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg); +int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg); void bnxt_ptp_clear(struct bnxt *bp); #endif diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 87f1056e29ff..cfe09117fe6c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1368,7 +1368,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) if (!p->eee_enabled) { bcmgenet_eee_enable_set(dev, false); } else { - ret = phy_init_eee(dev->phydev, 0); + ret = phy_init_eee(dev->phydev, false); if (ret) { netif_err(priv, hw, dev, "EEE initialization failed\n"); return ret; diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 9ddbee7de72b..f0a7d8396a4a 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -12,6 +12,7 @@ #include <linux/ptp_clock_kernel.h> #include <linux/net_tstamp.h> #include <linux/interrupt.h> +#include <linux/phy/phy.h> #if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) || defined(CONFIG_MACB_USE_HWSTAMP) #define MACB_EXT_DESC @@ -1291,6 +1292,9 @@ struct macb { u32 wol; struct macb_ptp_info *ptp_info; /* macb-ptp interface */ + + struct phy *sgmii_phy; /* for ZynqMP SGMII mode */ + #ifdef MACB_EXT_DESC uint8_t hw_dma_cap; #endif diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index d13f06cf0308..800d5ced5800 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -34,7 +34,9 @@ #include <linux/udp.h> #include <linux/tcp.h> #include <linux/iopoll.h> +#include <linux/phy/phy.h> #include <linux/pm_runtime.h> +#include <linux/reset.h> #include "macb.h" /* This structure is only used for MACB on SiFive FU540 devices */ @@ -2762,10 +2764,14 @@ static int macb_open(struct net_device *dev) macb_init_hw(bp); - err = macb_phylink_connect(bp); + err = phy_power_on(bp->sgmii_phy); if (err) goto reset_hw; + err = macb_phylink_connect(bp); + if (err) + goto phy_off; + netif_tx_start_all_queues(dev); if (bp->ptp_info) @@ -2773,6 +2779,9 @@ static int macb_open(struct net_device *dev) return 0; +phy_off: + phy_power_off(bp->sgmii_phy); + reset_hw: macb_reset_hw(bp); for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) @@ -2798,6 +2807,8 @@ static int macb_close(struct net_device *dev) phylink_stop(bp->phylink); phylink_disconnect_phy(bp->phylink); + phy_power_off(bp->sgmii_phy); + spin_lock_irqsave(&bp->lock, flags); macb_reset_hw(bp); netif_carrier_off(dev); @@ -4567,13 +4578,55 @@ static const struct macb_config np4_config = { .usrio = &macb_default_usrio, }; +static int zynqmp_init(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct macb *bp = netdev_priv(dev); + int ret; + + if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) { + /* Ensure PS-GTR PHY device used in SGMII mode is ready */ + bp->sgmii_phy = devm_phy_get(&pdev->dev, "sgmii-phy"); + + if (IS_ERR(bp->sgmii_phy)) { + ret = PTR_ERR(bp->sgmii_phy); + dev_err_probe(&pdev->dev, ret, + "failed to get PS-GTR PHY\n"); + return ret; + } + + ret = phy_init(bp->sgmii_phy); + if (ret) { + dev_err(&pdev->dev, "failed to init PS-GTR PHY: %d\n", + ret); + return ret; + } + } + + /* Fully reset GEM controller at hardware level using zynqmp-reset driver, + * if mapped in device tree. + */ + ret = device_reset_optional(&pdev->dev); + if (ret) { + dev_err_probe(&pdev->dev, ret, "failed to reset controller"); + phy_exit(bp->sgmii_phy); + return ret; + } + + ret = macb_init(pdev); + if (ret) + phy_exit(bp->sgmii_phy); + + return ret; +} + static const struct macb_config zynqmp_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH, .dma_burst_length = 16, .clk_init = macb_clk_init, - .init = macb_init, + .init = zynqmp_init, .jumbo_max_len = 10240, .usrio = &macb_default_usrio, }; @@ -4790,7 +4843,7 @@ static int macb_probe(struct platform_device *pdev) err = macb_mii_init(bp); if (err) - goto err_out_free_netdev; + goto err_out_phy_exit; netif_carrier_off(dev); @@ -4815,6 +4868,9 @@ err_out_unregister_mdio: mdiobus_unregister(bp->mii_bus); mdiobus_free(bp->mii_bus); +err_out_phy_exit: + phy_exit(bp->sgmii_phy); + err_out_free_netdev: free_netdev(dev); @@ -4836,6 +4892,7 @@ static int macb_remove(struct platform_device *pdev) if (dev) { bp = netdev_priv(dev); + phy_exit(bp->sgmii_phy); mdiobus_unregister(bp->mii_bus); mdiobus_free(bp->mii_bus); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 574a32f23f96..2f6484dc186a 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1409,7 +1409,8 @@ static acpi_status bgx_acpi_register_phy(acpi_handle handle, struct device *dev = &bgx->pdev->dev; struct acpi_device *adev; - if (acpi_bus_get_device(handle, &adev)) + adev = acpi_fetch_acpi_dev(handle); + if (!adev) goto out; acpi_get_mac_address(dev, adev, bgx->lmac[bgx->acpi_lmac_idx].mac); diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 63521312cb90..174b1e156669 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -3349,6 +3349,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } if (!adapter->registered_device_map) { dev_err(&pdev->dev, "could not register any net devices\n"); + err = -ENODEV; goto out_free_dev; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c index 28fd2de9e4cf..1672d3afe5be 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c @@ -8,6 +8,46 @@ #include "cxgb4_filter.h" #include "cxgb4_tc_flower.h" +static int cxgb4_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} + static int cxgb4_matchall_egress_validate(struct net_device *dev, struct tc_cls_matchall_offload *cls) { @@ -48,11 +88,10 @@ static int cxgb4_matchall_egress_validate(struct net_device *dev, flow_action_for_each(i, entry, actions) { switch (entry->id) { case FLOW_ACTION_POLICE: - if (entry->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, - "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } + ret = cxgb4_policer_validate(actions, entry, extack); + if (ret) + return ret; + /* Convert bytes per second to bits per second */ if (entry->police.rate_bytes_ps * 8 > max_link_rate) { NL_SET_ERR_MSG_MOD(extack, @@ -150,11 +189,11 @@ static int cxgb4_matchall_alloc_tc(struct net_device *dev, flow_action_for_each(i, entry, &cls->rule->action) if (entry->id == FLOW_ACTION_POLICE) break; - if (entry->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, - "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } + + ret = cxgb4_policer_validate(&cls->rule->action, entry, extack); + if (ret) + return ret; + /* Convert from bytes per second to Kbps */ p.u.params.maxrate = div_u64(entry->police.rate_bytes_ps * 8, 1000); p.u.params.channel = pi->tx_chan; diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index c78b99a497df..8014eb33937c 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -2363,11 +2363,13 @@ static void gemini_port_save_mac_addr(struct gemini_ethernet_port *port) static int gemini_ethernet_port_probe(struct platform_device *pdev) { char *port_names[2] = { "ethernet0", "ethernet1" }; + struct device_node *np = pdev->dev.of_node; struct gemini_ethernet_port *port; struct device *dev = &pdev->dev; struct gemini_ethernet *geth; struct net_device *netdev; struct device *parent; + u8 mac[ETH_ALEN]; unsigned int id; int irq; int ret; @@ -2473,6 +2475,12 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) netif_napi_add(netdev, &port->napi, gmac_napi_poll, DEFAULT_NAPI_WEIGHT); + ret = of_get_mac_address(np, mac); + if (!ret) { + dev_info(dev, "Setting macaddr from DT %pM\n", mac); + memcpy(port->mac_addr, mac, ETH_ALEN); + } + if (is_valid_ether_addr((void *)port->mac_addr)) { eth_hw_addr_set(netdev, (u8 *)port->mac_addr); } else { diff --git a/drivers/net/ethernet/davicom/Kconfig b/drivers/net/ethernet/davicom/Kconfig index 7af86b6d4150..02e0caff98e3 100644 --- a/drivers/net/ethernet/davicom/Kconfig +++ b/drivers/net/ethernet/davicom/Kconfig @@ -3,6 +3,19 @@ # Davicom device configuration # +config NET_VENDOR_DAVICOM + bool "Davicom devices" + default y + help + If you have a network (Ethernet) card belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Davicom devices. If you say Y, you will be asked + for your specific card in the following selections. + +if NET_VENDOR_DAVICOM + config DM9000 tristate "DM9000 support" depends on ARM || MIPS || COLDFIRE || NIOS2 || COMPILE_TEST @@ -22,3 +35,21 @@ config DM9000_FORCE_SIMPLE_PHY_POLL bit to determine if the link is up or down instead of the more costly MII PHY reads. Note, this will not work if the chip is operating with an external PHY. + +config DM9051 + tristate "DM9051 SPI support" + depends on SPI + select CRC32 + select MDIO + select PHYLIB + select REGMAP_SPI + help + Support for DM9051 SPI chipset. + + To compile this driver as a module, choose M here. The module + will be called dm9051. + + The SPI mode for the host's SPI master to access DM9051 is mode + 0 on the SPI bus. + +endif # NET_VENDOR_DAVICOM diff --git a/drivers/net/ethernet/davicom/Makefile b/drivers/net/ethernet/davicom/Makefile index 173c87d21076..225f85bc1f53 100644 --- a/drivers/net/ethernet/davicom/Makefile +++ b/drivers/net/ethernet/davicom/Makefile @@ -4,3 +4,4 @@ # obj-$(CONFIG_DM9000) += dm9000.o +obj-$(CONFIG_DM9051) += dm9051.o diff --git a/drivers/net/ethernet/davicom/dm9051.c b/drivers/net/ethernet/davicom/dm9051.c new file mode 100644 index 000000000000..a523ddda7609 --- /dev/null +++ b/drivers/net/ethernet/davicom/dm9051.c @@ -0,0 +1,1260 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022 Davicom Semiconductor,Inc. + * Davicom DM9051 SPI Fast Ethernet Linux driver + */ + +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/interrupt.h> +#include <linux/iopoll.h> +#include <linux/irq.h> +#include <linux/mii.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include <linux/regmap.h> +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include <linux/spi/spi.h> +#include <linux/types.h> + +#include "dm9051.h" + +#define DRVNAME_9051 "dm9051" + +/** + * struct rx_ctl_mach - rx activities record + * @status_err_counter: rx status error counter + * @large_err_counter: rx get large packet length error counter + * @rx_err_counter: receive packet error counter + * @tx_err_counter: transmit packet error counter + * @fifo_rst_counter: reset operation counter + * + * To keep track for the driver operation statistics + */ +struct rx_ctl_mach { + u16 status_err_counter; + u16 large_err_counter; + u16 rx_err_counter; + u16 tx_err_counter; + u16 fifo_rst_counter; +}; + +/** + * struct dm9051_rxctrl - dm9051 driver rx control + * @hash_table: Multicast hash-table data + * @rcr_all: KS_RXCR1 register setting + * + * The settings needs to control the receive filtering + * such as the multicast hash-filter and the receive register settings + */ +struct dm9051_rxctrl { + u16 hash_table[4]; + u8 rcr_all; +}; + +/** + * struct dm9051_rxhdr - rx packet data header + * @headbyte: lead byte equal to 0x01 notifies a valid packet + * @status: status bits for the received packet + * @rxlen: packet length + * + * The Rx packed, entered into the FIFO memory, start with these + * four bytes which is the Rx header, followed by the ethernet + * packet data and ends with an appended 4-byte CRC data. + * Both Rx packet and CRC data are for check purpose and finally + * are dropped by this driver + */ +struct dm9051_rxhdr { + u8 headbyte; + u8 status; + __le16 rxlen; +}; + +/** + * struct board_info - maintain the saved data + * @spidev: spi device structure + * @ndev: net device structure + * @mdiobus: mii bus structure + * @phydev: phy device structure + * @txq: tx queue structure + * @regmap_dm: regmap for register read/write + * @regmap_dmbulk: extra regmap for bulk read/write + * @rxctrl_work: Work queue for updating RX mode and multicast lists + * @tx_work: Work queue for tx packets + * @pause: ethtool pause parameter structure + * @spi_lockm: between threads lock structure + * @reg_mutex: regmap access lock structure + * @bc: rx control statistics structure + * @rxhdr: rx header structure + * @rctl: rx control setting structure + * @msg_enable: message level value + * @imr_all: to store operating imr value for register DM9051_IMR + * @lcr_all: to store operating rcr value for register DM9051_LMCR + * + * The saved data variables, keep up to date for retrieval back to use + */ +struct board_info { + u32 msg_enable; + struct spi_device *spidev; + struct net_device *ndev; + struct mii_bus *mdiobus; + struct phy_device *phydev; + struct sk_buff_head txq; + struct regmap *regmap_dm; + struct regmap *regmap_dmbulk; + struct work_struct rxctrl_work; + struct work_struct tx_work; + struct ethtool_pauseparam pause; + struct mutex spi_lockm; + struct mutex reg_mutex; + struct rx_ctl_mach bc; + struct dm9051_rxhdr rxhdr; + struct dm9051_rxctrl rctl; + u8 imr_all; + u8 lcr_all; +}; + +static int dm9051_set_reg(struct board_info *db, unsigned int reg, unsigned int val) +{ + int ret; + + ret = regmap_write(db->regmap_dm, reg, val); + if (ret < 0) + netif_err(db, drv, db->ndev, "%s: error %d set reg %02x\n", + __func__, ret, reg); + return ret; +} + +static int dm9051_update_bits(struct board_info *db, unsigned int reg, unsigned int mask, + unsigned int val) +{ + int ret; + + ret = regmap_update_bits(db->regmap_dm, reg, mask, val); + if (ret < 0) + netif_err(db, drv, db->ndev, "%s: error %d update bits reg %02x\n", + __func__, ret, reg); + return ret; +} + +/* skb buffer exhausted, just discard the received data + */ +static int dm9051_dumpblk(struct board_info *db, u8 reg, size_t count) +{ + struct net_device *ndev = db->ndev; + unsigned int rb; + int ret; + + /* no skb buffer, + * both reg and &rb must be noinc, + * read once one byte via regmap_read + */ + do { + ret = regmap_read(db->regmap_dm, reg, &rb); + if (ret < 0) { + netif_err(db, drv, ndev, "%s: error %d dumping read reg %02x\n", + __func__, ret, reg); + break; + } + } while (--count); + + return ret; +} + +static int dm9051_set_regs(struct board_info *db, unsigned int reg, const void *val, + size_t val_count) +{ + int ret; + + ret = regmap_bulk_write(db->regmap_dmbulk, reg, val, val_count); + if (ret < 0) + netif_err(db, drv, db->ndev, "%s: error %d bulk writing regs %02x\n", + __func__, ret, reg); + return ret; +} + +static int dm9051_get_regs(struct board_info *db, unsigned int reg, void *val, + size_t val_count) +{ + int ret; + + ret = regmap_bulk_read(db->regmap_dmbulk, reg, val, val_count); + if (ret < 0) + netif_err(db, drv, db->ndev, "%s: error %d bulk reading regs %02x\n", + __func__, ret, reg); + return ret; +} + +static int dm9051_write_mem(struct board_info *db, unsigned int reg, const void *buff, + size_t len) +{ + int ret; + + ret = regmap_noinc_write(db->regmap_dm, reg, buff, len); + if (ret < 0) + netif_err(db, drv, db->ndev, "%s: error %d noinc writing regs %02x\n", + __func__, ret, reg); + return ret; +} + +static int dm9051_read_mem(struct board_info *db, unsigned int reg, void *buff, + size_t len) +{ + int ret; + + ret = regmap_noinc_read(db->regmap_dm, reg, buff, len); + if (ret < 0) + netif_err(db, drv, db->ndev, "%s: error %d noinc reading regs %02x\n", + __func__, ret, reg); + return ret; +} + +/* waiting tx-end rather than tx-req + * got faster + */ +static int dm9051_nsr_poll(struct board_info *db) +{ + unsigned int mval; + int ret; + + ret = regmap_read_poll_timeout(db->regmap_dm, DM9051_NSR, mval, + mval & (NSR_TX2END | NSR_TX1END), 1, 20); + if (ret == -ETIMEDOUT) + netdev_err(db->ndev, "timeout in checking for tx end\n"); + return ret; +} + +static int dm9051_epcr_poll(struct board_info *db) +{ + unsigned int mval; + int ret; + + ret = regmap_read_poll_timeout(db->regmap_dm, DM9051_EPCR, mval, + !(mval & EPCR_ERRE), 100, 10000); + if (ret == -ETIMEDOUT) + netdev_err(db->ndev, "eeprom/phy in processing get timeout\n"); + return ret; +} + +static int dm9051_irq_flag(struct board_info *db) +{ + struct spi_device *spi = db->spidev; + int irq_type = irq_get_trigger_type(spi->irq); + + if (irq_type) + return irq_type; + + return IRQF_TRIGGER_LOW; +} + +static unsigned int dm9051_intcr_value(struct board_info *db) +{ + return (dm9051_irq_flag(db) == IRQF_TRIGGER_LOW) ? + INTCR_POL_LOW : INTCR_POL_HIGH; +} + +static int dm9051_set_fcr(struct board_info *db) +{ + u8 fcr = 0; + + if (db->pause.rx_pause) + fcr |= FCR_BKPM | FCR_FLCE; + if (db->pause.tx_pause) + fcr |= FCR_TXPEN; + + return dm9051_set_reg(db, DM9051_FCR, fcr); +} + +static int dm9051_set_recv(struct board_info *db) +{ + int ret; + + ret = dm9051_set_regs(db, DM9051_MAR, db->rctl.hash_table, sizeof(db->rctl.hash_table)); + if (ret) + return ret; + + return dm9051_set_reg(db, DM9051_RCR, db->rctl.rcr_all); /* enable rx */ +} + +static int dm9051_core_reset(struct board_info *db) +{ + int ret; + + db->bc.fifo_rst_counter++; + + ret = regmap_write(db->regmap_dm, DM9051_NCR, NCR_RST); /* NCR reset */ + if (ret) + return ret; + ret = regmap_write(db->regmap_dm, DM9051_MBNDRY, MBNDRY_BYTE); /* MemBound */ + if (ret) + return ret; + ret = regmap_write(db->regmap_dm, DM9051_PPCR, PPCR_PAUSE_COUNT); /* Pause Count */ + if (ret) + return ret; + ret = regmap_write(db->regmap_dm, DM9051_LMCR, db->lcr_all); /* LEDMode1 */ + if (ret) + return ret; + + return dm9051_set_reg(db, DM9051_INTCR, dm9051_intcr_value(db)); +} + +static int dm9051_update_fcr(struct board_info *db) +{ + u8 fcr = 0; + + if (db->pause.rx_pause) + fcr |= FCR_BKPM | FCR_FLCE; + if (db->pause.tx_pause) + fcr |= FCR_TXPEN; + + return dm9051_update_bits(db, DM9051_FCR, FCR_RXTX_BITS, fcr); +} + +static int dm9051_disable_interrupt(struct board_info *db) +{ + return dm9051_set_reg(db, DM9051_IMR, IMR_PAR); /* disable int */ +} + +static int dm9051_enable_interrupt(struct board_info *db) +{ + return dm9051_set_reg(db, DM9051_IMR, db->imr_all); /* enable int */ +} + +static int dm9051_stop_mrcmd(struct board_info *db) +{ + return dm9051_set_reg(db, DM9051_ISR, ISR_STOP_MRCMD); /* to stop mrcmd */ +} + +static int dm9051_clear_interrupt(struct board_info *db) +{ + return dm9051_update_bits(db, DM9051_ISR, ISR_CLR_INT, ISR_CLR_INT); +} + +static int dm9051_eeprom_read(struct board_info *db, int offset, u8 *to) +{ + int ret; + + ret = regmap_write(db->regmap_dm, DM9051_EPAR, offset); + if (ret) + return ret; + + ret = regmap_write(db->regmap_dm, DM9051_EPCR, EPCR_ERPRR); + if (ret) + return ret; + + ret = dm9051_epcr_poll(db); + if (ret) + return ret; + + ret = regmap_write(db->regmap_dm, DM9051_EPCR, 0); + if (ret) + return ret; + + return regmap_bulk_read(db->regmap_dmbulk, DM9051_EPDRL, to, 2); +} + +static int dm9051_eeprom_write(struct board_info *db, int offset, u8 *data) +{ + int ret; + + ret = regmap_write(db->regmap_dm, DM9051_EPAR, offset); + if (ret) + return ret; + + ret = regmap_bulk_write(db->regmap_dmbulk, DM9051_EPDRL, data, 2); + if (ret < 0) + return ret; + + ret = regmap_write(db->regmap_dm, DM9051_EPCR, EPCR_WEP | EPCR_ERPRW); + if (ret) + return ret; + + ret = dm9051_epcr_poll(db); + if (ret) + return ret; + + return regmap_write(db->regmap_dm, DM9051_EPCR, 0); +} + +static int dm9051_phyread(void *context, unsigned int reg, unsigned int *val) +{ + struct board_info *db = context; + int ret; + + ret = regmap_write(db->regmap_dm, DM9051_EPAR, DM9051_PHY | reg); + if (ret) + return ret; + + ret = regmap_write(db->regmap_dm, DM9051_EPCR, EPCR_ERPRR | EPCR_EPOS); + if (ret) + return ret; + + ret = dm9051_epcr_poll(db); + if (ret) + return ret; + + ret = regmap_write(db->regmap_dm, DM9051_EPCR, 0); + if (ret) + return ret; + + /* this is a 4 bytes data, clear to zero since following regmap_bulk_read + * only fill lower 2 bytes + */ + *val = 0; + return regmap_bulk_read(db->regmap_dmbulk, DM9051_EPDRL, val, 2); +} + +static int dm9051_phywrite(void *context, unsigned int reg, unsigned int val) +{ + struct board_info *db = context; + int ret; + + ret = regmap_write(db->regmap_dm, DM9051_EPAR, DM9051_PHY | reg); + if (ret) + return ret; + + ret = regmap_bulk_write(db->regmap_dmbulk, DM9051_EPDRL, &val, 2); + if (ret < 0) + return ret; + + ret = regmap_write(db->regmap_dm, DM9051_EPCR, EPCR_EPOS | EPCR_ERPRW); + if (ret) + return ret; + + ret = dm9051_epcr_poll(db); + if (ret) + return ret; + + return regmap_write(db->regmap_dm, DM9051_EPCR, 0); +} + +static int dm9051_mdio_read(struct mii_bus *bus, int addr, int regnum) +{ + struct board_info *db = bus->priv; + unsigned int val = 0xffff; + int ret; + + if (addr == DM9051_PHY_ADDR) { + ret = dm9051_phyread(db, regnum, &val); + if (ret) + return ret; + } + + return val; +} + +static int dm9051_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val) +{ + struct board_info *db = bus->priv; + + if (addr == DM9051_PHY_ADDR) + return dm9051_phywrite(db, regnum, val); + + return -ENODEV; +} + +static void dm9051_reg_lock_mutex(void *dbcontext) +{ + struct board_info *db = dbcontext; + + mutex_lock(&db->reg_mutex); +} + +static void dm9051_reg_unlock_mutex(void *dbcontext) +{ + struct board_info *db = dbcontext; + + mutex_unlock(&db->reg_mutex); +} + +static struct regmap_config regconfigdm = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0xff, + .reg_stride = 1, + .cache_type = REGCACHE_NONE, + .read_flag_mask = 0, + .write_flag_mask = DM_SPI_WR, + .val_format_endian = REGMAP_ENDIAN_LITTLE, + .lock = dm9051_reg_lock_mutex, + .unlock = dm9051_reg_unlock_mutex, +}; + +static struct regmap_config regconfigdmbulk = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0xff, + .reg_stride = 1, + .cache_type = REGCACHE_NONE, + .read_flag_mask = 0, + .write_flag_mask = DM_SPI_WR, + .val_format_endian = REGMAP_ENDIAN_LITTLE, + .lock = dm9051_reg_lock_mutex, + .unlock = dm9051_reg_unlock_mutex, + .use_single_read = true, + .use_single_write = true, +}; + +static int dm9051_map_init(struct spi_device *spi, struct board_info *db) +{ + /* create two regmap instances, + * split read/write and bulk_read/bulk_write to individual regmap + * to resolve regmap execution confliction problem + */ + regconfigdm.lock_arg = db; + db->regmap_dm = devm_regmap_init_spi(db->spidev, ®configdm); + if (IS_ERR(db->regmap_dm)) + return PTR_ERR(db->regmap_dm); + + regconfigdmbulk.lock_arg = db; + db->regmap_dmbulk = devm_regmap_init_spi(db->spidev, ®configdmbulk); + if (IS_ERR(db->regmap_dmbulk)) + return PTR_ERR(db->regmap_dmbulk); + + return 0; +} + +static int dm9051_map_chipid(struct board_info *db) +{ + struct device *dev = &db->spidev->dev; + unsigned short wid; + u8 buff[6]; + int ret; + + ret = dm9051_get_regs(db, DM9051_VIDL, buff, sizeof(buff)); + if (ret < 0) + return ret; + + wid = get_unaligned_le16(buff + 2); + if (wid != DM9051_ID) { + dev_err(dev, "chipid error as %04x !\n", wid); + return -ENODEV; + } + + dev_info(dev, "chip %04x found\n", wid); + return 0; +} + +/* Read DM9051_PAR registers which is the mac address loaded from EEPROM while power-on + */ +static int dm9051_map_etherdev_par(struct net_device *ndev, struct board_info *db) +{ + u8 addr[ETH_ALEN]; + int ret; + + ret = dm9051_get_regs(db, DM9051_PAR, addr, sizeof(addr)); + if (ret < 0) + return ret; + + if (!is_valid_ether_addr(addr)) { + eth_hw_addr_random(ndev); + + ret = dm9051_set_regs(db, DM9051_PAR, ndev->dev_addr, sizeof(ndev->dev_addr)); + if (ret < 0) + return ret; + + dev_dbg(&db->spidev->dev, "Use random MAC address\n"); + return 0; + } + + eth_hw_addr_set(ndev, addr); + return 0; +} + +/* ethtool-ops + */ +static void dm9051_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + strscpy(info->driver, DRVNAME_9051, sizeof(info->driver)); +} + +static void dm9051_set_msglevel(struct net_device *ndev, u32 value) +{ + struct board_info *db = to_dm9051_board(ndev); + + db->msg_enable = value; +} + +static u32 dm9051_get_msglevel(struct net_device *ndev) +{ + struct board_info *db = to_dm9051_board(ndev); + + return db->msg_enable; +} + +static int dm9051_get_eeprom_len(struct net_device *dev) +{ + return 128; +} + +static int dm9051_get_eeprom(struct net_device *ndev, + struct ethtool_eeprom *ee, u8 *data) +{ + struct board_info *db = to_dm9051_board(ndev); + int offset = ee->offset; + int len = ee->len; + int i, ret; + + if ((len | offset) & 1) + return -EINVAL; + + ee->magic = DM_EEPROM_MAGIC; + + for (i = 0; i < len; i += 2) { + ret = dm9051_eeprom_read(db, (offset + i) / 2, data + i); + if (ret) + break; + } + return ret; +} + +static int dm9051_set_eeprom(struct net_device *ndev, + struct ethtool_eeprom *ee, u8 *data) +{ + struct board_info *db = to_dm9051_board(ndev); + int offset = ee->offset; + int len = ee->len; + int i, ret; + + if ((len | offset) & 1) + return -EINVAL; + + if (ee->magic != DM_EEPROM_MAGIC) + return -EINVAL; + + for (i = 0; i < len; i += 2) { + ret = dm9051_eeprom_write(db, (offset + i) / 2, data + i); + if (ret) + break; + } + return ret; +} + +static void dm9051_get_pauseparam(struct net_device *ndev, + struct ethtool_pauseparam *pause) +{ + struct board_info *db = to_dm9051_board(ndev); + + *pause = db->pause; +} + +static int dm9051_set_pauseparam(struct net_device *ndev, + struct ethtool_pauseparam *pause) +{ + struct board_info *db = to_dm9051_board(ndev); + + db->pause = *pause; + + if (pause->autoneg == AUTONEG_DISABLE) + return dm9051_update_fcr(db); + + phy_set_sym_pause(db->phydev, pause->rx_pause, pause->tx_pause, + pause->autoneg); + phy_start_aneg(db->phydev); + return 0; +} + +static const struct ethtool_ops dm9051_ethtool_ops = { + .get_drvinfo = dm9051_get_drvinfo, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_msglevel = dm9051_get_msglevel, + .set_msglevel = dm9051_set_msglevel, + .nway_reset = phy_ethtool_nway_reset, + .get_link = ethtool_op_get_link, + .get_eeprom_len = dm9051_get_eeprom_len, + .get_eeprom = dm9051_get_eeprom, + .set_eeprom = dm9051_set_eeprom, + .get_pauseparam = dm9051_get_pauseparam, + .set_pauseparam = dm9051_set_pauseparam, +}; + +static int dm9051_all_start(struct board_info *db) +{ + int ret; + + /* GPR power on of the internal phy + */ + ret = dm9051_set_reg(db, DM9051_GPR, 0); + if (ret) + return ret; + + /* dm9051 chip registers could not be accessed within 1 ms + * after GPR power on, delay 1 ms is essential + */ + msleep(1); + + ret = dm9051_core_reset(db); + if (ret) + return ret; + + return dm9051_enable_interrupt(db); +} + +static int dm9051_all_stop(struct board_info *db) +{ + int ret; + + /* GPR power off of the internal phy, + * The internal phy still could be accessed after this GPR power off control + */ + ret = dm9051_set_reg(db, DM9051_GPR, GPR_PHY_OFF); + if (ret) + return ret; + + return dm9051_set_reg(db, DM9051_RCR, RCR_RX_DISABLE); +} + +/* fifo reset while rx error found + */ +static int dm9051_all_restart(struct board_info *db) +{ + struct net_device *ndev = db->ndev; + int ret; + + ret = dm9051_core_reset(db); + if (ret) + return ret; + + ret = dm9051_enable_interrupt(db); + if (ret) + return ret; + + netdev_dbg(ndev, " rxstatus_Er & rxlen_Er %d, RST_c %d\n", + db->bc.status_err_counter + db->bc.large_err_counter, + db->bc.fifo_rst_counter); + + ret = dm9051_set_recv(db); + if (ret) + return ret; + + return dm9051_set_fcr(db); +} + +/* read packets from the fifo memory + * return value, + * > 0 - read packet number, caller can repeat the rx operation + * 0 - no error, caller need stop further rx operation + * -EBUSY - read data error, caller escape from rx operation + */ +static int dm9051_loop_rx(struct board_info *db) +{ + struct net_device *ndev = db->ndev; + unsigned int rxbyte; + int ret, rxlen; + struct sk_buff *skb; + u8 *rdptr; + int scanrr = 0; + + do { + ret = dm9051_read_mem(db, DM_SPI_MRCMDX, &rxbyte, 2); + if (ret) + return ret; + + if ((rxbyte & GENMASK(7, 0)) != DM9051_PKT_RDY) + break; /* exhaust-empty */ + + ret = dm9051_read_mem(db, DM_SPI_MRCMD, &db->rxhdr, DM_RXHDR_SIZE); + if (ret) + return ret; + + ret = dm9051_stop_mrcmd(db); + if (ret) + return ret; + + rxlen = le16_to_cpu(db->rxhdr.rxlen); + if (db->rxhdr.status & RSR_ERR_BITS || rxlen > DM9051_PKT_MAX) { + netdev_dbg(ndev, "rxhdr-byte (%02x)\n", + db->rxhdr.headbyte); + + if (db->rxhdr.status & RSR_ERR_BITS) { + db->bc.status_err_counter++; + netdev_dbg(ndev, "check rxstatus-error (%02x)\n", + db->rxhdr.status); + } else { + db->bc.large_err_counter++; + netdev_dbg(ndev, "check rxlen large-error (%d > %d)\n", + rxlen, DM9051_PKT_MAX); + } + return dm9051_all_restart(db); + } + + skb = dev_alloc_skb(rxlen); + if (!skb) { + ret = dm9051_dumpblk(db, DM_SPI_MRCMD, rxlen); + if (ret) + return ret; + return scanrr; + } + + rdptr = skb_put(skb, rxlen - 4); + ret = dm9051_read_mem(db, DM_SPI_MRCMD, rdptr, rxlen); + if (ret) { + db->bc.rx_err_counter++; + dev_kfree_skb(skb); + return ret; + } + + ret = dm9051_stop_mrcmd(db); + if (ret) + return ret; + + skb->protocol = eth_type_trans(skb, db->ndev); + if (db->ndev->features & NETIF_F_RXCSUM) + skb_checksum_none_assert(skb); + netif_rx(skb); + db->ndev->stats.rx_bytes += rxlen; + db->ndev->stats.rx_packets++; + scanrr++; + } while (!ret); + + return scanrr; +} + +/* transmit a packet, + * return value, + * 0 - succeed + * -ETIMEDOUT - timeout error + */ +static int dm9051_single_tx(struct board_info *db, u8 *buff, unsigned int len) +{ + int ret; + + ret = dm9051_nsr_poll(db); + if (ret) + return ret; + + ret = dm9051_write_mem(db, DM_SPI_MWCMD, buff, len); + if (ret) + return ret; + + ret = dm9051_set_regs(db, DM9051_TXPLL, &len, 2); + if (ret < 0) + return ret; + + return dm9051_set_reg(db, DM9051_TCR, TCR_TXREQ); +} + +static int dm9051_loop_tx(struct board_info *db) +{ + struct net_device *ndev = db->ndev; + int ntx = 0; + int ret; + + while (!skb_queue_empty(&db->txq)) { + struct sk_buff *skb; + unsigned int len; + + skb = skb_dequeue(&db->txq); + if (skb) { + ntx++; + ret = dm9051_single_tx(db, skb->data, skb->len); + len = skb->len; + dev_kfree_skb(skb); + if (ret < 0) { + db->bc.tx_err_counter++; + return 0; + } + ndev->stats.tx_bytes += len; + ndev->stats.tx_packets++; + } + + if (netif_queue_stopped(ndev) && + (skb_queue_len(&db->txq) < DM9051_TX_QUE_LO_WATER)) + netif_wake_queue(ndev); + } + + return ntx; +} + +static irqreturn_t dm9051_rx_threaded_irq(int irq, void *pw) +{ + struct board_info *db = pw; + int result, result_tx; + + mutex_lock(&db->spi_lockm); + + result = dm9051_disable_interrupt(db); + if (result) + goto out_unlock; + + result = dm9051_clear_interrupt(db); + if (result) + goto out_unlock; + + do { + result = dm9051_loop_rx(db); /* threaded irq rx */ + if (result < 0) + goto out_unlock; + result_tx = dm9051_loop_tx(db); /* more tx better performance */ + if (result_tx < 0) + goto out_unlock; + } while (result > 0); + + dm9051_enable_interrupt(db); + + /* To exit and has mutex unlock while rx or tx error + */ +out_unlock: + mutex_unlock(&db->spi_lockm); + + return IRQ_HANDLED; +} + +static void dm9051_tx_delay(struct work_struct *work) +{ + struct board_info *db = container_of(work, struct board_info, tx_work); + int result; + + mutex_lock(&db->spi_lockm); + + result = dm9051_loop_tx(db); + if (result < 0) + netdev_err(db->ndev, "transmit packet error\n"); + + mutex_unlock(&db->spi_lockm); +} + +static void dm9051_rxctl_delay(struct work_struct *work) +{ + struct board_info *db = container_of(work, struct board_info, rxctrl_work); + struct net_device *ndev = db->ndev; + int result; + + mutex_lock(&db->spi_lockm); + + result = dm9051_set_regs(db, DM9051_PAR, ndev->dev_addr, sizeof(ndev->dev_addr)); + if (result < 0) + goto out_unlock; + + dm9051_set_recv(db); + + /* To has mutex unlock and return from this function if regmap function fail + */ +out_unlock: + mutex_unlock(&db->spi_lockm); +} + +/* Open network device + * Called when the network device is marked active, such as a user executing + * 'ifconfig up' on the device + */ +static int dm9051_open(struct net_device *ndev) +{ + struct board_info *db = to_dm9051_board(ndev); + struct spi_device *spi = db->spidev; + int ret; + + db->imr_all = IMR_PAR | IMR_PRM; + db->lcr_all = LMCR_MODE1; + db->rctl.rcr_all = RCR_DIS_LONG | RCR_DIS_CRC | RCR_RXEN; + memset(db->rctl.hash_table, 0, sizeof(db->rctl.hash_table)); + + ndev->irq = spi->irq; /* by dts */ + ret = request_threaded_irq(spi->irq, NULL, dm9051_rx_threaded_irq, + dm9051_irq_flag(db) | IRQF_ONESHOT, + ndev->name, db); + if (ret < 0) { + netdev_err(ndev, "failed to get irq\n"); + return ret; + } + + phy_support_sym_pause(db->phydev); + phy_start(db->phydev); + + /* flow control parameters init */ + db->pause.rx_pause = true; + db->pause.tx_pause = true; + db->pause.autoneg = AUTONEG_DISABLE; + + if (db->phydev->autoneg) + db->pause.autoneg = AUTONEG_ENABLE; + + ret = dm9051_all_start(db); + if (ret) { + phy_stop(db->phydev); + free_irq(spi->irq, db); + return ret; + } + + netif_wake_queue(ndev); + + return 0; +} + +/* Close network device + * Called to close down a network device which has been active. Cancel any + * work, shutdown the RX and TX process and then place the chip into a low + * power state while it is not being used + */ +static int dm9051_stop(struct net_device *ndev) +{ + struct board_info *db = to_dm9051_board(ndev); + int ret; + + ret = dm9051_all_stop(db); + if (ret) + return ret; + + flush_work(&db->tx_work); + flush_work(&db->rxctrl_work); + + phy_stop(db->phydev); + + free_irq(db->spidev->irq, db); + + netif_stop_queue(ndev); + + skb_queue_purge(&db->txq); + + return 0; +} + +/* event: play a schedule starter in condition + */ +static netdev_tx_t dm9051_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct board_info *db = to_dm9051_board(ndev); + + skb_queue_tail(&db->txq, skb); + if (skb_queue_len(&db->txq) > DM9051_TX_QUE_HI_WATER) + netif_stop_queue(ndev); /* enforce limit queue size */ + + schedule_work(&db->tx_work); + + return NETDEV_TX_OK; +} + +/* event: play with a schedule starter + */ +static void dm9051_set_rx_mode(struct net_device *ndev) +{ + struct board_info *db = to_dm9051_board(ndev); + struct dm9051_rxctrl rxctrl; + struct netdev_hw_addr *ha; + u8 rcr = RCR_DIS_LONG | RCR_DIS_CRC | RCR_RXEN; + u32 hash_val; + + memset(&rxctrl, 0, sizeof(rxctrl)); + + /* rx control */ + if (ndev->flags & IFF_PROMISC) { + rcr |= RCR_PRMSC; + netdev_dbg(ndev, "set_multicast rcr |= RCR_PRMSC, rcr= %02x\n", rcr); + } + + if (ndev->flags & IFF_ALLMULTI) { + rcr |= RCR_ALL; + netdev_dbg(ndev, "set_multicast rcr |= RCR_ALLMULTI, rcr= %02x\n", rcr); + } + + rxctrl.rcr_all = rcr; + + /* broadcast address */ + rxctrl.hash_table[0] = 0; + rxctrl.hash_table[1] = 0; + rxctrl.hash_table[2] = 0; + rxctrl.hash_table[3] = 0x8000; + + /* the multicast address in Hash Table : 64 bits */ + netdev_for_each_mc_addr(ha, ndev) { + hash_val = ether_crc_le(ETH_ALEN, ha->addr) & GENMASK(5, 0); + rxctrl.hash_table[hash_val / 16] |= BIT(0) << (hash_val % 16); + } + + /* schedule work to do the actual set of the data if needed */ + + if (memcmp(&db->rctl, &rxctrl, sizeof(rxctrl))) { + memcpy(&db->rctl, &rxctrl, sizeof(rxctrl)); + schedule_work(&db->rxctrl_work); + } +} + +/* event: write into the mac registers and eeprom directly + */ +static int dm9051_set_mac_address(struct net_device *ndev, void *p) +{ + struct board_info *db = to_dm9051_board(ndev); + int ret; + + ret = eth_prepare_mac_addr_change(ndev, p); + if (ret < 0) + return ret; + + eth_commit_mac_addr_change(ndev, p); + return dm9051_set_regs(db, DM9051_PAR, ndev->dev_addr, sizeof(ndev->dev_addr)); +} + +static const struct net_device_ops dm9051_netdev_ops = { + .ndo_open = dm9051_open, + .ndo_stop = dm9051_stop, + .ndo_start_xmit = dm9051_start_xmit, + .ndo_set_rx_mode = dm9051_set_rx_mode, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = dm9051_set_mac_address, +}; + +static void dm9051_operation_clear(struct board_info *db) +{ + db->bc.status_err_counter = 0; + db->bc.large_err_counter = 0; + db->bc.rx_err_counter = 0; + db->bc.tx_err_counter = 0; + db->bc.fifo_rst_counter = 0; +} + +static int dm9051_mdio_register(struct board_info *db) +{ + struct spi_device *spi = db->spidev; + int ret; + + db->mdiobus = devm_mdiobus_alloc(&spi->dev); + if (!db->mdiobus) + return -ENOMEM; + + db->mdiobus->priv = db; + db->mdiobus->read = dm9051_mdio_read; + db->mdiobus->write = dm9051_mdio_write; + db->mdiobus->name = "dm9051-mdiobus"; + db->mdiobus->phy_mask = (u32)~BIT(1); + db->mdiobus->parent = &spi->dev; + snprintf(db->mdiobus->id, MII_BUS_ID_SIZE, + "dm9051-%s.%u", dev_name(&spi->dev), spi->chip_select); + + ret = devm_mdiobus_register(&spi->dev, db->mdiobus); + if (ret) + dev_err(&spi->dev, "Could not register MDIO bus\n"); + + return ret; +} + +static void dm9051_handle_link_change(struct net_device *ndev) +{ + struct board_info *db = to_dm9051_board(ndev); + + phy_print_status(db->phydev); + + /* only write pause settings to mac. since mac and phy are integrated + * together, such as link state, speed and duplex are sync already + */ + if (db->phydev->link) { + if (db->phydev->pause) { + db->pause.rx_pause = true; + db->pause.tx_pause = true; + } + dm9051_update_fcr(db); + } +} + +/* phy connect as poll mode + */ +static int dm9051_phy_connect(struct board_info *db) +{ + char phy_id[MII_BUS_ID_SIZE + 3]; + + snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, + db->mdiobus->id, DM9051_PHY_ADDR); + + db->phydev = phy_connect(db->ndev, phy_id, dm9051_handle_link_change, + PHY_INTERFACE_MODE_MII); + if (IS_ERR(db->phydev)) + return PTR_ERR_OR_ZERO(db->phydev); + return 0; +} + +static int dm9051_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct net_device *ndev; + struct board_info *db; + int ret; + + ndev = devm_alloc_etherdev(dev, sizeof(struct board_info)); + if (!ndev) + return -ENOMEM; + + SET_NETDEV_DEV(ndev, dev); + dev_set_drvdata(dev, ndev); + + db = netdev_priv(ndev); + + db->msg_enable = 0; + db->spidev = spi; + db->ndev = ndev; + + ndev->netdev_ops = &dm9051_netdev_ops; + ndev->ethtool_ops = &dm9051_ethtool_ops; + + mutex_init(&db->spi_lockm); + mutex_init(&db->reg_mutex); + + INIT_WORK(&db->rxctrl_work, dm9051_rxctl_delay); + INIT_WORK(&db->tx_work, dm9051_tx_delay); + + ret = dm9051_map_init(spi, db); + if (ret) + return ret; + + ret = dm9051_map_chipid(db); + if (ret) + return ret; + + ret = dm9051_map_etherdev_par(ndev, db); + if (ret < 0) + return ret; + + ret = dm9051_mdio_register(db); + if (ret) + return ret; + + ret = dm9051_phy_connect(db); + if (ret) + return ret; + + dm9051_operation_clear(db); + skb_queue_head_init(&db->txq); + + ret = devm_register_netdev(dev, ndev); + if (ret) { + phy_disconnect(db->phydev); + return dev_err_probe(dev, ret, "device register failed"); + } + + return 0; +} + +static void dm9051_drv_remove(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct net_device *ndev = dev_get_drvdata(dev); + struct board_info *db = to_dm9051_board(ndev); + + phy_disconnect(db->phydev); +} + +static const struct of_device_id dm9051_match_table[] = { + { .compatible = "davicom,dm9051" }, + {} +}; + +static const struct spi_device_id dm9051_id_table[] = { + { "dm9051", 0 }, + {} +}; + +static struct spi_driver dm9051_driver = { + .driver = { + .name = DRVNAME_9051, + .of_match_table = dm9051_match_table, + }, + .probe = dm9051_probe, + .remove = dm9051_drv_remove, + .id_table = dm9051_id_table, +}; +module_spi_driver(dm9051_driver); + +MODULE_AUTHOR("Joseph CHANG <joseph_chang@davicom.com.tw>"); +MODULE_DESCRIPTION("Davicom DM9051 network SPI driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/davicom/dm9051.h b/drivers/net/ethernet/davicom/dm9051.h new file mode 100644 index 000000000000..fef3120edd7c --- /dev/null +++ b/drivers/net/ethernet/davicom/dm9051.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 Davicom Semiconductor,Inc. + * Davicom DM9051 SPI Fast Ethernet Linux driver + */ + +#ifndef _DM9051_H_ +#define _DM9051_H_ + +#include <linux/bits.h> +#include <linux/netdevice.h> +#include <linux/types.h> + +#define DM9051_ID 0x9051 + +#define DM9051_NCR 0x00 +#define DM9051_NSR 0x01 +#define DM9051_TCR 0x02 +#define DM9051_RCR 0x05 +#define DM9051_BPTR 0x08 +#define DM9051_FCR 0x0A +#define DM9051_EPCR 0x0B +#define DM9051_EPAR 0x0C +#define DM9051_EPDRL 0x0D +#define DM9051_EPDRH 0x0E +#define DM9051_PAR 0x10 +#define DM9051_MAR 0x16 +#define DM9051_GPCR 0x1E +#define DM9051_GPR 0x1F + +#define DM9051_VIDL 0x28 +#define DM9051_VIDH 0x29 +#define DM9051_PIDL 0x2A +#define DM9051_PIDH 0x2B +#define DM9051_SMCR 0x2F +#define DM9051_ATCR 0x30 +#define DM9051_SPIBCR 0x38 +#define DM9051_INTCR 0x39 +#define DM9051_PPCR 0x3D + +#define DM9051_MPCR 0x55 +#define DM9051_LMCR 0x57 +#define DM9051_MBNDRY 0x5E + +#define DM9051_MRRL 0x74 +#define DM9051_MRRH 0x75 +#define DM9051_MWRL 0x7A +#define DM9051_MWRH 0x7B +#define DM9051_TXPLL 0x7C +#define DM9051_TXPLH 0x7D +#define DM9051_ISR 0x7E +#define DM9051_IMR 0x7F + +#define DM_SPI_MRCMDX 0x70 +#define DM_SPI_MRCMD 0x72 +#define DM_SPI_MWCMD 0x78 + +#define DM_SPI_WR 0x80 + +/* dm9051 Ethernet controller registers bits + */ +/* 0x00 */ +#define NCR_WAKEEN BIT(6) +#define NCR_FDX BIT(3) +#define NCR_RST BIT(0) +/* 0x01 */ +#define NSR_SPEED BIT(7) +#define NSR_LINKST BIT(6) +#define NSR_WAKEST BIT(5) +#define NSR_TX2END BIT(3) +#define NSR_TX1END BIT(2) +/* 0x02 */ +#define TCR_DIS_JABBER_TIMER BIT(6) /* for Jabber Packet support */ +#define TCR_TXREQ BIT(0) +/* 0x05 */ +#define RCR_DIS_WATCHDOG_TIMER BIT(6) /* for Jabber Packet support */ +#define RCR_DIS_LONG BIT(5) +#define RCR_DIS_CRC BIT(4) +#define RCR_ALL BIT(3) +#define RCR_PRMSC BIT(1) +#define RCR_RXEN BIT(0) +#define RCR_RX_DISABLE (RCR_DIS_LONG | RCR_DIS_CRC) +/* 0x06 */ +#define RSR_RF BIT(7) +#define RSR_MF BIT(6) +#define RSR_LCS BIT(5) +#define RSR_RWTO BIT(4) +#define RSR_PLE BIT(3) +#define RSR_AE BIT(2) +#define RSR_CE BIT(1) +#define RSR_FOE BIT(0) +#define RSR_ERR_BITS (RSR_RF | RSR_LCS | RSR_RWTO | RSR_PLE | \ + RSR_AE | RSR_CE | RSR_FOE) +/* 0x0A */ +#define FCR_TXPEN BIT(5) +#define FCR_BKPM BIT(3) +#define FCR_FLCE BIT(0) +#define FCR_RXTX_BITS (FCR_TXPEN | FCR_BKPM | FCR_FLCE) +/* 0x0B */ +#define EPCR_WEP BIT(4) +#define EPCR_EPOS BIT(3) +#define EPCR_ERPRR BIT(2) +#define EPCR_ERPRW BIT(1) +#define EPCR_ERRE BIT(0) +/* 0x1E */ +#define GPCR_GEP_CNTL BIT(0) +/* 0x1F */ +#define GPR_PHY_OFF BIT(0) +/* 0x30 */ +#define ATCR_AUTO_TX BIT(7) +/* 0x39 */ +#define INTCR_POL_LOW (1 << 0) +#define INTCR_POL_HIGH (0 << 0) +/* 0x3D */ +/* Pause Packet Control Register - default = 1 */ +#define PPCR_PAUSE_COUNT 0x08 +/* 0x55 */ +#define MPCR_RSTTX BIT(1) +#define MPCR_RSTRX BIT(0) +/* 0x57 */ +/* LEDMode Control Register - LEDMode1 */ +/* Value 0x81 : bit[7] = 1, bit[2] = 0, bit[1:0] = 01b */ +#define LMCR_NEWMOD BIT(7) +#define LMCR_TYPED1 BIT(1) +#define LMCR_TYPED0 BIT(0) +#define LMCR_MODE1 (LMCR_NEWMOD | LMCR_TYPED0) +/* 0x5E */ +#define MBNDRY_BYTE BIT(7) +/* 0xFE */ +#define ISR_MBS BIT(7) +#define ISR_LNKCHG BIT(5) +#define ISR_ROOS BIT(3) +#define ISR_ROS BIT(2) +#define ISR_PTS BIT(1) +#define ISR_PRS BIT(0) +#define ISR_CLR_INT (ISR_LNKCHG | ISR_ROOS | ISR_ROS | \ + ISR_PTS | ISR_PRS) +#define ISR_STOP_MRCMD (ISR_MBS) +/* 0xFF */ +#define IMR_PAR BIT(7) +#define IMR_LNKCHGI BIT(5) +#define IMR_PTM BIT(1) +#define IMR_PRM BIT(0) + +/* Const + */ +#define DM9051_PHY_ADDR 1 /* PHY id */ +#define DM9051_PHY 0x40 /* PHY address 0x01 */ +#define DM9051_PKT_RDY 0x01 /* Packet ready to receive */ +#define DM9051_PKT_MAX 1536 /* Received packet max size */ +#define DM9051_TX_QUE_HI_WATER 50 +#define DM9051_TX_QUE_LO_WATER 25 +#define DM_EEPROM_MAGIC 0x9051 + +#define DM_RXHDR_SIZE sizeof(struct dm9051_rxhdr) + +static inline struct board_info *to_dm9051_board(struct net_device *ndev) +{ + return netdev_priv(ndev); +} + +#endif /* _DM9051_H_ */ diff --git a/drivers/net/ethernet/dec/tulip/pnic.c b/drivers/net/ethernet/dec/tulip/pnic.c index 3fb39e32e1b4..653bde48ef44 100644 --- a/drivers/net/ethernet/dec/tulip/pnic.c +++ b/drivers/net/ethernet/dec/tulip/pnic.c @@ -21,7 +21,7 @@ void pnic_do_nway(struct net_device *dev) struct tulip_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->base_addr; u32 phy_reg = ioread32(ioaddr + 0xB8); - u32 new_csr6 = tp->csr6 & ~0x40C40200; + u32 new_csr6; if (phy_reg & 0x78000000) { /* Ignore baseT4 */ if (phy_reg & 0x20000000) dev->if_port = 5; diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index c710dc17be90..8dd7bf9014ec 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -340,7 +340,7 @@ enum wake_event_bits { struct netdev_desc { __le32 next_desc; __le32 status; - struct desc_frag { __le32 addr, length; } frag[1]; + struct desc_frag { __le32 addr, length; } frag; }; /* Bits in netdev_desc.status */ @@ -980,8 +980,8 @@ static void tx_timeout(struct net_device *dev, unsigned int txqueue) le32_to_cpu(np->tx_ring[i].next_desc), le32_to_cpu(np->tx_ring[i].status), (le32_to_cpu(np->tx_ring[i].status) >> 2) & 0xff, - le32_to_cpu(np->tx_ring[i].frag[0].addr), - le32_to_cpu(np->tx_ring[i].frag[0].length)); + le32_to_cpu(np->tx_ring[i].frag.addr), + le32_to_cpu(np->tx_ring[i].frag.length)); } printk(KERN_DEBUG "TxListPtr=%08x netif_queue_stopped=%d\n", ioread32(np->base + TxListPtr), @@ -1027,7 +1027,7 @@ static void init_ring(struct net_device *dev) np->rx_ring[i].next_desc = cpu_to_le32(np->rx_ring_dma + ((i+1)%RX_RING_SIZE)*sizeof(*np->rx_ring)); np->rx_ring[i].status = 0; - np->rx_ring[i].frag[0].length = 0; + np->rx_ring[i].frag.length = 0; np->rx_skbuff[i] = NULL; } @@ -1039,16 +1039,16 @@ static void init_ring(struct net_device *dev) if (skb == NULL) break; skb_reserve(skb, 2); /* 16 byte align the IP header. */ - np->rx_ring[i].frag[0].addr = cpu_to_le32( + np->rx_ring[i].frag.addr = cpu_to_le32( dma_map_single(&np->pci_dev->dev, skb->data, np->rx_buf_sz, DMA_FROM_DEVICE)); if (dma_mapping_error(&np->pci_dev->dev, - np->rx_ring[i].frag[0].addr)) { + np->rx_ring[i].frag.addr)) { dev_kfree_skb(skb); np->rx_skbuff[i] = NULL; break; } - np->rx_ring[i].frag[0].length = cpu_to_le32(np->rx_buf_sz | LastFrag); + np->rx_ring[i].frag.length = cpu_to_le32(np->rx_buf_sz | LastFrag); } np->dirty_rx = (unsigned int)(i - RX_RING_SIZE); @@ -1097,12 +1097,12 @@ start_tx (struct sk_buff *skb, struct net_device *dev) txdesc->next_desc = 0; txdesc->status = cpu_to_le32 ((entry << 2) | DisableAlign); - txdesc->frag[0].addr = cpu_to_le32(dma_map_single(&np->pci_dev->dev, + txdesc->frag.addr = cpu_to_le32(dma_map_single(&np->pci_dev->dev, skb->data, skb->len, DMA_TO_DEVICE)); if (dma_mapping_error(&np->pci_dev->dev, - txdesc->frag[0].addr)) + txdesc->frag.addr)) goto drop_frame; - txdesc->frag[0].length = cpu_to_le32 (skb->len | LastFrag); + txdesc->frag.length = cpu_to_le32 (skb->len | LastFrag); /* Increment cur_tx before tasklet_schedule() */ np->cur_tx++; @@ -1151,7 +1151,7 @@ reset_tx (struct net_device *dev) skb = np->tx_skbuff[i]; if (skb) { dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->tx_ring[i].frag[0].addr), + le32_to_cpu(np->tx_ring[i].frag.addr), skb->len, DMA_TO_DEVICE); dev_kfree_skb_any(skb); np->tx_skbuff[i] = NULL; @@ -1271,12 +1271,12 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) skb = np->tx_skbuff[entry]; /* Free the original skb. */ dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->tx_ring[entry].frag[0].addr), + le32_to_cpu(np->tx_ring[entry].frag.addr), skb->len, DMA_TO_DEVICE); dev_consume_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; - np->tx_ring[entry].frag[0].addr = 0; - np->tx_ring[entry].frag[0].length = 0; + np->tx_ring[entry].frag.addr = 0; + np->tx_ring[entry].frag.length = 0; } spin_unlock(&np->lock); } else { @@ -1290,12 +1290,12 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) skb = np->tx_skbuff[entry]; /* Free the original skb. */ dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->tx_ring[entry].frag[0].addr), + le32_to_cpu(np->tx_ring[entry].frag.addr), skb->len, DMA_TO_DEVICE); dev_consume_skb_irq(np->tx_skbuff[entry]); np->tx_skbuff[entry] = NULL; - np->tx_ring[entry].frag[0].addr = 0; - np->tx_ring[entry].frag[0].length = 0; + np->tx_ring[entry].frag.addr = 0; + np->tx_ring[entry].frag.length = 0; } spin_unlock(&np->lock); } @@ -1372,16 +1372,16 @@ static void rx_poll(struct tasklet_struct *t) (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { skb_reserve(skb, 2); /* 16 byte align the IP header */ dma_sync_single_for_cpu(&np->pci_dev->dev, - le32_to_cpu(desc->frag[0].addr), + le32_to_cpu(desc->frag.addr), np->rx_buf_sz, DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len); dma_sync_single_for_device(&np->pci_dev->dev, - le32_to_cpu(desc->frag[0].addr), + le32_to_cpu(desc->frag.addr), np->rx_buf_sz, DMA_FROM_DEVICE); skb_put(skb, pkt_len); } else { dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(desc->frag[0].addr), + le32_to_cpu(desc->frag.addr), np->rx_buf_sz, DMA_FROM_DEVICE); skb_put(skb = np->rx_skbuff[entry], pkt_len); np->rx_skbuff[entry] = NULL; @@ -1427,18 +1427,18 @@ static void refill_rx (struct net_device *dev) if (skb == NULL) break; /* Better luck next round. */ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ - np->rx_ring[entry].frag[0].addr = cpu_to_le32( + np->rx_ring[entry].frag.addr = cpu_to_le32( dma_map_single(&np->pci_dev->dev, skb->data, np->rx_buf_sz, DMA_FROM_DEVICE)); if (dma_mapping_error(&np->pci_dev->dev, - np->rx_ring[entry].frag[0].addr)) { + np->rx_ring[entry].frag.addr)) { dev_kfree_skb_irq(skb); np->rx_skbuff[entry] = NULL; break; } } /* Perhaps we need not reset this field. */ - np->rx_ring[entry].frag[0].length = + np->rx_ring[entry].frag.length = cpu_to_le32(np->rx_buf_sz | LastFrag); np->rx_ring[entry].status = 0; cnt++; @@ -1870,14 +1870,14 @@ static int netdev_close(struct net_device *dev) (int)(np->tx_ring_dma)); for (i = 0; i < TX_RING_SIZE; i++) printk(KERN_DEBUG " #%d desc. %4.4x %8.8x %8.8x.\n", - i, np->tx_ring[i].status, np->tx_ring[i].frag[0].addr, - np->tx_ring[i].frag[0].length); + i, np->tx_ring[i].status, np->tx_ring[i].frag.addr, + np->tx_ring[i].frag.length); printk(KERN_DEBUG " Rx ring %8.8x:\n", (int)(np->rx_ring_dma)); for (i = 0; i < /*RX_RING_SIZE*/4 ; i++) { printk(KERN_DEBUG " #%d desc. %4.4x %4.4x %8.8x\n", - i, np->rx_ring[i].status, np->rx_ring[i].frag[0].addr, - np->rx_ring[i].frag[0].length); + i, np->rx_ring[i].status, np->rx_ring[i].frag.addr, + np->rx_ring[i].frag.length); } } #endif /* __i386__ debugging only */ @@ -1892,19 +1892,19 @@ static int netdev_close(struct net_device *dev) skb = np->rx_skbuff[i]; if (skb) { dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->rx_ring[i].frag[0].addr), + le32_to_cpu(np->rx_ring[i].frag.addr), np->rx_buf_sz, DMA_FROM_DEVICE); dev_kfree_skb(skb); np->rx_skbuff[i] = NULL; } - np->rx_ring[i].frag[0].addr = cpu_to_le32(0xBADF00D0); /* poison */ + np->rx_ring[i].frag.addr = cpu_to_le32(0xBADF00D0); /* poison */ } for (i = 0; i < TX_RING_SIZE; i++) { np->tx_ring[i].next_desc = 0; skb = np->tx_skbuff[i]; if (skb) { dma_unmap_single(&np->pci_dev->dev, - le32_to_cpu(np->tx_ring[i].frag[0].addr), + le32_to_cpu(np->tx_ring[i].frag.addr), skb->len, DMA_TO_DEVICE); dev_kfree_skb(skb); np->tx_skbuff[i] = NULL; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 0f90d2d5bb60..939fa9db6a2e 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -18,6 +18,7 @@ #include <linux/ptp_classify.h> #include <net/pkt_cls.h> #include <net/sock.h> +#include <net/tso.h> #include "dpaa2-eth.h" @@ -34,6 +35,75 @@ MODULE_DESCRIPTION("Freescale DPAA2 Ethernet Driver"); struct ptp_qoriq *dpaa2_ptp; EXPORT_SYMBOL(dpaa2_ptp); +static void dpaa2_eth_detect_features(struct dpaa2_eth_priv *priv) +{ + priv->features = 0; + + if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_PTP_ONESTEP_VER_MAJOR, + DPNI_PTP_ONESTEP_VER_MINOR) >= 0) + priv->features |= DPAA2_ETH_FEATURE_ONESTEP_CFG_DIRECT; +} + +static void dpaa2_update_ptp_onestep_indirect(struct dpaa2_eth_priv *priv, + u32 offset, u8 udp) +{ + struct dpni_single_step_cfg cfg; + + cfg.en = 1; + cfg.ch_update = udp; + cfg.offset = offset; + cfg.peer_delay = 0; + + if (dpni_set_single_step_cfg(priv->mc_io, 0, priv->mc_token, &cfg)) + WARN_ONCE(1, "Failed to set single step register"); +} + +static void dpaa2_update_ptp_onestep_direct(struct dpaa2_eth_priv *priv, + u32 offset, u8 udp) +{ + u32 val = 0; + + val = DPAA2_PTP_SINGLE_STEP_ENABLE | + DPAA2_PTP_SINGLE_CORRECTION_OFF(offset); + + if (udp) + val |= DPAA2_PTP_SINGLE_STEP_CH; + + if (priv->onestep_reg_base) + writel(val, priv->onestep_reg_base); +} + +static void dpaa2_ptp_onestep_reg_update_method(struct dpaa2_eth_priv *priv) +{ + struct device *dev = priv->net_dev->dev.parent; + struct dpni_single_step_cfg ptp_cfg; + + priv->dpaa2_set_onestep_params_cb = dpaa2_update_ptp_onestep_indirect; + + if (!(priv->features & DPAA2_ETH_FEATURE_ONESTEP_CFG_DIRECT)) + return; + + if (dpni_get_single_step_cfg(priv->mc_io, 0, + priv->mc_token, &ptp_cfg)) { + dev_err(dev, "dpni_get_single_step_cfg cannot retrieve onestep reg, falling back to indirect update\n"); + return; + } + + if (!ptp_cfg.ptp_onestep_reg_base) { + dev_err(dev, "1588 onestep reg not available, falling back to indirect update\n"); + return; + } + + priv->onestep_reg_base = ioremap(ptp_cfg.ptp_onestep_reg_base, + sizeof(u32)); + if (!priv->onestep_reg_base) { + dev_err(dev, "1588 onestep reg cannot be mapped, falling back to indirect update\n"); + return; + } + + priv->dpaa2_set_onestep_params_cb = dpaa2_update_ptp_onestep_direct; +} + static void *dpaa2_iova_to_virt(struct iommu_domain *domain, dma_addr_t iova_addr) { @@ -695,7 +765,6 @@ static void dpaa2_eth_enable_tx_tstamp(struct dpaa2_eth_priv *priv, struct sk_buff *skb) { struct ptp_tstamp origin_timestamp; - struct dpni_single_step_cfg cfg; u8 msgtype, twostep, udp; struct dpaa2_faead *faead; struct dpaa2_fas *fas; @@ -749,17 +818,48 @@ static void dpaa2_eth_enable_tx_tstamp(struct dpaa2_eth_priv *priv, htonl(origin_timestamp.sec_lsb); *(__be32 *)(data + offset2 + 6) = htonl(origin_timestamp.nsec); - cfg.en = 1; - cfg.ch_update = udp; - cfg.offset = offset1; - cfg.peer_delay = 0; + if (priv->ptp_correction_off == offset1) + return; + + priv->dpaa2_set_onestep_params_cb(priv, offset1, udp); + priv->ptp_correction_off = offset1; - if (dpni_set_single_step_cfg(priv->mc_io, 0, priv->mc_token, - &cfg)) - WARN_ONCE(1, "Failed to set single step register"); } } +static void *dpaa2_eth_sgt_get(struct dpaa2_eth_priv *priv) +{ + struct dpaa2_eth_sgt_cache *sgt_cache; + void *sgt_buf = NULL; + int sgt_buf_size; + + sgt_cache = this_cpu_ptr(priv->sgt_cache); + sgt_buf_size = priv->tx_data_offset + + DPAA2_ETH_SG_ENTRIES_MAX * sizeof(struct dpaa2_sg_entry); + + if (sgt_cache->count == 0) + sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN); + else + sgt_buf = sgt_cache->buf[--sgt_cache->count]; + if (!sgt_buf) + return NULL; + + memset(sgt_buf, 0, sgt_buf_size); + + return sgt_buf; +} + +static void dpaa2_eth_sgt_recycle(struct dpaa2_eth_priv *priv, void *sgt_buf) +{ + struct dpaa2_eth_sgt_cache *sgt_cache; + + sgt_cache = this_cpu_ptr(priv->sgt_cache); + if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE) + skb_free_frag(sgt_buf); + else + sgt_cache->buf[sgt_cache->count++] = sgt_buf; +} + /* Create a frame descriptor based on a fragmented skb */ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, struct sk_buff *skb, @@ -805,12 +905,11 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, /* Prepare the HW SGT structure */ sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry) * num_dma_bufs; - sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN); + sgt_buf = dpaa2_eth_sgt_get(priv); if (unlikely(!sgt_buf)) { err = -ENOMEM; goto sgt_buf_alloc_failed; } - memset(sgt_buf, 0, sgt_buf_size); sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); @@ -846,6 +945,7 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, err = -ENOMEM; goto dma_map_single_failed; } + memset(fd, 0, sizeof(struct dpaa2_fd)); dpaa2_fd_set_offset(fd, priv->tx_data_offset); dpaa2_fd_set_format(fd, dpaa2_fd_sg); dpaa2_fd_set_addr(fd, addr); @@ -855,7 +955,7 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, return 0; dma_map_single_failed: - skb_free_frag(sgt_buf); + dpaa2_eth_sgt_recycle(priv, sgt_buf); sgt_buf_alloc_failed: dma_unmap_sg(dev, scl, num_sg, DMA_BIDIRECTIONAL); dma_map_sg_failed: @@ -875,7 +975,6 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, void **swa_addr) { struct device *dev = priv->net_dev->dev.parent; - struct dpaa2_eth_sgt_cache *sgt_cache; struct dpaa2_sg_entry *sgt; struct dpaa2_eth_swa *swa; dma_addr_t addr, sgt_addr; @@ -884,18 +983,10 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, int err; /* Prepare the HW SGT structure */ - sgt_cache = this_cpu_ptr(priv->sgt_cache); sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry); - - if (sgt_cache->count == 0) - sgt_buf = kzalloc(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN, - GFP_ATOMIC); - else - sgt_buf = sgt_cache->buf[--sgt_cache->count]; + sgt_buf = dpaa2_eth_sgt_get(priv); if (unlikely(!sgt_buf)) return -ENOMEM; - - sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN); sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); addr = dma_map_single(dev, skb->data, skb->len, DMA_BIDIRECTIONAL); @@ -923,6 +1014,7 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, goto sgt_map_failed; } + memset(fd, 0, sizeof(struct dpaa2_fd)); dpaa2_fd_set_offset(fd, priv->tx_data_offset); dpaa2_fd_set_format(fd, dpaa2_fd_sg); dpaa2_fd_set_addr(fd, sgt_addr); @@ -934,10 +1026,7 @@ static int dpaa2_eth_build_sg_fd_single_buf(struct dpaa2_eth_priv *priv, sgt_map_failed: dma_unmap_single(dev, addr, skb->len, DMA_BIDIRECTIONAL); data_map_failed: - if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE) - kfree(sgt_buf); - else - sgt_cache->buf[sgt_cache->count++] = sgt_buf; + dpaa2_eth_sgt_recycle(priv, sgt_buf); return err; } @@ -978,6 +1067,7 @@ static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv, if (unlikely(dma_mapping_error(dev, addr))) return -ENOMEM; + memset(fd, 0, sizeof(struct dpaa2_fd)); dpaa2_fd_set_addr(fd, addr); dpaa2_fd_set_offset(fd, (u16)(skb->data - buffer_start)); dpaa2_fd_set_len(fd, skb->len); @@ -1005,9 +1095,9 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, struct dpaa2_eth_swa *swa; u8 fd_format = dpaa2_fd_get_format(fd); u32 fd_len = dpaa2_fd_get_len(fd); - - struct dpaa2_eth_sgt_cache *sgt_cache; struct dpaa2_sg_entry *sgt; + int should_free_skb = 1; + int i; fd_addr = dpaa2_fd_get_addr(fd); buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr); @@ -1039,6 +1129,28 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, /* Unmap the SGT buffer */ dma_unmap_single(dev, fd_addr, swa->sg.sgt_size, DMA_BIDIRECTIONAL); + } else if (swa->type == DPAA2_ETH_SWA_SW_TSO) { + skb = swa->tso.skb; + + sgt = (struct dpaa2_sg_entry *)(buffer_start + + priv->tx_data_offset); + + /* Unmap and free the header */ + dma_unmap_single(dev, dpaa2_sg_get_addr(sgt), TSO_HEADER_SIZE, + DMA_TO_DEVICE); + kfree(dpaa2_iova_to_virt(priv->iommu_domain, dpaa2_sg_get_addr(sgt))); + + /* Unmap the other SG entries for the data */ + for (i = 1; i < swa->tso.num_sg; i++) + dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]), + dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE); + + /* Unmap the SGT buffer */ + dma_unmap_single(dev, fd_addr, swa->sg.sgt_size, + DMA_BIDIRECTIONAL); + + if (!swa->tso.is_last_fd) + should_free_skb = 0; } else { skb = swa->single.skb; @@ -1067,55 +1179,195 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv, } /* Get the timestamp value */ - if (skb->cb[0] == TX_TSTAMP) { - struct skb_shared_hwtstamps shhwtstamps; - __le64 *ts = dpaa2_get_ts(buffer_start, true); - u64 ns; - - memset(&shhwtstamps, 0, sizeof(shhwtstamps)); - - ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts); - shhwtstamps.hwtstamp = ns_to_ktime(ns); - skb_tstamp_tx(skb, &shhwtstamps); - } else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) { - mutex_unlock(&priv->onestep_tstamp_lock); + if (swa->type != DPAA2_ETH_SWA_SW_TSO) { + if (skb->cb[0] == TX_TSTAMP) { + struct skb_shared_hwtstamps shhwtstamps; + __le64 *ts = dpaa2_get_ts(buffer_start, true); + u64 ns; + + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + + ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts); + shhwtstamps.hwtstamp = ns_to_ktime(ns); + skb_tstamp_tx(skb, &shhwtstamps); + } else if (skb->cb[0] == TX_TSTAMP_ONESTEP_SYNC) { + mutex_unlock(&priv->onestep_tstamp_lock); + } } /* Free SGT buffer allocated on tx */ - if (fd_format != dpaa2_fd_single) { - sgt_cache = this_cpu_ptr(priv->sgt_cache); - if (swa->type == DPAA2_ETH_SWA_SG) { - skb_free_frag(buffer_start); - } else { - if (sgt_cache->count >= DPAA2_ETH_SGT_CACHE_SIZE) - kfree(buffer_start); - else - sgt_cache->buf[sgt_cache->count++] = buffer_start; + if (fd_format != dpaa2_fd_single) + dpaa2_eth_sgt_recycle(priv, buffer_start); + + /* Move on with skb release. If we are just confirming multiple FDs + * from the same TSO skb then only the last one will need to free the + * skb. + */ + if (should_free_skb) + napi_consume_skb(skb, in_napi); +} + +static int dpaa2_eth_build_gso_fd(struct dpaa2_eth_priv *priv, + struct sk_buff *skb, struct dpaa2_fd *fd, + int *num_fds, u32 *total_fds_len) +{ + struct device *dev = priv->net_dev->dev.parent; + int hdr_len, total_len, data_left, fd_len; + int num_sge, err, i, sgt_buf_size; + struct dpaa2_fd *fd_start = fd; + struct dpaa2_sg_entry *sgt; + struct dpaa2_eth_swa *swa; + dma_addr_t sgt_addr, addr; + dma_addr_t tso_hdr_dma; + unsigned int index = 0; + struct tso_t tso; + char *tso_hdr; + void *sgt_buf; + + /* Initialize the TSO handler, and prepare the first payload */ + hdr_len = tso_start(skb, &tso); + *total_fds_len = 0; + + total_len = skb->len - hdr_len; + while (total_len > 0) { + /* Prepare the HW SGT structure for this frame */ + sgt_buf = dpaa2_eth_sgt_get(priv); + if (unlikely(!sgt_buf)) { + netdev_err(priv->net_dev, "dpaa2_eth_sgt_get() failed\n"); + err = -ENOMEM; + goto err_sgt_get; + } + sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); + + /* Determine the data length of this frame */ + data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); + total_len -= data_left; + fd_len = data_left + hdr_len; + + /* Prepare packet headers: MAC + IP + TCP */ + tso_hdr = kmalloc(TSO_HEADER_SIZE, GFP_ATOMIC); + if (!tso_hdr) { + err = -ENOMEM; + goto err_alloc_tso_hdr; + } + + tso_build_hdr(skb, tso_hdr, &tso, data_left, total_len == 0); + tso_hdr_dma = dma_map_single(dev, tso_hdr, TSO_HEADER_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, tso_hdr_dma)) { + netdev_err(priv->net_dev, "dma_map_single(tso_hdr) failed\n"); + err = -ENOMEM; + goto err_map_tso_hdr; + } + + /* Setup the SG entry for the header */ + dpaa2_sg_set_addr(sgt, tso_hdr_dma); + dpaa2_sg_set_len(sgt, hdr_len); + dpaa2_sg_set_final(sgt, data_left <= 0); + + /* Compose the SG entries for each fragment of data */ + num_sge = 1; + while (data_left > 0) { + int size; + + /* Move to the next SG entry */ + sgt++; + size = min_t(int, tso.size, data_left); + + addr = dma_map_single(dev, tso.data, size, DMA_TO_DEVICE); + if (dma_mapping_error(dev, addr)) { + netdev_err(priv->net_dev, "dma_map_single(tso.data) failed\n"); + err = -ENOMEM; + goto err_map_data; + } + dpaa2_sg_set_addr(sgt, addr); + dpaa2_sg_set_len(sgt, size); + dpaa2_sg_set_final(sgt, size == data_left); + + num_sge++; + + /* Build the data for the __next__ fragment */ + data_left -= size; + tso_build_data(skb, &tso, size); + } + + /* Store the skb backpointer in the SGT buffer */ + sgt_buf_size = priv->tx_data_offset + num_sge * sizeof(struct dpaa2_sg_entry); + swa = (struct dpaa2_eth_swa *)sgt_buf; + swa->type = DPAA2_ETH_SWA_SW_TSO; + swa->tso.skb = skb; + swa->tso.num_sg = num_sge; + swa->tso.sgt_size = sgt_buf_size; + swa->tso.is_last_fd = total_len == 0 ? 1 : 0; + + /* Separately map the SGT buffer */ + sgt_addr = dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(dev, sgt_addr))) { + netdev_err(priv->net_dev, "dma_map_single(sgt_buf) failed\n"); + err = -ENOMEM; + goto err_map_sgt; } + + /* Setup the frame descriptor */ + memset(fd, 0, sizeof(struct dpaa2_fd)); + dpaa2_fd_set_offset(fd, priv->tx_data_offset); + dpaa2_fd_set_format(fd, dpaa2_fd_sg); + dpaa2_fd_set_addr(fd, sgt_addr); + dpaa2_fd_set_len(fd, fd_len); + dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA); + + *total_fds_len += fd_len; + /* Advance to the next frame descriptor */ + fd++; + index++; } - /* Move on with skb release */ - napi_consume_skb(skb, in_napi); + *num_fds = index; + + return 0; + +err_map_sgt: +err_map_data: + /* Unmap all the data S/G entries for the current FD */ + sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); + for (i = 1; i < num_sge; i++) + dma_unmap_single(dev, dpaa2_sg_get_addr(&sgt[i]), + dpaa2_sg_get_len(&sgt[i]), DMA_TO_DEVICE); + + /* Unmap the header entry */ + dma_unmap_single(dev, tso_hdr_dma, TSO_HEADER_SIZE, DMA_TO_DEVICE); +err_map_tso_hdr: + kfree(tso_hdr); +err_alloc_tso_hdr: + dpaa2_eth_sgt_recycle(priv, sgt_buf); +err_sgt_get: + /* Free all the other FDs that were already fully created */ + for (i = 0; i < index; i++) + dpaa2_eth_free_tx_fd(priv, NULL, &fd_start[i], false); + + return err; } static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) { struct dpaa2_eth_priv *priv = netdev_priv(net_dev); - struct dpaa2_fd fd; - struct rtnl_link_stats64 *percpu_stats; + int total_enqueued = 0, retries = 0, enqueued; struct dpaa2_eth_drv_stats *percpu_extras; + struct rtnl_link_stats64 *percpu_stats; + unsigned int needed_headroom; + int num_fds = 1, max_retries; struct dpaa2_eth_fq *fq; struct netdev_queue *nq; + struct dpaa2_fd *fd; u16 queue_mapping; - unsigned int needed_headroom; - u32 fd_len; + void *swa = NULL; u8 prio = 0; int err, i; - void *swa; + u32 fd_len; percpu_stats = this_cpu_ptr(priv->percpu_stats); percpu_extras = this_cpu_ptr(priv->percpu_extras); + fd = (this_cpu_ptr(priv->fd))->array; needed_headroom = dpaa2_eth_needed_headroom(skb); @@ -1130,20 +1382,28 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, } /* Setup the FD fields */ - memset(&fd, 0, sizeof(fd)); - if (skb_is_nonlinear(skb)) { - err = dpaa2_eth_build_sg_fd(priv, skb, &fd, &swa); + if (skb_is_gso(skb)) { + err = dpaa2_eth_build_gso_fd(priv, skb, fd, &num_fds, &fd_len); + percpu_extras->tx_sg_frames += num_fds; + percpu_extras->tx_sg_bytes += fd_len; + percpu_extras->tx_tso_frames += num_fds; + percpu_extras->tx_tso_bytes += fd_len; + } else if (skb_is_nonlinear(skb)) { + err = dpaa2_eth_build_sg_fd(priv, skb, fd, &swa); percpu_extras->tx_sg_frames++; percpu_extras->tx_sg_bytes += skb->len; + fd_len = dpaa2_fd_get_len(fd); } else if (skb_headroom(skb) < needed_headroom) { - err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, &fd, &swa); + err = dpaa2_eth_build_sg_fd_single_buf(priv, skb, fd, &swa); percpu_extras->tx_sg_frames++; percpu_extras->tx_sg_bytes += skb->len; percpu_extras->tx_converted_sg_frames++; percpu_extras->tx_converted_sg_bytes += skb->len; + fd_len = dpaa2_fd_get_len(fd); } else { - err = dpaa2_eth_build_single_fd(priv, skb, &fd, &swa); + err = dpaa2_eth_build_single_fd(priv, skb, fd, &swa); + fd_len = dpaa2_fd_get_len(fd); } if (unlikely(err)) { @@ -1151,11 +1411,12 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, goto err_build_fd; } - if (skb->cb[0]) - dpaa2_eth_enable_tx_tstamp(priv, &fd, swa, skb); + if (swa && skb->cb[0]) + dpaa2_eth_enable_tx_tstamp(priv, fd, swa, skb); /* Tracing point */ - trace_dpaa2_tx_fd(net_dev, &fd); + for (i = 0; i < num_fds; i++) + trace_dpaa2_tx_fd(net_dev, &fd[i]); /* TxConf FQ selection relies on queue id from the stack. * In case of a forwarded frame from another DPNI interface, we choose @@ -1175,27 +1436,32 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb, queue_mapping %= dpaa2_eth_queue_count(priv); } fq = &priv->fq[queue_mapping]; - - fd_len = dpaa2_fd_get_len(&fd); nq = netdev_get_tx_queue(net_dev, queue_mapping); netdev_tx_sent_queue(nq, fd_len); /* Everything that happens after this enqueues might race with * the Tx confirmation callback for this frame */ - for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) { - err = priv->enqueue(priv, fq, &fd, prio, 1, NULL); - if (err != -EBUSY) - break; + max_retries = num_fds * DPAA2_ETH_ENQUEUE_RETRIES; + while (total_enqueued < num_fds && retries < max_retries) { + err = priv->enqueue(priv, fq, &fd[total_enqueued], + prio, num_fds - total_enqueued, &enqueued); + if (err == -EBUSY) { + retries++; + continue; + } + + total_enqueued += enqueued; } - percpu_extras->tx_portal_busy += i; + percpu_extras->tx_portal_busy += retries; + if (unlikely(err < 0)) { percpu_stats->tx_errors++; /* Clean up everything, including freeing the skb */ - dpaa2_eth_free_tx_fd(priv, fq, &fd, false); + dpaa2_eth_free_tx_fd(priv, fq, fd, false); netdev_tx_completed_queue(nq, 1, fd_len); } else { - percpu_stats->tx_packets++; + percpu_stats->tx_packets += total_enqueued; percpu_stats->tx_bytes += fd_len; } @@ -1523,7 +1789,7 @@ static void dpaa2_eth_sgt_cache_drain(struct dpaa2_eth_priv *priv) count = sgt_cache->count; for (i = 0; i < count; i++) - kfree(sgt_cache->buf[i]); + skb_free_frag(sgt_cache->buf[i]); sgt_cache->count = 0; } } @@ -2207,6 +2473,9 @@ static int dpaa2_eth_ts_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) config.rx_filter = HWTSTAMP_FILTER_ALL; } + if (priv->tx_tstamp_type == HWTSTAMP_TX_ONESTEP_SYNC) + dpaa2_ptp_onestep_reg_update_method(priv); + return copy_to_user(rq->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; } @@ -4100,6 +4369,8 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev) return err; } + dpaa2_eth_detect_features(priv); + /* Capabilities listing */ supported |= IFF_LIVE_ADDR_CHANGE; @@ -4115,7 +4386,8 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev) net_dev->features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | - NETIF_F_LLTX | NETIF_F_HW_TC; + NETIF_F_LLTX | NETIF_F_HW_TC | NETIF_F_TSO; + net_dev->gso_max_segs = DPAA2_ETH_ENQUEUE_MAX_FDS; net_dev->hw_features = net_dev->features; if (priv->dpni_attrs.vlan_filter_entries) @@ -4397,6 +4669,13 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) goto err_alloc_sgt_cache; } + priv->fd = alloc_percpu(*priv->fd); + if (!priv->fd) { + dev_err(dev, "alloc_percpu(fds) failed\n"); + err = -ENOMEM; + goto err_alloc_fds; + } + err = dpaa2_eth_netdev_init(net_dev); if (err) goto err_netdev_init; @@ -4484,6 +4763,8 @@ err_poll_thread: err_alloc_rings: err_csum: err_netdev_init: + free_percpu(priv->fd); +err_alloc_fds: free_percpu(priv->sgt_cache); err_alloc_sgt_cache: free_percpu(priv->percpu_extras); @@ -4539,6 +4820,7 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) fsl_mc_free_irqs(ls_dev); dpaa2_eth_free_rings(priv); + free_percpu(priv->fd); free_percpu(priv->sgt_cache); free_percpu(priv->percpu_stats); free_percpu(priv->percpu_extras); @@ -4547,6 +4829,8 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) dpaa2_eth_free_dpbp(priv); dpaa2_eth_free_dpio(priv); dpaa2_eth_free_dpni(priv); + if (priv->onestep_reg_base) + iounmap(priv->onestep_reg_base); fsl_mc_portal_free(priv->mc_io); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index e54e70ebdd05..447718483ef4 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -122,6 +122,7 @@ enum dpaa2_eth_swa_type { DPAA2_ETH_SWA_SINGLE, DPAA2_ETH_SWA_SG, DPAA2_ETH_SWA_XDP, + DPAA2_ETH_SWA_SW_TSO, }; /* Must keep this struct smaller than DPAA2_ETH_SWA_SIZE */ @@ -142,6 +143,12 @@ struct dpaa2_eth_swa { int dma_size; struct xdp_frame *xdpf; } xdp; + struct { + struct sk_buff *skb; + int num_sg; + int sgt_size; + int is_last_fd; + } tso; }; }; @@ -354,6 +361,8 @@ struct dpaa2_eth_drv_stats { __u64 tx_conf_bytes; __u64 tx_sg_frames; __u64 tx_sg_bytes; + __u64 tx_tso_frames; + __u64 tx_tso_bytes; __u64 rx_sg_frames; __u64 rx_sg_bytes; /* Linear skbs sent as a S/G FD due to insufficient headroom */ @@ -493,8 +502,15 @@ struct dpaa2_eth_trap_data { struct dpaa2_eth_priv *priv; }; +#define DPAA2_ETH_SG_ENTRIES_MAX (PAGE_SIZE / sizeof(struct scatterlist)) + #define DPAA2_ETH_DEFAULT_COPYBREAK 512 +#define DPAA2_ETH_ENQUEUE_MAX_FDS 200 +struct dpaa2_eth_fds { + struct dpaa2_fd array[DPAA2_ETH_ENQUEUE_MAX_FDS]; +}; + /* Driver private data */ struct dpaa2_eth_priv { struct net_device *net_dev; @@ -510,12 +526,15 @@ struct dpaa2_eth_priv { u8 num_channels; struct dpaa2_eth_channel *channel[DPAA2_ETH_MAX_DPCONS]; struct dpaa2_eth_sgt_cache __percpu *sgt_cache; - + unsigned long features; struct dpni_attr dpni_attrs; u16 dpni_ver_major; u16 dpni_ver_minor; u16 tx_data_offset; - + void __iomem *onestep_reg_base; + u8 ptp_correction_off; + void (*dpaa2_set_onestep_params_cb)(struct dpaa2_eth_priv *priv, + u32 offset, u8 udp); struct fsl_mc_device *dpbp_dev; u16 rx_buf_size; u16 bpid; @@ -577,6 +596,8 @@ struct dpaa2_eth_priv { struct devlink_port devlink_port; u32 rx_copybreak; + + struct dpaa2_eth_fds __percpu *fd; }; struct dpaa2_eth_devlink_priv { @@ -655,6 +676,13 @@ enum dpaa2_eth_rx_dist { #define DPAA2_ETH_DIST_L4DST BIT(8) #define DPAA2_ETH_DIST_ALL (~0ULL) +#define DPNI_PTP_ONESTEP_VER_MAJOR 8 +#define DPNI_PTP_ONESTEP_VER_MINOR 2 +#define DPAA2_ETH_FEATURE_ONESTEP_CFG_DIRECT BIT(0) +#define DPAA2_PTP_SINGLE_STEP_ENABLE BIT(31) +#define DPAA2_PTP_SINGLE_STEP_CH BIT(7) +#define DPAA2_PTP_SINGLE_CORRECTION_OFF(v) ((v) << 8) + #define DPNI_PAUSE_VER_MAJOR 7 #define DPNI_PAUSE_VER_MINOR 13 #define dpaa2_eth_has_pause_support(priv) \ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index 3fdbf87dccb1..eea7d7a07c00 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -44,6 +44,8 @@ static char dpaa2_ethtool_extras[][ETH_GSTRING_LEN] = { "[drv] tx conf bytes", "[drv] tx sg frames", "[drv] tx sg bytes", + "[drv] tx tso frames", + "[drv] tx tso bytes", "[drv] rx sg frames", "[drv] rx sg bytes", "[drv] tx converted sg frames", diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 623d113b6581..521f036d1c00 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -100,6 +100,14 @@ static int dpaa2_mac_get_if_mode(struct fwnode_handle *dpmac_node, return err; } +static struct phylink_pcs *dpaa2_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) +{ + struct dpaa2_mac *mac = phylink_to_dpaa2_mac(config); + + return mac->pcs; +} + static void dpaa2_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { @@ -172,6 +180,7 @@ static void dpaa2_mac_link_down(struct phylink_config *config, static const struct phylink_mac_ops dpaa2_mac_phylink_ops = { .validate = phylink_generic_validate, + .mac_select_pcs = dpaa2_mac_select_pcs, .mac_config = dpaa2_mac_config, .mac_link_up = dpaa2_mac_link_up, .mac_link_down = dpaa2_mac_link_down, @@ -303,9 +312,6 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac) } mac->phylink = phylink; - if (mac->pcs) - phylink_set_pcs(mac->phylink, mac->pcs); - err = phylink_fwnode_phy_connect(mac->phylink, dpmac_node, 0); if (err) { netdev_err(net_dev, "phylink_fwnode_phy_connect() = %d\n", err); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h index 9f80bdfeedec..828f538097af 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h @@ -98,7 +98,7 @@ #define DPNI_CMDID_GET_LINK_CFG DPNI_CMD(0x278) #define DPNI_CMDID_SET_SINGLE_STEP_CFG DPNI_CMD(0x279) -#define DPNI_CMDID_GET_SINGLE_STEP_CFG DPNI_CMD(0x27a) +#define DPNI_CMDID_GET_SINGLE_STEP_CFG DPNI_CMD_V2(0x27a) /* Macros for accessing command fields smaller than 1byte */ #define DPNI_MASK(field) \ @@ -658,12 +658,16 @@ struct dpni_cmd_single_step_cfg { __le16 flags; __le16 offset; __le32 peer_delay; + __le32 ptp_onestep_reg_base; + __le32 pad0; }; struct dpni_rsp_single_step_cfg { __le16 flags; __le16 offset; __le32 peer_delay; + __le32 ptp_onestep_reg_base; + __le32 pad0; }; struct dpni_cmd_enable_vlan_filter { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c index d6afada99fb6..6c3b36f20fb8 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c @@ -2136,6 +2136,8 @@ int dpni_get_single_step_cfg(struct fsl_mc_io *mc_io, ptp_cfg->ch_update = dpni_get_field(le16_to_cpu(rsp_params->flags), PTP_CH_UPDATE) ? 1 : 0; ptp_cfg->peer_delay = le32_to_cpu(rsp_params->peer_delay); + ptp_cfg->ptp_onestep_reg_base = + le32_to_cpu(rsp_params->ptp_onestep_reg_base); return err; } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h index 7de0562bbf59..6fffd519aa00 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h @@ -1074,12 +1074,18 @@ int dpni_set_tx_shaping(struct fsl_mc_io *mc_io, * @peer_delay: For peer-to-peer transparent clocks add this value to the * correction field in addition to the transient time update. * The value expresses nanoseconds. + * @ptp_onestep_reg_base: 1588 SINGLE_STEP register base address. This address + * is used to update directly the register contents. + * User has to create an address mapping for it. + * + * */ struct dpni_single_step_cfg { u8 en; u8 ch_update; u16 offset; u32 peer_delay; + u32 ptp_onestep_reg_base; }; int dpni_set_single_step_cfg(struct fsl_mc_io *mc_io, diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index fb39e406b7fc..68d806dc3701 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -18,6 +18,8 @@ #define ENETC_MAX_MTU (ENETC_MAC_MAXFRM_SIZE - \ (ETH_FCS_LEN + ETH_HLEN + VLAN_HLEN)) +#define ENETC_CBD_DATA_MEM_ALIGN 64 + struct enetc_tx_swbd { union { struct sk_buff *skb; @@ -415,6 +417,42 @@ int enetc_get_rss_table(struct enetc_si *si, u32 *table, int count); int enetc_set_rss_table(struct enetc_si *si, const u32 *table, int count); int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd); +static inline void *enetc_cbd_alloc_data_mem(struct enetc_si *si, + struct enetc_cbd *cbd, + int size, dma_addr_t *dma, + void **data_align) +{ + struct enetc_cbdr *ring = &si->cbd_ring; + dma_addr_t dma_align; + void *data; + + data = dma_alloc_coherent(ring->dma_dev, + size + ENETC_CBD_DATA_MEM_ALIGN, + dma, GFP_KERNEL); + if (!data) { + dev_err(ring->dma_dev, "CBD alloc data memory failed!\n"); + return NULL; + } + + dma_align = ALIGN(*dma, ENETC_CBD_DATA_MEM_ALIGN); + *data_align = PTR_ALIGN(data, ENETC_CBD_DATA_MEM_ALIGN); + + cbd->addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd->addr[1] = cpu_to_le32(upper_32_bits(dma_align)); + cbd->length = cpu_to_le16(size); + + return data; +} + +static inline void enetc_cbd_free_data_mem(struct enetc_si *si, int size, + void *data, dma_addr_t *dma) +{ + struct enetc_cbdr *ring = &si->cbd_ring; + + dma_free_coherent(ring->dma_dev, size + ENETC_CBD_DATA_MEM_ALIGN, + data, *dma); +} + #ifdef CONFIG_FSL_ENETC_QOS int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data); void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c index 073e56dcca4e..af68dc46a795 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c @@ -166,70 +166,55 @@ int enetc_set_mac_flt_entry(struct enetc_si *si, int index, return enetc_send_cmd(si, &cbd); } -#define RFSE_ALIGN 64 /* Set entry in RFS table */ int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse, int index) { struct enetc_cbdr *ring = &si->cbd_ring; struct enetc_cbd cbd = {.cmd = 0}; - dma_addr_t dma, dma_align; void *tmp, *tmp_align; + dma_addr_t dma; int err; /* fill up the "set" descriptor */ cbd.cmd = 0; cbd.cls = 4; cbd.index = cpu_to_le16(index); - cbd.length = cpu_to_le16(sizeof(*rfse)); cbd.opt[3] = cpu_to_le32(0); /* SI */ - tmp = dma_alloc_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN, - &dma, GFP_KERNEL); - if (!tmp) { - dev_err(ring->dma_dev, "DMA mapping of RFS entry failed!\n"); + tmp = enetc_cbd_alloc_data_mem(si, &cbd, sizeof(*rfse), + &dma, &tmp_align); + if (!tmp) return -ENOMEM; - } - dma_align = ALIGN(dma, RFSE_ALIGN); - tmp_align = PTR_ALIGN(tmp, RFSE_ALIGN); memcpy(tmp_align, rfse, sizeof(*rfse)); - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); - err = enetc_send_cmd(si, &cbd); if (err) dev_err(ring->dma_dev, "FS entry add failed (%d)!", err); - dma_free_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN, - tmp, dma); + enetc_cbd_free_data_mem(si, sizeof(*rfse), tmp, &dma); return err; } -#define RSSE_ALIGN 64 static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count, bool read) { struct enetc_cbdr *ring = &si->cbd_ring; struct enetc_cbd cbd = {.cmd = 0}; - dma_addr_t dma, dma_align; u8 *tmp, *tmp_align; + dma_addr_t dma; int err, i; - if (count < RSSE_ALIGN) + if (count < ENETC_CBD_DATA_MEM_ALIGN) /* HW only takes in a full 64 entry table */ return -EINVAL; - tmp = dma_alloc_coherent(ring->dma_dev, count + RSSE_ALIGN, - &dma, GFP_KERNEL); - if (!tmp) { - dev_err(ring->dma_dev, "DMA mapping of RSS table failed!\n"); + tmp = enetc_cbd_alloc_data_mem(si, &cbd, count, + &dma, (void *)&tmp_align); + if (!tmp) return -ENOMEM; - } - dma_align = ALIGN(dma, RSSE_ALIGN); - tmp_align = PTR_ALIGN(tmp, RSSE_ALIGN); if (!read) for (i = 0; i < count; i++) @@ -238,10 +223,6 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count, /* fill up the descriptor */ cbd.cmd = read ? 2 : 1; cbd.cls = 3; - cbd.length = cpu_to_le16(count); - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); err = enetc_send_cmd(si, &cbd); if (err) @@ -251,7 +232,7 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count, for (i = 0; i < count; i++) table[i] = tmp_align[i]; - dma_free_coherent(ring->dma_dev, count + RSSE_ALIGN, tmp, dma); + enetc_cbd_free_data_mem(si, count, tmp, &dma); return err; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index ed16a5ac9ad0..a0c75c717073 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -934,18 +934,21 @@ static void enetc_mdiobus_destroy(struct enetc_pf *pf) enetc_imdio_remove(pf); } +static struct phylink_pcs * +enetc_pl_mac_select_pcs(struct phylink_config *config, phy_interface_t iface) +{ + struct enetc_pf *pf = phylink_to_enetc_pf(config); + + return pf->pcs; +} + static void enetc_pl_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { struct enetc_pf *pf = phylink_to_enetc_pf(config); - struct enetc_ndev_priv *priv; enetc_mac_config(&pf->si->hw, state->interface); - - priv = netdev_priv(pf->si->ndev); - if (pf->pcs) - phylink_set_pcs(priv->phylink, pf->pcs); } static void enetc_force_rgmii_mac(struct enetc_hw *hw, int speed, int duplex) @@ -1062,6 +1065,7 @@ static void enetc_pl_mac_link_down(struct phylink_config *config, static const struct phylink_mac_ops enetc_mac_phylink_ops = { .validate = phylink_generic_validate, + .mac_select_pcs = enetc_pl_mac_select_pcs, .mac_config = enetc_pl_mac_config, .mac_link_up = enetc_pl_mac_link_up, .mac_link_down = enetc_pl_mac_link_down, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 3555c12edb45..79afb1d7289b 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -52,10 +52,11 @@ static int enetc_setup_taprio(struct net_device *ndev, struct enetc_cbd cbd = {.cmd = 0}; struct tgs_gcl_conf *gcl_config; struct tgs_gcl_data *gcl_data; - struct gce *gce; dma_addr_t dma; + struct gce *gce; u16 data_size; u16 gcl_len; + void *tmp; u32 tge; int err; int i; @@ -82,8 +83,9 @@ static int enetc_setup_taprio(struct net_device *ndev, gcl_config = &cbd.gcl_conf; data_size = struct_size(gcl_data, entry, gcl_len); - gcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!gcl_data) + tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, + &dma, (void *)&gcl_data); + if (!tmp) return -ENOMEM; gce = (struct gce *)(gcl_data + 1); @@ -107,19 +109,8 @@ static int enetc_setup_taprio(struct net_device *ndev, temp_gce->period = cpu_to_le32(temp_entry->interval); } - cbd.length = cpu_to_le16(data_size); cbd.status_flags = 0; - dma = dma_map_single(&priv->si->pdev->dev, gcl_data, - data_size, DMA_TO_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - kfree(gcl_data); - return -ENOMEM; - } - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); cbd.cls = BDCR_CMD_PORT_GCL; cbd.status_flags = 0; @@ -132,8 +123,7 @@ static int enetc_setup_taprio(struct net_device *ndev, ENETC_QBV_PTGCR_OFFSET, tge & (~ENETC_QBV_TGE)); - dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_TO_DEVICE); - kfree(gcl_data); + enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma); return err; } @@ -450,6 +440,7 @@ static struct actions_fwd enetc_act_fwd[] = { }; static struct enetc_psfp epsfp = { + .dev_bitmap = 0, .psfp_sfi_bitmap = NULL, }; @@ -463,8 +454,9 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, struct enetc_cbd cbd = {.cmd = 0}; struct streamid_data *si_data; struct streamid_conf *si_conf; - u16 data_size; dma_addr_t dma; + u16 data_size; + void *tmp; int port; int err; @@ -485,21 +477,11 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, cbd.status_flags = 0; data_size = sizeof(struct streamid_data); - si_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!si_data) + tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, + &dma, (void *)&si_data); + if (!tmp) return -ENOMEM; - cbd.length = cpu_to_le16(data_size); - dma = dma_map_single(&priv->si->pdev->dev, si_data, - data_size, DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - err = -ENOMEM; - goto out; - } - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); eth_broadcast_addr(si_data->dmac); si_data->vid_vidm_tg = (ENETC_CBDR_SID_VID_MASK + ((0x3 << 14) | ENETC_CBDR_SID_VIDM)); @@ -520,11 +502,6 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, goto out; /* Enable the entry overwrite again incase space flushed by hardware */ - memset(&cbd, 0, sizeof(cbd)); - - cbd.index = cpu_to_le16((u16)sid->index); - cbd.cmd = 0; - cbd.cls = BDCR_CMD_STREAM_IDENTIFY; cbd.status_flags = 0; si_conf->en = 0x80; @@ -537,11 +514,6 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, memset(si_data, 0, data_size); - cbd.length = cpu_to_le16(data_size); - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); - /* VIDM default to be 1. * VID Match. If set (b1) then the VID must match, otherwise * any VID is considered a match. VIDM setting is only used @@ -561,10 +533,7 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); out: - if (!dma_mapping_error(&priv->si->pdev->dev, dma)) - dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_FROM_DEVICE); - - kfree(si_data); + enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma); return err; } @@ -635,6 +604,7 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, struct sfi_counter_data *data_buf; dma_addr_t dma; u16 data_size; + void *tmp; int err; cbd.index = cpu_to_le16((u16)index); @@ -643,21 +613,11 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, cbd.status_flags = 0; data_size = sizeof(struct sfi_counter_data); - data_buf = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!data_buf) - return -ENOMEM; - - dma = dma_map_single(&priv->si->pdev->dev, data_buf, - data_size, DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - err = -ENOMEM; - goto exit; - } - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); - cbd.length = cpu_to_le16(data_size); + tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, + &dma, (void *)&data_buf); + if (!tmp) + return -ENOMEM; err = enetc_send_cmd(priv->si, &cbd); if (err) @@ -684,7 +644,8 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, data_buf->flow_meter_dropl; exit: - kfree(data_buf); + enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma); + return err; } @@ -726,6 +687,7 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, dma_addr_t dma; u16 data_size; int err, i; + void *tmp; u64 now; cbd.index = cpu_to_le16(sgi->index); @@ -772,25 +734,11 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, sgcl_config->acl_len = (sgi->num_entries - 1) & 0x3; data_size = struct_size(sgcl_data, sgcl, sgi->num_entries); - - sgcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!sgcl_data) + tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, + &dma, (void *)&sgcl_data); + if (!tmp) return -ENOMEM; - cbd.length = cpu_to_le16(data_size); - - dma = dma_map_single(&priv->si->pdev->dev, - sgcl_data, data_size, - DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - kfree(sgcl_data); - return -ENOMEM; - } - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); - sgce = &sgcl_data->sgcl[0]; sgcl_config->agtst = 0x80; @@ -844,8 +792,7 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); exit: - kfree(sgcl_data); - + enetc_cbd_free_data_mem(priv->si, data_size, tmp, &dma); return err; } @@ -1074,6 +1021,46 @@ static struct actions_fwd *enetc_check_flow_actions(u64 acts, return NULL; } +static int enetc_psfp_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} + static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv, struct flow_cls_offload *f) { @@ -1230,11 +1217,10 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv, /* Flow meter and max frame size */ if (entryp) { - if (entryp->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second"); - err = -EOPNOTSUPP; + err = enetc_psfp_policer_validate(&rule->action, entryp, extack); + if (err) goto free_sfi; - } + if (entryp->police.burst) { fmi = kzalloc(sizeof(*fmi), GFP_KERNEL); if (!fmi) { diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 796133de527e..11227f51404c 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2797,7 +2797,7 @@ static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable) int ret = 0; if (enable) { - ret = phy_init_eee(ndev->phydev, 0); + ret = phy_init_eee(ndev->phydev, false); if (ret) return ret; diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index af99017a5453..7d49c28215f3 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -101,7 +101,6 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable) u32 val, tempval; struct timespec64 ts; u64 ns; - val = 0; if (fep->pps_enable == enable) return 0; diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index 266e562bd67a..ef8058a17188 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -14,6 +14,7 @@ #include <linux/acpi.h> #include <linux/acpi_mdio.h> +#include <linux/clk.h> #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/mdio.h> @@ -36,9 +37,10 @@ struct tgec_mdio_controller { } __packed; #define MDIO_STAT_ENC BIT(6) -#define MDIO_STAT_CLKDIV(x) (((x>>1) & 0xff) << 8) +#define MDIO_STAT_CLKDIV(x) (((x) & 0x1ff) << 7) #define MDIO_STAT_BSY BIT(0) #define MDIO_STAT_RD_ER BIT(1) +#define MDIO_STAT_PRE_DIS BIT(5) #define MDIO_CTL_DEV_ADDR(x) (x & 0x1f) #define MDIO_CTL_PORT_ADDR(x) ((x & 0x1f) << 5) #define MDIO_CTL_PRE_DIS BIT(10) @@ -50,6 +52,8 @@ struct tgec_mdio_controller { struct mdio_fsl_priv { struct tgec_mdio_controller __iomem *mdio_base; + struct clk *enet_clk; + u32 mdc_freq; bool is_little_endian; bool has_a009885; bool has_a011043; @@ -254,6 +258,50 @@ irq_restore: return ret; } +static int xgmac_mdio_set_mdc_freq(struct mii_bus *bus) +{ + struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv; + struct tgec_mdio_controller __iomem *regs = priv->mdio_base; + struct device *dev = bus->parent; + u32 mdio_stat, div; + + if (device_property_read_u32(dev, "clock-frequency", &priv->mdc_freq)) + return 0; + + priv->enet_clk = devm_clk_get(dev, NULL); + if (IS_ERR(priv->enet_clk)) { + dev_err(dev, "Input clock unknown, not changing MDC frequency"); + return PTR_ERR(priv->enet_clk); + } + + div = ((clk_get_rate(priv->enet_clk) / priv->mdc_freq) - 1) / 2; + if (div < 5 || div > 0x1ff) { + dev_err(dev, "Requested MDC frequency is out of range, ignoring"); + return -EINVAL; + } + + mdio_stat = xgmac_read32(®s->mdio_stat, priv->is_little_endian); + mdio_stat &= ~MDIO_STAT_CLKDIV(0x1ff); + mdio_stat |= MDIO_STAT_CLKDIV(div); + xgmac_write32(mdio_stat, ®s->mdio_stat, priv->is_little_endian); + return 0; +} + +static void xgmac_mdio_set_suppress_preamble(struct mii_bus *bus) +{ + struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv; + struct tgec_mdio_controller __iomem *regs = priv->mdio_base; + struct device *dev = bus->parent; + u32 mdio_stat; + + if (!device_property_read_bool(dev, "suppress-preamble")) + return; + + mdio_stat = xgmac_read32(®s->mdio_stat, priv->is_little_endian); + mdio_stat |= MDIO_STAT_PRE_DIS; + xgmac_write32(mdio_stat, ®s->mdio_stat, priv->is_little_endian); +} + static int xgmac_mdio_probe(struct platform_device *pdev) { struct fwnode_handle *fwnode; @@ -273,7 +321,7 @@ static int xgmac_mdio_probe(struct platform_device *pdev) return -EINVAL; } - bus = mdiobus_alloc_size(sizeof(struct mdio_fsl_priv)); + bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(struct mdio_fsl_priv)); if (!bus) return -ENOMEM; @@ -284,13 +332,11 @@ static int xgmac_mdio_probe(struct platform_device *pdev) bus->probe_capabilities = MDIOBUS_C22_C45; snprintf(bus->id, MII_BUS_ID_SIZE, "%pa", &res->start); - /* Set the PHY base address */ priv = bus->priv; - priv->mdio_base = ioremap(res->start, resource_size(res)); - if (!priv->mdio_base) { - ret = -ENOMEM; - goto err_ioremap; - } + priv->mdio_base = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!priv->mdio_base) + return -ENOMEM; /* For both ACPI and DT cases, endianness of MDIO controller * needs to be specified using "little-endian" property. @@ -303,6 +349,12 @@ static int xgmac_mdio_probe(struct platform_device *pdev) priv->has_a011043 = device_property_read_bool(&pdev->dev, "fsl,erratum-a011043"); + xgmac_mdio_set_suppress_preamble(bus); + + ret = xgmac_mdio_set_mdc_freq(bus); + if (ret) + return ret; + fwnode = pdev->dev.fwnode; if (is_of_node(fwnode)) ret = of_mdiobus_register(bus, to_of_node(fwnode)); @@ -312,32 +364,12 @@ static int xgmac_mdio_probe(struct platform_device *pdev) ret = -EINVAL; if (ret) { dev_err(&pdev->dev, "cannot register MDIO bus\n"); - goto err_registration; + return ret; } platform_set_drvdata(pdev, bus); return 0; - -err_registration: - iounmap(priv->mdio_base); - -err_ioremap: - mdiobus_free(bus); - - return ret; -} - -static int xgmac_mdio_remove(struct platform_device *pdev) -{ - struct mii_bus *bus = platform_get_drvdata(pdev); - struct mdio_fsl_priv *priv = bus->priv; - - mdiobus_unregister(bus); - iounmap(priv->mdio_base); - mdiobus_free(bus); - - return 0; } static const struct of_device_id xgmac_mdio_match[] = { @@ -364,7 +396,6 @@ static struct platform_driver xgmac_mdio_driver = { .acpi_match_table = xgmac_acpi_match, }, .probe = xgmac_mdio_probe, - .remove = xgmac_mdio_remove, }; module_platform_driver(xgmac_mdio_driver); diff --git a/drivers/net/ethernet/fungible/Kconfig b/drivers/net/ethernet/fungible/Kconfig new file mode 100644 index 000000000000..1ecedecc0f6c --- /dev/null +++ b/drivers/net/ethernet/fungible/Kconfig @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Fungible network driver configuration +# + +config NET_VENDOR_FUNGIBLE + bool "Fungible devices" + default y + help + If you have a Fungible network device, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Fungible cards. If you say Y, you will be asked + for your specific card in the following questions. + +if NET_VENDOR_FUNGIBLE + +config FUN_CORE + tristate + select SBITMAP + help + A service module offering basic common services to Fungible + device drivers. + +source "drivers/net/ethernet/fungible/funeth/Kconfig" + +endif # NET_VENDOR_FUNGIBLE diff --git a/drivers/net/ethernet/fungible/Makefile b/drivers/net/ethernet/fungible/Makefile new file mode 100644 index 000000000000..df759f1585a1 --- /dev/null +++ b/drivers/net/ethernet/fungible/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +# +# Makefile for the Fungible network device drivers. +# + +obj-$(CONFIG_FUN_CORE) += funcore/ +obj-$(CONFIG_FUN_ETH) += funeth/ diff --git a/drivers/net/ethernet/fungible/funcore/Makefile b/drivers/net/ethernet/fungible/funcore/Makefile new file mode 100644 index 000000000000..bc16b264b53e --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +obj-$(CONFIG_FUN_CORE) += funcore.o + +funcore-y := fun_dev.o fun_queue.o diff --git a/drivers/net/ethernet/fungible/funcore/fun_dev.c b/drivers/net/ethernet/fungible/funcore/fun_dev.c new file mode 100644 index 000000000000..5d7aef73df61 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_dev.c @@ -0,0 +1,843 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/aer.h> +#include <linux/bitmap.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/nvme.h> +#include <linux/pci.h> +#include <linux/wait.h> +#include <linux/sched/signal.h> + +#include "fun_queue.h" +#include "fun_dev.h" + +#define FUN_ADMIN_CMD_TO_MS 3000 + +enum { + AQA_ASQS_SHIFT = 0, + AQA_ACQS_SHIFT = 16, + AQA_MIN_QUEUE_SIZE = 2, + AQA_MAX_QUEUE_SIZE = 4096 +}; + +/* context for admin commands */ +struct fun_cmd_ctx { + fun_admin_callback_t cb; /* callback to invoke on completion */ + void *cb_data; /* user data provided to callback */ + int cpu; /* CPU where the cmd's tag was allocated */ +}; + +/* Context for synchronous admin commands. */ +struct fun_sync_cmd_ctx { + struct completion compl; + u8 *rsp_buf; /* caller provided response buffer */ + unsigned int rsp_len; /* response buffer size */ + u8 rsp_status; /* command response status */ +}; + +/* Wait for the CSTS.RDY bit to match @enabled. */ +static int fun_wait_ready(struct fun_dev *fdev, bool enabled) +{ + unsigned int cap_to = NVME_CAP_TIMEOUT(fdev->cap_reg); + u32 bit = enabled ? NVME_CSTS_RDY : 0; + unsigned long deadline; + + deadline = ((cap_to + 1) * HZ / 2) + jiffies; /* CAP.TO is in 500ms */ + + for (;;) { + u32 csts = readl(fdev->bar + NVME_REG_CSTS); + + if (csts == ~0) { + dev_err(fdev->dev, "CSTS register read %#x\n", csts); + return -EIO; + } + + if ((csts & NVME_CSTS_RDY) == bit) + return 0; + + if (time_is_before_jiffies(deadline)) + break; + + msleep(100); + } + + dev_err(fdev->dev, + "Timed out waiting for device to indicate RDY %u; aborting %s\n", + enabled, enabled ? "initialization" : "reset"); + return -ETIMEDOUT; +} + +/* Check CSTS and return an error if it is unreadable or has unexpected + * RDY value. + */ +static int fun_check_csts_rdy(struct fun_dev *fdev, unsigned int expected_rdy) +{ + u32 csts = readl(fdev->bar + NVME_REG_CSTS); + u32 actual_rdy = csts & NVME_CSTS_RDY; + + if (csts == ~0) { + dev_err(fdev->dev, "CSTS register read %#x\n", csts); + return -EIO; + } + if (actual_rdy != expected_rdy) { + dev_err(fdev->dev, "Unexpected CSTS RDY %u\n", actual_rdy); + return -EINVAL; + } + return 0; +} + +/* Check that CSTS RDY has the expected value. Then write a new value to the CC + * register and wait for CSTS RDY to match the new CC ENABLE state. + */ +static int fun_update_cc_enable(struct fun_dev *fdev, unsigned int initial_rdy) +{ + int rc = fun_check_csts_rdy(fdev, initial_rdy); + + if (rc) + return rc; + writel(fdev->cc_reg, fdev->bar + NVME_REG_CC); + return fun_wait_ready(fdev, !!(fdev->cc_reg & NVME_CC_ENABLE)); +} + +static int fun_disable_ctrl(struct fun_dev *fdev) +{ + fdev->cc_reg &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE); + return fun_update_cc_enable(fdev, 1); +} + +static int fun_enable_ctrl(struct fun_dev *fdev, u32 admin_cqesz_log2, + u32 admin_sqesz_log2) +{ + fdev->cc_reg = (admin_cqesz_log2 << NVME_CC_IOCQES_SHIFT) | + (admin_sqesz_log2 << NVME_CC_IOSQES_SHIFT) | + ((PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT) | + NVME_CC_ENABLE; + + return fun_update_cc_enable(fdev, 0); +} + +static int fun_map_bars(struct fun_dev *fdev, const char *name) +{ + struct pci_dev *pdev = to_pci_dev(fdev->dev); + int err; + + err = pci_request_mem_regions(pdev, name); + if (err) { + dev_err(&pdev->dev, + "Couldn't get PCI memory resources, err %d\n", err); + return err; + } + + fdev->bar = pci_ioremap_bar(pdev, 0); + if (!fdev->bar) { + dev_err(&pdev->dev, "Couldn't map BAR 0\n"); + pci_release_mem_regions(pdev); + return -ENOMEM; + } + + return 0; +} + +static void fun_unmap_bars(struct fun_dev *fdev) +{ + struct pci_dev *pdev = to_pci_dev(fdev->dev); + + if (fdev->bar) { + iounmap(fdev->bar); + fdev->bar = NULL; + pci_release_mem_regions(pdev); + } +} + +static int fun_set_dma_masks(struct device *dev) +{ + int err; + + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + if (err) + dev_err(dev, "DMA mask configuration failed, err %d\n", err); + return err; +} + +static irqreturn_t fun_admin_irq(int irq, void *data) +{ + struct fun_queue *funq = data; + + return fun_process_cq(funq, 0) ? IRQ_HANDLED : IRQ_NONE; +} + +static void fun_complete_admin_cmd(struct fun_queue *funq, void *data, + void *entry, const struct fun_cqe_info *info) +{ + const struct fun_admin_rsp_common *rsp_common = entry; + struct fun_dev *fdev = funq->fdev; + struct fun_cmd_ctx *cmd_ctx; + int cpu; + u16 cid; + + if (info->sqhd == cpu_to_be16(0xffff)) { + dev_dbg(fdev->dev, "adminq event"); + if (fdev->adminq_cb) + fdev->adminq_cb(fdev, entry); + return; + } + + cid = be16_to_cpu(rsp_common->cid); + dev_dbg(fdev->dev, "admin CQE cid %u, op %u, ret %u\n", cid, + rsp_common->op, rsp_common->ret); + + cmd_ctx = &fdev->cmd_ctx[cid]; + if (cmd_ctx->cpu < 0) { + dev_err(fdev->dev, + "admin CQE with CID=%u, op=%u does not match a pending command\n", + cid, rsp_common->op); + return; + } + + if (cmd_ctx->cb) + cmd_ctx->cb(fdev, entry, xchg(&cmd_ctx->cb_data, NULL)); + + cpu = cmd_ctx->cpu; + cmd_ctx->cpu = -1; + sbitmap_queue_clear(&fdev->admin_sbq, cid, cpu); +} + +static int fun_init_cmd_ctx(struct fun_dev *fdev, unsigned int ntags) +{ + unsigned int i; + + fdev->cmd_ctx = kvcalloc(ntags, sizeof(*fdev->cmd_ctx), GFP_KERNEL); + if (!fdev->cmd_ctx) + return -ENOMEM; + + for (i = 0; i < ntags; i++) + fdev->cmd_ctx[i].cpu = -1; + + return 0; +} + +/* Allocate and enable an admin queue and assign it the first IRQ vector. */ +static int fun_enable_admin_queue(struct fun_dev *fdev, + const struct fun_dev_params *areq) +{ + struct fun_queue_alloc_req qreq = { + .cqe_size_log2 = areq->cqe_size_log2, + .sqe_size_log2 = areq->sqe_size_log2, + .cq_depth = areq->cq_depth, + .sq_depth = areq->sq_depth, + .rq_depth = areq->rq_depth, + }; + unsigned int ntags = areq->sq_depth - 1; + struct fun_queue *funq; + int rc; + + if (fdev->admin_q) + return -EEXIST; + + if (areq->sq_depth < AQA_MIN_QUEUE_SIZE || + areq->sq_depth > AQA_MAX_QUEUE_SIZE || + areq->cq_depth < AQA_MIN_QUEUE_SIZE || + areq->cq_depth > AQA_MAX_QUEUE_SIZE) + return -EINVAL; + + fdev->admin_q = fun_alloc_queue(fdev, 0, &qreq); + if (!fdev->admin_q) + return -ENOMEM; + + rc = fun_init_cmd_ctx(fdev, ntags); + if (rc) + goto free_q; + + rc = sbitmap_queue_init_node(&fdev->admin_sbq, ntags, -1, false, + GFP_KERNEL, dev_to_node(fdev->dev)); + if (rc) + goto free_cmd_ctx; + + funq = fdev->admin_q; + funq->cq_vector = 0; + rc = fun_request_irq(funq, dev_name(fdev->dev), fun_admin_irq, funq); + if (rc) + goto free_sbq; + + fun_set_cq_callback(funq, fun_complete_admin_cmd, NULL); + fdev->adminq_cb = areq->event_cb; + + writel((funq->sq_depth - 1) << AQA_ASQS_SHIFT | + (funq->cq_depth - 1) << AQA_ACQS_SHIFT, + fdev->bar + NVME_REG_AQA); + + writeq(funq->sq_dma_addr, fdev->bar + NVME_REG_ASQ); + writeq(funq->cq_dma_addr, fdev->bar + NVME_REG_ACQ); + + rc = fun_enable_ctrl(fdev, areq->cqe_size_log2, areq->sqe_size_log2); + if (rc) + goto free_irq; + + if (areq->rq_depth) { + rc = fun_create_rq(funq); + if (rc) + goto disable_ctrl; + + funq_rq_post(funq); + } + + return 0; + +disable_ctrl: + fun_disable_ctrl(fdev); +free_irq: + fun_free_irq(funq); +free_sbq: + sbitmap_queue_free(&fdev->admin_sbq); +free_cmd_ctx: + kvfree(fdev->cmd_ctx); + fdev->cmd_ctx = NULL; +free_q: + fun_free_queue(fdev->admin_q); + fdev->admin_q = NULL; + return rc; +} + +static void fun_disable_admin_queue(struct fun_dev *fdev) +{ + struct fun_queue *admq = fdev->admin_q; + + if (!admq) + return; + + fun_disable_ctrl(fdev); + + fun_free_irq(admq); + __fun_process_cq(admq, 0); + + sbitmap_queue_free(&fdev->admin_sbq); + + kvfree(fdev->cmd_ctx); + fdev->cmd_ctx = NULL; + + fun_free_queue(admq); + fdev->admin_q = NULL; +} + +/* Return %true if the admin queue has stopped servicing commands as can be + * detected through registers. This isn't exhaustive and may provide false + * negatives. + */ +static bool fun_adminq_stopped(struct fun_dev *fdev) +{ + u32 csts = readl(fdev->bar + NVME_REG_CSTS); + + return (csts & (NVME_CSTS_CFS | NVME_CSTS_RDY)) != NVME_CSTS_RDY; +} + +static int fun_wait_for_tag(struct fun_dev *fdev, int *cpup) +{ + struct sbitmap_queue *sbq = &fdev->admin_sbq; + struct sbq_wait_state *ws = &sbq->ws[0]; + DEFINE_SBQ_WAIT(wait); + int tag; + + for (;;) { + sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_UNINTERRUPTIBLE); + if (fdev->suppress_cmds) { + tag = -ESHUTDOWN; + break; + } + tag = sbitmap_queue_get(sbq, cpup); + if (tag >= 0) + break; + schedule(); + } + + sbitmap_finish_wait(sbq, ws, &wait); + return tag; +} + +/* Submit an asynchronous admin command. Caller is responsible for implementing + * any waiting or timeout. Upon command completion the callback @cb is called. + */ +int fun_submit_admin_cmd(struct fun_dev *fdev, struct fun_admin_req_common *cmd, + fun_admin_callback_t cb, void *cb_data, bool wait_ok) +{ + struct fun_queue *funq = fdev->admin_q; + unsigned int cmdsize = cmd->len8 * 8; + struct fun_cmd_ctx *cmd_ctx; + int tag, cpu, rc = 0; + + if (WARN_ON(cmdsize > (1 << funq->sqe_size_log2))) + return -EMSGSIZE; + + tag = sbitmap_queue_get(&fdev->admin_sbq, &cpu); + if (tag < 0) { + if (!wait_ok) + return -EAGAIN; + tag = fun_wait_for_tag(fdev, &cpu); + if (tag < 0) + return tag; + } + + cmd->cid = cpu_to_be16(tag); + + cmd_ctx = &fdev->cmd_ctx[tag]; + cmd_ctx->cb = cb; + cmd_ctx->cb_data = cb_data; + + spin_lock(&funq->sq_lock); + + if (unlikely(fdev->suppress_cmds)) { + rc = -ESHUTDOWN; + sbitmap_queue_clear(&fdev->admin_sbq, tag, cpu); + } else { + cmd_ctx->cpu = cpu; + memcpy(fun_sqe_at(funq, funq->sq_tail), cmd, cmdsize); + + dev_dbg(fdev->dev, "admin cmd @ %u: %8ph\n", funq->sq_tail, + cmd); + + if (++funq->sq_tail == funq->sq_depth) + funq->sq_tail = 0; + writel(funq->sq_tail, funq->sq_db); + } + spin_unlock(&funq->sq_lock); + return rc; +} + +/* Abandon a pending admin command by clearing the issuer's callback data. + * Failure indicates that the command either has already completed or its + * completion is racing with this call. + */ +static bool fun_abandon_admin_cmd(struct fun_dev *fd, + const struct fun_admin_req_common *cmd, + void *cb_data) +{ + u16 cid = be16_to_cpu(cmd->cid); + struct fun_cmd_ctx *cmd_ctx = &fd->cmd_ctx[cid]; + + return cmpxchg(&cmd_ctx->cb_data, cb_data, NULL) == cb_data; +} + +/* Stop submission of new admin commands and wake up any processes waiting for + * tags. Already submitted commands are left to complete or time out. + */ +static void fun_admin_stop(struct fun_dev *fdev) +{ + spin_lock(&fdev->admin_q->sq_lock); + fdev->suppress_cmds = true; + spin_unlock(&fdev->admin_q->sq_lock); + sbitmap_queue_wake_all(&fdev->admin_sbq); +} + +/* The callback for synchronous execution of admin commands. It copies the + * command response to the caller's buffer and signals completion. + */ +static void fun_admin_cmd_sync_cb(struct fun_dev *fd, void *rsp, void *cb_data) +{ + const struct fun_admin_rsp_common *rsp_common = rsp; + struct fun_sync_cmd_ctx *ctx = cb_data; + + if (!ctx) + return; /* command issuer timed out and left */ + if (ctx->rsp_buf) { + unsigned int rsp_len = rsp_common->len8 * 8; + + if (unlikely(rsp_len > ctx->rsp_len)) { + dev_err(fd->dev, + "response for op %u is %uB > response buffer %uB\n", + rsp_common->op, rsp_len, ctx->rsp_len); + rsp_len = ctx->rsp_len; + } + memcpy(ctx->rsp_buf, rsp, rsp_len); + } + ctx->rsp_status = rsp_common->ret; + complete(&ctx->compl); +} + +/* Submit a synchronous admin command. */ +int fun_submit_admin_sync_cmd(struct fun_dev *fdev, + struct fun_admin_req_common *cmd, void *rsp, + size_t rspsize, unsigned int timeout) +{ + struct fun_sync_cmd_ctx ctx = { + .compl = COMPLETION_INITIALIZER_ONSTACK(ctx.compl), + .rsp_buf = rsp, + .rsp_len = rspsize, + }; + unsigned int cmdlen = cmd->len8 * 8; + unsigned long jiffies_left; + int ret; + + ret = fun_submit_admin_cmd(fdev, cmd, fun_admin_cmd_sync_cb, &ctx, + true); + if (ret) + return ret; + + if (!timeout) + timeout = FUN_ADMIN_CMD_TO_MS; + + jiffies_left = wait_for_completion_timeout(&ctx.compl, + msecs_to_jiffies(timeout)); + if (!jiffies_left) { + /* The command timed out. Attempt to cancel it so we can return. + * But if the command is in the process of completing we'll + * wait for it. + */ + if (fun_abandon_admin_cmd(fdev, cmd, &ctx)) { + dev_err(fdev->dev, "admin command timed out: %*ph\n", + cmdlen, cmd); + fun_admin_stop(fdev); + /* see if the timeout was due to a queue failure */ + if (fun_adminq_stopped(fdev)) + dev_err(fdev->dev, + "device does not accept admin commands\n"); + + return -ETIMEDOUT; + } + wait_for_completion(&ctx.compl); + } + + if (ctx.rsp_status) { + dev_err(fdev->dev, "admin command failed, err %d: %*ph\n", + ctx.rsp_status, cmdlen, cmd); + } + + return -ctx.rsp_status; +} +EXPORT_SYMBOL_GPL(fun_submit_admin_sync_cmd); + +/* Return the number of device resources of the requested type. */ +int fun_get_res_count(struct fun_dev *fdev, enum fun_admin_op res) +{ + union { + struct fun_admin_res_count_req req; + struct fun_admin_res_count_rsp rsp; + } cmd; + int rc; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(cmd.req)); + cmd.req.count = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_RES_COUNT, + 0, 0); + + rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common, &cmd.rsp, + sizeof(cmd), 0); + return rc ? rc : be32_to_cpu(cmd.rsp.count.data); +} +EXPORT_SYMBOL_GPL(fun_get_res_count); + +/* Request that the instance of resource @res with the given id be deleted. */ +int fun_res_destroy(struct fun_dev *fdev, enum fun_admin_op res, + unsigned int flags, u32 id) +{ + struct fun_admin_generic_destroy_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(req)), + .destroy = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_DESTROY, + flags, id) + }; + + return fun_submit_admin_sync_cmd(fdev, &req.common, NULL, 0, 0); +} +EXPORT_SYMBOL_GPL(fun_res_destroy); + +/* Bind two entities of the given types and IDs. */ +int fun_bind(struct fun_dev *fdev, enum fun_admin_bind_type type0, + unsigned int id0, enum fun_admin_bind_type type1, + unsigned int id1) +{ + struct { + struct fun_admin_bind_req req; + struct fun_admin_bind_entry entry[2]; + } cmd = { + .req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_BIND, + sizeof(cmd)), + .entry[0] = FUN_ADMIN_BIND_ENTRY_INIT(type0, id0), + .entry[1] = FUN_ADMIN_BIND_ENTRY_INIT(type1, id1), + }; + + return fun_submit_admin_sync_cmd(fdev, &cmd.req.common, NULL, 0, 0); +} +EXPORT_SYMBOL_GPL(fun_bind); + +static int fun_get_dev_limits(struct fun_dev *fdev) +{ + struct pci_dev *pdev = to_pci_dev(fdev->dev); + unsigned int cq_count, sq_count, num_dbs; + int rc; + + rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPCQ); + if (rc < 0) + return rc; + cq_count = rc; + + rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPSQ); + if (rc < 0) + return rc; + sq_count = rc; + + /* The admin queue consumes 1 CQ and at least 1 SQ. To be usable the + * device must provide additional queues. + */ + if (cq_count < 2 || sq_count < 2 + !!fdev->admin_q->rq_depth) + return -EINVAL; + + /* Calculate the max QID based on SQ/CQ/doorbell counts. + * SQ/CQ doorbells alternate. + */ + num_dbs = (pci_resource_len(pdev, 0) - NVME_REG_DBS) / + (fdev->db_stride * 4); + fdev->max_qid = min3(cq_count, sq_count, num_dbs / 2) - 1; + fdev->kern_end_qid = fdev->max_qid + 1; + return 0; +} + +/* Allocate all MSI-X vectors available on a function and at least @min_vecs. */ +static int fun_alloc_irqs(struct pci_dev *pdev, unsigned int min_vecs) +{ + int vecs, num_msix = pci_msix_vec_count(pdev); + + if (num_msix < 0) + return num_msix; + if (min_vecs > num_msix) + return -ERANGE; + + vecs = pci_alloc_irq_vectors(pdev, min_vecs, num_msix, PCI_IRQ_MSIX); + if (vecs > 0) { + dev_info(&pdev->dev, + "Allocated %d IRQ vectors of %d requested\n", + vecs, num_msix); + } else { + dev_err(&pdev->dev, + "Unable to allocate at least %u IRQ vectors\n", + min_vecs); + } + return vecs; +} + +/* Allocate and initialize the IRQ manager state. */ +static int fun_alloc_irq_mgr(struct fun_dev *fdev) +{ + fdev->irq_map = bitmap_zalloc(fdev->num_irqs, GFP_KERNEL); + if (!fdev->irq_map) + return -ENOMEM; + + spin_lock_init(&fdev->irqmgr_lock); + /* mark IRQ 0 allocated, it is used by the admin queue */ + __set_bit(0, fdev->irq_map); + fdev->irqs_avail = fdev->num_irqs - 1; + return 0; +} + +/* Reserve @nirqs of the currently available IRQs and return their indices. */ +int fun_reserve_irqs(struct fun_dev *fdev, unsigned int nirqs, u16 *irq_indices) +{ + unsigned int b, n = 0; + int err = -ENOSPC; + + if (!nirqs) + return 0; + + spin_lock(&fdev->irqmgr_lock); + if (nirqs > fdev->irqs_avail) + goto unlock; + + for_each_clear_bit(b, fdev->irq_map, fdev->num_irqs) { + __set_bit(b, fdev->irq_map); + irq_indices[n++] = b; + if (n >= nirqs) + break; + } + + WARN_ON(n < nirqs); + fdev->irqs_avail -= n; + err = n; +unlock: + spin_unlock(&fdev->irqmgr_lock); + return err; +} +EXPORT_SYMBOL(fun_reserve_irqs); + +/* Release @nirqs previously allocated IRQS with the supplied indices. */ +void fun_release_irqs(struct fun_dev *fdev, unsigned int nirqs, + u16 *irq_indices) +{ + unsigned int i; + + spin_lock(&fdev->irqmgr_lock); + for (i = 0; i < nirqs; i++) + __clear_bit(irq_indices[i], fdev->irq_map); + fdev->irqs_avail += nirqs; + spin_unlock(&fdev->irqmgr_lock); +} +EXPORT_SYMBOL(fun_release_irqs); + +static void fun_serv_handler(struct work_struct *work) +{ + struct fun_dev *fd = container_of(work, struct fun_dev, service_task); + + if (test_bit(FUN_SERV_DISABLED, &fd->service_flags)) + return; + if (fd->serv_cb) + fd->serv_cb(fd); +} + +void fun_serv_stop(struct fun_dev *fd) +{ + set_bit(FUN_SERV_DISABLED, &fd->service_flags); + cancel_work_sync(&fd->service_task); +} +EXPORT_SYMBOL_GPL(fun_serv_stop); + +void fun_serv_restart(struct fun_dev *fd) +{ + clear_bit(FUN_SERV_DISABLED, &fd->service_flags); + if (fd->service_flags) + schedule_work(&fd->service_task); +} +EXPORT_SYMBOL_GPL(fun_serv_restart); + +void fun_serv_sched(struct fun_dev *fd) +{ + if (!test_bit(FUN_SERV_DISABLED, &fd->service_flags)) + schedule_work(&fd->service_task); +} +EXPORT_SYMBOL_GPL(fun_serv_sched); + +/* Check and try to get the device into a proper state for initialization, + * i.e., CSTS.RDY = CC.EN = 0. + */ +static int sanitize_dev(struct fun_dev *fdev) +{ + int rc; + + fdev->cap_reg = readq(fdev->bar + NVME_REG_CAP); + fdev->cc_reg = readl(fdev->bar + NVME_REG_CC); + + /* First get RDY to agree with the current EN. Give RDY the opportunity + * to complete a potential recent EN change. + */ + rc = fun_wait_ready(fdev, fdev->cc_reg & NVME_CC_ENABLE); + if (rc) + return rc; + + /* Next, reset the device if EN is currently 1. */ + if (fdev->cc_reg & NVME_CC_ENABLE) + rc = fun_disable_ctrl(fdev); + + return rc; +} + +/* Undo the device initialization of fun_dev_enable(). */ +void fun_dev_disable(struct fun_dev *fdev) +{ + struct pci_dev *pdev = to_pci_dev(fdev->dev); + + pci_set_drvdata(pdev, NULL); + + if (fdev->fw_handle != FUN_HCI_ID_INVALID) { + fun_res_destroy(fdev, FUN_ADMIN_OP_SWUPGRADE, 0, + fdev->fw_handle); + fdev->fw_handle = FUN_HCI_ID_INVALID; + } + + fun_disable_admin_queue(fdev); + + bitmap_free(fdev->irq_map); + pci_free_irq_vectors(pdev); + + pci_clear_master(pdev); + pci_disable_pcie_error_reporting(pdev); + pci_disable_device(pdev); + + fun_unmap_bars(fdev); +} +EXPORT_SYMBOL(fun_dev_disable); + +/* Perform basic initialization of a device, including + * - PCI config space setup and BAR0 mapping + * - interrupt management initialization + * - 1 admin queue setup + * - determination of some device limits, such as number of queues. + */ +int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev, + const struct fun_dev_params *areq, const char *name) +{ + int rc; + + fdev->dev = &pdev->dev; + rc = fun_map_bars(fdev, name); + if (rc) + return rc; + + rc = fun_set_dma_masks(fdev->dev); + if (rc) + goto unmap; + + rc = pci_enable_device_mem(pdev); + if (rc) { + dev_err(&pdev->dev, "Couldn't enable device, err %d\n", rc); + goto unmap; + } + + pci_enable_pcie_error_reporting(pdev); + + rc = sanitize_dev(fdev); + if (rc) + goto disable_dev; + + fdev->fw_handle = FUN_HCI_ID_INVALID; + fdev->q_depth = NVME_CAP_MQES(fdev->cap_reg) + 1; + fdev->db_stride = 1 << NVME_CAP_STRIDE(fdev->cap_reg); + fdev->dbs = fdev->bar + NVME_REG_DBS; + + INIT_WORK(&fdev->service_task, fun_serv_handler); + fdev->service_flags = FUN_SERV_DISABLED; + fdev->serv_cb = areq->serv_cb; + + rc = fun_alloc_irqs(pdev, areq->min_msix + 1); /* +1 for admin CQ */ + if (rc < 0) + goto disable_dev; + fdev->num_irqs = rc; + + rc = fun_alloc_irq_mgr(fdev); + if (rc) + goto free_irqs; + + pci_set_master(pdev); + rc = fun_enable_admin_queue(fdev, areq); + if (rc) + goto free_irq_mgr; + + rc = fun_get_dev_limits(fdev); + if (rc < 0) + goto disable_admin; + + pci_save_state(pdev); + pci_set_drvdata(pdev, fdev); + pcie_print_link_status(pdev); + dev_dbg(fdev->dev, "q_depth %u, db_stride %u, max qid %d kern_end_qid %d\n", + fdev->q_depth, fdev->db_stride, fdev->max_qid, + fdev->kern_end_qid); + return 0; + +disable_admin: + fun_disable_admin_queue(fdev); +free_irq_mgr: + pci_clear_master(pdev); + bitmap_free(fdev->irq_map); +free_irqs: + pci_free_irq_vectors(pdev); +disable_dev: + pci_disable_pcie_error_reporting(pdev); + pci_disable_device(pdev); +unmap: + fun_unmap_bars(fdev); + return rc; +} +EXPORT_SYMBOL(fun_dev_enable); + +MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>"); +MODULE_DESCRIPTION("Core services driver for Fungible devices"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/ethernet/fungible/funcore/fun_dev.h b/drivers/net/ethernet/fungible/funcore/fun_dev.h new file mode 100644 index 000000000000..9e8c17ce8887 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_dev.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUNDEV_H +#define _FUNDEV_H + +#include <linux/sbitmap.h> +#include <linux/spinlock_types.h> +#include <linux/workqueue.h> +#include "fun_hci.h" + +struct pci_dev; +struct fun_dev; +struct fun_queue; +struct fun_cmd_ctx; +struct fun_queue_alloc_req; + +/* doorbell fields */ +enum { + FUN_DB_QIDX_S = 0, + FUN_DB_INTCOAL_ENTRIES_S = 16, + FUN_DB_INTCOAL_ENTRIES_M = 0x7f, + FUN_DB_INTCOAL_USEC_S = 23, + FUN_DB_INTCOAL_USEC_M = 0x7f, + FUN_DB_IRQ_S = 30, + FUN_DB_IRQ_F = 1 << FUN_DB_IRQ_S, + FUN_DB_IRQ_ARM_S = 31, + FUN_DB_IRQ_ARM_F = 1U << FUN_DB_IRQ_ARM_S +}; + +/* Callback for asynchronous admin commands. + * Invoked on reception of command response. + */ +typedef void (*fun_admin_callback_t)(struct fun_dev *fdev, void *rsp, + void *cb_data); + +/* Callback for events/notifications received by an admin queue. */ +typedef void (*fun_admin_event_cb)(struct fun_dev *fdev, void *cqe); + +/* Callback for pending work handled by the service task. */ +typedef void (*fun_serv_cb)(struct fun_dev *fd); + +/* service task flags */ +enum { + FUN_SERV_DISABLED, /* service task is disabled */ + FUN_SERV_FIRST_AVAIL +}; + +/* Driver state associated with a PCI function. */ +struct fun_dev { + struct device *dev; + + void __iomem *bar; /* start of BAR0 mapping */ + u32 __iomem *dbs; /* start of doorbells in BAR0 mapping */ + + /* admin queue */ + struct fun_queue *admin_q; + struct sbitmap_queue admin_sbq; + struct fun_cmd_ctx *cmd_ctx; + fun_admin_event_cb adminq_cb; + bool suppress_cmds; /* if set don't write commands to SQ */ + + /* address increment between consecutive doorbells, in 4B units */ + unsigned int db_stride; + + /* SW versions of device registers */ + u32 cc_reg; /* CC register */ + u64 cap_reg; /* CAPability register */ + + unsigned int q_depth; /* max queue depth supported by device */ + unsigned int max_qid; /* = #queues - 1, separately for SQs and CQs */ + unsigned int kern_end_qid; /* last qid in the kernel range + 1 */ + + unsigned int fw_handle; + + /* IRQ manager */ + unsigned int num_irqs; + unsigned int irqs_avail; + spinlock_t irqmgr_lock; + unsigned long *irq_map; + + /* The service task handles work that needs a process context */ + struct work_struct service_task; + unsigned long service_flags; + fun_serv_cb serv_cb; +}; + +struct fun_dev_params { + u8 cqe_size_log2; /* admin q CQE size */ + u8 sqe_size_log2; /* admin q SQE size */ + + /* admin q depths */ + u16 cq_depth; + u16 sq_depth; + u16 rq_depth; + + u16 min_msix; /* min vectors needed by requesting driver */ + + fun_admin_event_cb event_cb; + fun_serv_cb serv_cb; +}; + +/* Return the BAR address of a doorbell. */ +static inline u32 __iomem *fun_db_addr(const struct fun_dev *fdev, + unsigned int db_index) +{ + return &fdev->dbs[db_index * fdev->db_stride]; +} + +/* Return the BAR address of an SQ doorbell. SQ and CQ DBs alternate, + * SQs have even DB indices. + */ +static inline u32 __iomem *fun_sq_db_addr(const struct fun_dev *fdev, + unsigned int sqid) +{ + return fun_db_addr(fdev, sqid * 2); +} + +static inline u32 __iomem *fun_cq_db_addr(const struct fun_dev *fdev, + unsigned int cqid) +{ + return fun_db_addr(fdev, cqid * 2 + 1); +} + +int fun_get_res_count(struct fun_dev *fdev, enum fun_admin_op res); +int fun_res_destroy(struct fun_dev *fdev, enum fun_admin_op res, + unsigned int flags, u32 id); +int fun_bind(struct fun_dev *fdev, enum fun_admin_bind_type type0, + unsigned int id0, enum fun_admin_bind_type type1, + unsigned int id1); + +int fun_submit_admin_cmd(struct fun_dev *fdev, struct fun_admin_req_common *cmd, + fun_admin_callback_t cb, void *cb_data, bool wait_ok); +int fun_submit_admin_sync_cmd(struct fun_dev *fdev, + struct fun_admin_req_common *cmd, void *rsp, + size_t rspsize, unsigned int timeout); + +int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev, + const struct fun_dev_params *areq, const char *name); +void fun_dev_disable(struct fun_dev *fdev); + +int fun_reserve_irqs(struct fun_dev *fdev, unsigned int nirqs, + u16 *irq_indices); +void fun_release_irqs(struct fun_dev *fdev, unsigned int nirqs, + u16 *irq_indices); + +void fun_serv_stop(struct fun_dev *fd); +void fun_serv_restart(struct fun_dev *fd); +void fun_serv_sched(struct fun_dev *fd); + +#endif /* _FUNDEV_H */ diff --git a/drivers/net/ethernet/fungible/funcore/fun_hci.h b/drivers/net/ethernet/fungible/funcore/fun_hci.h new file mode 100644 index 000000000000..257203e94b68 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_hci.h @@ -0,0 +1,1202 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef __FUN_HCI_H +#define __FUN_HCI_H + +enum { + FUN_HCI_ID_INVALID = 0xffffffff, +}; + +enum fun_admin_op { + FUN_ADMIN_OP_BIND = 0x1, + FUN_ADMIN_OP_EPCQ = 0x11, + FUN_ADMIN_OP_EPSQ = 0x12, + FUN_ADMIN_OP_PORT = 0x13, + FUN_ADMIN_OP_ETH = 0x14, + FUN_ADMIN_OP_VI = 0x15, + FUN_ADMIN_OP_SWUPGRADE = 0x1f, + FUN_ADMIN_OP_RSS = 0x21, + FUN_ADMIN_OP_ADI = 0x25, + FUN_ADMIN_OP_KTLS = 0x26, +}; + +enum { + FUN_REQ_COMMON_FLAG_RSP = 0x1, + FUN_REQ_COMMON_FLAG_HEAD_WB = 0x2, + FUN_REQ_COMMON_FLAG_INT = 0x4, + FUN_REQ_COMMON_FLAG_CQE_IN_RQBUF = 0x8, +}; + +struct fun_admin_req_common { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 rsvd0; + __be16 cid; +}; + +#define FUN_ADMIN_REQ_COMMON_INIT(_op, _len8, _flags, _suboff8, _cid) \ + (struct fun_admin_req_common) { \ + .op = (_op), .len8 = (_len8), .flags = cpu_to_be16(_flags), \ + .suboff8 = (_suboff8), .cid = cpu_to_be16(_cid), \ + } + +#define FUN_ADMIN_REQ_COMMON_INIT2(_op, _len) \ + (struct fun_admin_req_common) { \ + .op = (_op), .len8 = (_len) / 8, \ + } + +struct fun_admin_rsp_common { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 ret; + __be16 cid; +}; + +struct fun_admin_write48_req { + __be64 key_to_data; +}; + +#define FUN_ADMIN_WRITE48_REQ_KEY_S 56U +#define FUN_ADMIN_WRITE48_REQ_KEY_M 0xff +#define FUN_ADMIN_WRITE48_REQ_KEY_P_NOSWAP(x) \ + (((__u64)x) << FUN_ADMIN_WRITE48_REQ_KEY_S) + +#define FUN_ADMIN_WRITE48_REQ_DATA_S 0U +#define FUN_ADMIN_WRITE48_REQ_DATA_M 0xffffffffffff +#define FUN_ADMIN_WRITE48_REQ_DATA_P_NOSWAP(x) \ + (((__u64)x) << FUN_ADMIN_WRITE48_REQ_DATA_S) + +#define FUN_ADMIN_WRITE48_REQ_INIT(key, data) \ + (struct fun_admin_write48_req) { \ + .key_to_data = cpu_to_be64( \ + FUN_ADMIN_WRITE48_REQ_KEY_P_NOSWAP(key) | \ + FUN_ADMIN_WRITE48_REQ_DATA_P_NOSWAP(data)), \ + } + +struct fun_admin_write48_rsp { + __be64 key_to_data; +}; + +struct fun_admin_read48_req { + __be64 key_pack; +}; + +#define FUN_ADMIN_READ48_REQ_KEY_S 56U +#define FUN_ADMIN_READ48_REQ_KEY_M 0xff +#define FUN_ADMIN_READ48_REQ_KEY_P_NOSWAP(x) \ + (((__u64)x) << FUN_ADMIN_READ48_REQ_KEY_S) + +#define FUN_ADMIN_READ48_REQ_INIT(key) \ + (struct fun_admin_read48_req) { \ + .key_pack = \ + cpu_to_be64(FUN_ADMIN_READ48_REQ_KEY_P_NOSWAP(key)), \ + } + +struct fun_admin_read48_rsp { + __be64 key_to_data; +}; + +#define FUN_ADMIN_READ48_RSP_KEY_S 56U +#define FUN_ADMIN_READ48_RSP_KEY_M 0xff +#define FUN_ADMIN_READ48_RSP_KEY_G(x) \ + ((be64_to_cpu(x) >> FUN_ADMIN_READ48_RSP_KEY_S) & \ + FUN_ADMIN_READ48_RSP_KEY_M) + +#define FUN_ADMIN_READ48_RSP_RET_S 48U +#define FUN_ADMIN_READ48_RSP_RET_M 0xff +#define FUN_ADMIN_READ48_RSP_RET_G(x) \ + ((be64_to_cpu(x) >> FUN_ADMIN_READ48_RSP_RET_S) & \ + FUN_ADMIN_READ48_RSP_RET_M) + +#define FUN_ADMIN_READ48_RSP_DATA_S 0U +#define FUN_ADMIN_READ48_RSP_DATA_M 0xffffffffffff +#define FUN_ADMIN_READ48_RSP_DATA_G(x) \ + ((be64_to_cpu(x) >> FUN_ADMIN_READ48_RSP_DATA_S) & \ + FUN_ADMIN_READ48_RSP_DATA_M) + +enum fun_admin_bind_type { + FUN_ADMIN_BIND_TYPE_EPCQ = 0x1, + FUN_ADMIN_BIND_TYPE_EPSQ = 0x2, + FUN_ADMIN_BIND_TYPE_PORT = 0x3, + FUN_ADMIN_BIND_TYPE_RSS = 0x4, + FUN_ADMIN_BIND_TYPE_VI = 0x5, + FUN_ADMIN_BIND_TYPE_ETH = 0x6, +}; + +struct fun_admin_bind_entry { + __u8 type; + __u8 rsvd0[3]; + __be32 id; +}; + +#define FUN_ADMIN_BIND_ENTRY_INIT(_type, _id) \ + (struct fun_admin_bind_entry) { \ + .type = (_type), .id = cpu_to_be32(_id), \ + } + +struct fun_admin_bind_req { + struct fun_admin_req_common common; + struct fun_admin_bind_entry entry[]; +}; + +struct fun_admin_bind_rsp { + struct fun_admin_rsp_common bind_rsp_common; +}; + +struct fun_admin_simple_subop { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 data; +}; + +#define FUN_ADMIN_SIMPLE_SUBOP_INIT(_subop, _flags, _data) \ + (struct fun_admin_simple_subop) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .data = cpu_to_be32(_data), \ + } + +enum fun_admin_subop { + FUN_ADMIN_SUBOP_CREATE = 0x10, + FUN_ADMIN_SUBOP_DESTROY = 0x11, + FUN_ADMIN_SUBOP_MODIFY = 0x12, + FUN_ADMIN_SUBOP_RES_COUNT = 0x14, + FUN_ADMIN_SUBOP_READ = 0x15, + FUN_ADMIN_SUBOP_WRITE = 0x16, + FUN_ADMIN_SUBOP_NOTIFY = 0x17, +}; + +enum { + FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR = 0x1, +}; + +struct fun_admin_generic_destroy_req { + struct fun_admin_req_common common; + struct fun_admin_simple_subop destroy; +}; + +struct fun_admin_generic_create_rsp { + struct fun_admin_rsp_common common; + + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; +}; + +struct fun_admin_res_count_req { + struct fun_admin_req_common common; + struct fun_admin_simple_subop count; +}; + +struct fun_admin_res_count_rsp { + struct fun_admin_rsp_common common; + struct fun_admin_simple_subop count; +}; + +enum { + FUN_ADMIN_EPCQ_CREATE_FLAG_INT_EPCQ = 0x2, + FUN_ADMIN_EPCQ_CREATE_FLAG_ENTRY_WR_TPH = 0x4, + FUN_ADMIN_EPCQ_CREATE_FLAG_SL_WR_TPH = 0x8, + FUN_ADMIN_EPCQ_CREATE_FLAG_RQ = 0x80, + FUN_ADMIN_EPCQ_CREATE_FLAG_INT_IQ = 0x100, + FUN_ADMIN_EPCQ_CREATE_FLAG_INT_NOARM = 0x200, + FUN_ADMIN_EPCQ_CREATE_FLAG_DROP_ON_OVERFLOW = 0x400, +}; + +struct fun_admin_epcq_req { + struct fun_admin_req_common common; + union epcq_req_subop { + struct fun_admin_epcq_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 epsqid; + __u8 rsvd1; + __u8 entry_size_log2; + __be16 nentries; + + __be64 address; + + __be16 tailroom; /* per packet tailroom in bytes */ + __u8 headroom; /* per packet headroom in 2B units */ + __u8 intcoal_kbytes; + __u8 intcoal_holdoff_nentries; + __u8 intcoal_holdoff_usecs; + __be16 intid; + + __be32 scan_start_id; + __be32 scan_end_id; + + __be16 tph_cpuid; + __u8 rsvd3[6]; + } create; + + struct fun_admin_epcq_modify_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be16 headroom; /* headroom in bytes */ + __u8 rsvd1[6]; + } modify; + } u; +}; + +#define FUN_ADMIN_EPCQ_CREATE_REQ_INIT( \ + _subop, _flags, _id, _epsqid, _entry_size_log2, _nentries, _address, \ + _tailroom, _headroom, _intcoal_kbytes, _intcoal_holdoff_nentries, \ + _intcoal_holdoff_usecs, _intid, _scan_start_id, _scan_end_id, \ + _tph_cpuid) \ + (struct fun_admin_epcq_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .epsqid = cpu_to_be32(_epsqid), \ + .entry_size_log2 = _entry_size_log2, \ + .nentries = cpu_to_be16(_nentries), \ + .address = cpu_to_be64(_address), \ + .tailroom = cpu_to_be16(_tailroom), .headroom = _headroom, \ + .intcoal_kbytes = _intcoal_kbytes, \ + .intcoal_holdoff_nentries = _intcoal_holdoff_nentries, \ + .intcoal_holdoff_usecs = _intcoal_holdoff_usecs, \ + .intid = cpu_to_be16(_intid), \ + .scan_start_id = cpu_to_be32(_scan_start_id), \ + .scan_end_id = cpu_to_be32(_scan_end_id), \ + .tph_cpuid = cpu_to_be16(_tph_cpuid), \ + } + +#define FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(_subop, _flags, _id, _headroom) \ + (struct fun_admin_epcq_modify_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .headroom = cpu_to_be16(_headroom), \ + } + +enum { + FUN_ADMIN_EPSQ_CREATE_FLAG_INT_EPSQ = 0x2, + FUN_ADMIN_EPSQ_CREATE_FLAG_ENTRY_RD_TPH = 0x4, + FUN_ADMIN_EPSQ_CREATE_FLAG_GL_RD_TPH = 0x8, + FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS = 0x10, + FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS_TPH = 0x20, + FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_EPCQ = 0x40, + FUN_ADMIN_EPSQ_CREATE_FLAG_RQ = 0x80, + FUN_ADMIN_EPSQ_CREATE_FLAG_INT_IQ = 0x100, + FUN_ADMIN_EPSQ_CREATE_FLAG_NO_CMPL = 0x200, +}; + +struct fun_admin_epsq_req { + struct fun_admin_req_common common; + + union epsq_req_subop { + struct fun_admin_epsq_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 epcqid; + __u8 rsvd1; + __u8 entry_size_log2; + __be16 nentries; + + __be64 address; /* DMA address of epsq */ + + __u8 rsvd2[3]; + __u8 intcoal_kbytes; + __u8 intcoal_holdoff_nentries; + __u8 intcoal_holdoff_usecs; + __be16 intid; + + __be32 scan_start_id; + __be32 scan_end_id; + + __u8 rsvd3[4]; + __be16 tph_cpuid; + __u8 buf_size_log2; /* log2 of RQ buffer size */ + __u8 head_wb_size_log2; /* log2 of head write back size */ + + __be64 head_wb_address; /* DMA address for head writeback */ + } create; + } u; +}; + +#define FUN_ADMIN_EPSQ_CREATE_REQ_INIT( \ + _subop, _flags, _id, _epcqid, _entry_size_log2, _nentries, _address, \ + _intcoal_kbytes, _intcoal_holdoff_nentries, _intcoal_holdoff_usecs, \ + _intid, _scan_start_id, _scan_end_id, _tph_cpuid, _buf_size_log2, \ + _head_wb_size_log2, _head_wb_address) \ + (struct fun_admin_epsq_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .epcqid = cpu_to_be32(_epcqid), \ + .entry_size_log2 = _entry_size_log2, \ + .nentries = cpu_to_be16(_nentries), \ + .address = cpu_to_be64(_address), \ + .intcoal_kbytes = _intcoal_kbytes, \ + .intcoal_holdoff_nentries = _intcoal_holdoff_nentries, \ + .intcoal_holdoff_usecs = _intcoal_holdoff_usecs, \ + .intid = cpu_to_be16(_intid), \ + .scan_start_id = cpu_to_be32(_scan_start_id), \ + .scan_end_id = cpu_to_be32(_scan_end_id), \ + .tph_cpuid = cpu_to_be16(_tph_cpuid), \ + .buf_size_log2 = _buf_size_log2, \ + .head_wb_size_log2 = _head_wb_size_log2, \ + .head_wb_address = cpu_to_be64(_head_wb_address), \ + } + +enum { + FUN_PORT_CAP_OFFLOADS = 0x1, + FUN_PORT_CAP_STATS = 0x2, + FUN_PORT_CAP_LOOPBACK = 0x4, + FUN_PORT_CAP_VPORT = 0x8, + FUN_PORT_CAP_TX_PAUSE = 0x10, + FUN_PORT_CAP_RX_PAUSE = 0x20, + FUN_PORT_CAP_AUTONEG = 0x40, + FUN_PORT_CAP_RSS = 0x80, + FUN_PORT_CAP_VLAN_OFFLOADS = 0x100, + FUN_PORT_CAP_ENCAP_OFFLOADS = 0x200, + FUN_PORT_CAP_1000_X = 0x1000, + FUN_PORT_CAP_10G_R = 0x2000, + FUN_PORT_CAP_40G_R4 = 0x4000, + FUN_PORT_CAP_25G_R = 0x8000, + FUN_PORT_CAP_50G_R2 = 0x10000, + FUN_PORT_CAP_50G_R = 0x20000, + FUN_PORT_CAP_100G_R4 = 0x40000, + FUN_PORT_CAP_100G_R2 = 0x80000, + FUN_PORT_CAP_200G_R4 = 0x100000, + FUN_PORT_CAP_FEC_NONE = 0x10000000, + FUN_PORT_CAP_FEC_FC = 0x20000000, + FUN_PORT_CAP_FEC_RS = 0x40000000, +}; + +enum fun_port_brkout_mode { + FUN_PORT_BRKMODE_NA = 0x0, + FUN_PORT_BRKMODE_NONE = 0x1, + FUN_PORT_BRKMODE_2X = 0x2, + FUN_PORT_BRKMODE_4X = 0x3, +}; + +enum { + FUN_PORT_SPEED_AUTO = 0x0, + FUN_PORT_SPEED_10M = 0x1, + FUN_PORT_SPEED_100M = 0x2, + FUN_PORT_SPEED_1G = 0x4, + FUN_PORT_SPEED_10G = 0x8, + FUN_PORT_SPEED_25G = 0x10, + FUN_PORT_SPEED_40G = 0x20, + FUN_PORT_SPEED_50G = 0x40, + FUN_PORT_SPEED_100G = 0x80, + FUN_PORT_SPEED_200G = 0x100, +}; + +enum fun_port_duplex_mode { + FUN_PORT_FULL_DUPLEX = 0x0, + FUN_PORT_HALF_DUPLEX = 0x1, +}; + +enum { + FUN_PORT_FEC_NA = 0x0, + FUN_PORT_FEC_OFF = 0x1, + FUN_PORT_FEC_RS = 0x2, + FUN_PORT_FEC_FC = 0x4, + FUN_PORT_FEC_AUTO = 0x8, +}; + +enum fun_port_link_status { + FUN_PORT_LINK_UP = 0x0, + FUN_PORT_LINK_UP_WITH_ERR = 0x1, + FUN_PORT_LINK_DOWN = 0x2, +}; + +enum fun_port_led_type { + FUN_PORT_LED_OFF = 0x0, + FUN_PORT_LED_AMBER = 0x1, + FUN_PORT_LED_GREEN = 0x2, + FUN_PORT_LED_BEACON_ON = 0x3, + FUN_PORT_LED_BEACON_OFF = 0x4, +}; + +enum { + FUN_PORT_FLAG_MAC_DOWN = 0x1, + FUN_PORT_FLAG_MAC_UP = 0x2, + FUN_PORT_FLAG_NH_DOWN = 0x4, + FUN_PORT_FLAG_NH_UP = 0x8, +}; + +enum { + FUN_PORT_FLAG_ENABLE_NOTIFY = 0x1, +}; + +enum fun_port_lane_attr { + FUN_PORT_LANE_1 = 0x1, + FUN_PORT_LANE_2 = 0x2, + FUN_PORT_LANE_4 = 0x4, + FUN_PORT_LANE_SPEED_10G = 0x100, + FUN_PORT_LANE_SPEED_25G = 0x200, + FUN_PORT_LANE_SPEED_50G = 0x400, + FUN_PORT_LANE_SPLIT = 0x8000, +}; + +enum fun_admin_port_subop { + FUN_ADMIN_PORT_SUBOP_INETADDR_EVENT = 0x24, +}; + +enum fun_admin_port_key { + FUN_ADMIN_PORT_KEY_ILLEGAL = 0x0, + FUN_ADMIN_PORT_KEY_MTU = 0x1, + FUN_ADMIN_PORT_KEY_FEC = 0x2, + FUN_ADMIN_PORT_KEY_SPEED = 0x3, + FUN_ADMIN_PORT_KEY_DEBOUNCE = 0x4, + FUN_ADMIN_PORT_KEY_DUPLEX = 0x5, + FUN_ADMIN_PORT_KEY_MACADDR = 0x6, + FUN_ADMIN_PORT_KEY_LINKMODE = 0x7, + FUN_ADMIN_PORT_KEY_BREAKOUT = 0x8, + FUN_ADMIN_PORT_KEY_ENABLE = 0x9, + FUN_ADMIN_PORT_KEY_DISABLE = 0xa, + FUN_ADMIN_PORT_KEY_ERR_DISABLE = 0xb, + FUN_ADMIN_PORT_KEY_CAPABILITIES = 0xc, + FUN_ADMIN_PORT_KEY_LP_CAPABILITIES = 0xd, + FUN_ADMIN_PORT_KEY_STATS_DMA_LOW = 0xe, + FUN_ADMIN_PORT_KEY_STATS_DMA_HIGH = 0xf, + FUN_ADMIN_PORT_KEY_LANE_ATTRS = 0x10, + FUN_ADMIN_PORT_KEY_LED = 0x11, + FUN_ADMIN_PORT_KEY_ADVERT = 0x12, +}; + +struct fun_subop_imm { + __u8 subop; /* see fun_data_subop enum */ + __u8 flags; + __u8 nsgl; + __u8 rsvd0; + __be32 len; + + __u8 data[]; +}; + +enum fun_subop_sgl_flags { + FUN_SUBOP_SGL_USE_OFF8 = 0x1, + FUN_SUBOP_FLAG_FREE_BUF = 0x2, + FUN_SUBOP_FLAG_IS_REFBUF = 0x4, + FUN_SUBOP_SGL_FLAG_LOCAL = 0x8, +}; + +enum fun_data_op { + FUN_DATAOP_INVALID = 0x0, + FUN_DATAOP_SL = 0x1, /* scatter */ + FUN_DATAOP_GL = 0x2, /* gather */ + FUN_DATAOP_SGL = 0x3, /* scatter-gather */ + FUN_DATAOP_IMM = 0x4, /* immediate data */ + FUN_DATAOP_RQBUF = 0x8, /* rq buffer */ +}; + +struct fun_dataop_gl { + __u8 subop; + __u8 flags; + __be16 sgl_off; + __be32 sgl_len; + + __be64 sgl_data; +}; + +static inline void fun_dataop_gl_init(struct fun_dataop_gl *s, u8 flags, + u16 sgl_off, u32 sgl_len, u64 sgl_data) +{ + s->subop = FUN_DATAOP_GL; + s->flags = flags; + s->sgl_off = cpu_to_be16(sgl_off); + s->sgl_len = cpu_to_be32(sgl_len); + s->sgl_data = cpu_to_be64(sgl_data); +} + +struct fun_dataop_imm { + __u8 subop; + __u8 flags; + __be16 rsvd0; + __be32 sgl_len; +}; + +struct fun_subop_sgl { + __u8 subop; + __u8 flags; + __u8 nsgl; + __u8 rsvd0; + __be32 sgl_len; + + __be64 sgl_data; +}; + +#define FUN_SUBOP_SGL_INIT(_subop, _flags, _nsgl, _sgl_len, _sgl_data) \ + (struct fun_subop_sgl) { \ + .subop = (_subop), .flags = (_flags), .nsgl = (_nsgl), \ + .sgl_len = cpu_to_be32(_sgl_len), \ + .sgl_data = cpu_to_be64(_sgl_data), \ + } + +struct fun_dataop_rqbuf { + __u8 subop; + __u8 rsvd0; + __be16 cid; + __be32 bufoff; +}; + +struct fun_dataop_hdr { + __u8 nsgl; + __u8 flags; + __u8 ngather; + __u8 nscatter; + __be32 total_len; + + struct fun_dataop_imm imm[]; +}; + +#define FUN_DATAOP_HDR_INIT(_nsgl, _flags, _ngather, _nscatter, _total_len) \ + (struct fun_dataop_hdr) { \ + .nsgl = _nsgl, .flags = _flags, .ngather = _ngather, \ + .nscatter = _nscatter, .total_len = cpu_to_be32(_total_len), \ + } + +enum fun_port_inetaddr_event_type { + FUN_PORT_INETADDR_ADD = 0x1, + FUN_PORT_INETADDR_DEL = 0x2, +}; + +enum fun_port_inetaddr_addr_family { + FUN_PORT_INETADDR_IPV4 = 0x1, + FUN_PORT_INETADDR_IPV6 = 0x2, +}; + +struct fun_admin_port_req { + struct fun_admin_req_common common; + + union port_req_subop { + struct fun_admin_port_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + } create; + struct fun_admin_port_write_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; /* portid */ + + struct fun_admin_write48_req write48[]; + } write; + struct fun_admin_port_read_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; /* portid */ + + struct fun_admin_read48_req read48[]; + } read; + struct fun_admin_port_inetaddr_event_req { + __u8 subop; + __u8 rsvd0; + __u8 event_type; + __u8 addr_family; + __be32 id; + + __u8 addr[]; + } inetaddr_event; + } u; +}; + +#define FUN_ADMIN_PORT_CREATE_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_port_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +#define FUN_ADMIN_PORT_WRITE_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_port_write_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +#define FUN_ADMIN_PORT_READ_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_port_read_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +struct fun_admin_port_rsp { + struct fun_admin_rsp_common common; + + union port_rsp_subop { + struct fun_admin_port_create_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; + + __be16 lport; + __u8 rsvd1[6]; + } create; + struct fun_admin_port_write_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; /* portid */ + + struct fun_admin_write48_rsp write48[]; + } write; + struct fun_admin_port_read_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; /* portid */ + + struct fun_admin_read48_rsp read48[]; + } read; + struct fun_admin_port_inetaddr_event_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; /* portid */ + } inetaddr_event; + } u; +}; + +enum fun_xcvr_type { + FUN_XCVR_BASET = 0x0, + FUN_XCVR_CU = 0x1, + FUN_XCVR_SMF = 0x2, + FUN_XCVR_MMF = 0x3, + FUN_XCVR_AOC = 0x4, + FUN_XCVR_SFPP = 0x10, /* SFP+ or later */ + FUN_XCVR_QSFPP = 0x11, /* QSFP+ or later */ + FUN_XCVR_QSFPDD = 0x12, /* QSFP-DD */ +}; + +struct fun_admin_port_notif { + struct fun_admin_rsp_common common; + + __u8 subop; + __u8 rsvd0; + __be16 id; + __be32 speed; /* in 10 Mbps units */ + + __u8 link_state; + __u8 missed_events; + __u8 link_down_reason; + __u8 xcvr_type; + __u8 flow_ctrl; + __u8 fec; + __u8 active_lanes; + __u8 rsvd1; + + __be64 advertising; + + __be64 lp_advertising; +}; + +enum fun_eth_rss_const { + FUN_ETH_RSS_MAX_KEY_SIZE = 0x28, + FUN_ETH_RSS_MAX_INDIR_ENT = 0x40, +}; + +enum fun_eth_hash_alg { + FUN_ETH_RSS_ALG_INVALID = 0x0, + FUN_ETH_RSS_ALG_TOEPLITZ = 0x1, + FUN_ETH_RSS_ALG_CRC32 = 0x2, +}; + +struct fun_admin_rss_req { + struct fun_admin_req_common common; + + union rss_req_subop { + struct fun_admin_rss_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 rsvd1; + __be32 viid; /* VI flow id */ + + __be64 metadata[1]; + + __u8 alg; + __u8 keylen; + __u8 indir_nent; + __u8 rsvd2; + __be16 key_off; + __be16 indir_off; + + struct fun_dataop_hdr dataop; + } create; + } u; +}; + +#define FUN_ADMIN_RSS_CREATE_REQ_INIT(_subop, _flags, _id, _viid, _alg, \ + _keylen, _indir_nent, _key_off, \ + _indir_off) \ + (struct fun_admin_rss_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .viid = cpu_to_be32(_viid), \ + .alg = _alg, .keylen = _keylen, .indir_nent = _indir_nent, \ + .key_off = cpu_to_be16(_key_off), \ + .indir_off = cpu_to_be16(_indir_off), \ + } + +struct fun_admin_vi_req { + struct fun_admin_req_common common; + + union vi_req_subop { + struct fun_admin_vi_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 rsvd1; + __be32 portid; /* port flow id */ + } create; + } u; +}; + +#define FUN_ADMIN_VI_CREATE_REQ_INIT(_subop, _flags, _id, _portid) \ + (struct fun_admin_vi_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .portid = cpu_to_be32(_portid), \ + } + +struct fun_admin_eth_req { + struct fun_admin_req_common common; + + union eth_req_subop { + struct fun_admin_eth_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 rsvd1; + __be32 portid; /* port flow id */ + } create; + } u; +}; + +#define FUN_ADMIN_ETH_CREATE_REQ_INIT(_subop, _flags, _id, _portid) \ + (struct fun_admin_eth_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .portid = cpu_to_be32(_portid), \ + } + +enum { + FUN_ADMIN_SWU_UPGRADE_FLAG_INIT = 0x10, + FUN_ADMIN_SWU_UPGRADE_FLAG_COMPLETE = 0x20, + FUN_ADMIN_SWU_UPGRADE_FLAG_DOWNGRADE = 0x40, + FUN_ADMIN_SWU_UPGRADE_FLAG_ACTIVE_IMAGE = 0x80, + FUN_ADMIN_SWU_UPGRADE_FLAG_ASYNC = 0x1, +}; + +enum fun_admin_swu_subop { + FUN_ADMIN_SWU_SUBOP_GET_VERSION = 0x20, + FUN_ADMIN_SWU_SUBOP_UPGRADE = 0x21, + FUN_ADMIN_SWU_SUBOP_UPGRADE_DATA = 0x22, + FUN_ADMIN_SWU_SUBOP_GET_ALL_VERSIONS = 0x23, +}; + +struct fun_admin_swu_req { + struct fun_admin_req_common common; + + union swu_req_subop { + struct fun_admin_swu_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + } create; + struct fun_admin_swu_upgrade_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 fourcc; + __be32 rsvd1; + + __be64 image_size; /* upgrade image length */ + } upgrade; + struct fun_admin_swu_upgrade_data_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 offset; /* offset of data in this command */ + __be32 size; /* total size of data in this command */ + } upgrade_data; + } u; + + struct fun_subop_sgl sgl[]; /* in, out buffers through sgl */ +}; + +#define FUN_ADMIN_SWU_CREATE_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_swu_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +#define FUN_ADMIN_SWU_UPGRADE_REQ_INIT(_subop, _flags, _id, _fourcc, \ + _image_size) \ + (struct fun_admin_swu_upgrade_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .fourcc = cpu_to_be32(_fourcc), \ + .image_size = cpu_to_be64(_image_size), \ + } + +#define FUN_ADMIN_SWU_UPGRADE_DATA_REQ_INIT(_subop, _flags, _id, _offset, \ + _size) \ + (struct fun_admin_swu_upgrade_data_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .offset = cpu_to_be32(_offset), \ + .size = cpu_to_be32(_size), \ + } + +struct fun_admin_swu_rsp { + struct fun_admin_rsp_common common; + + union swu_rsp_subop { + struct fun_admin_swu_create_rsp { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + } create; + struct fun_admin_swu_upgrade_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; + + __be32 fourcc; + __be32 status; + + __be32 progress; + __be32 unused; + } upgrade; + struct fun_admin_swu_upgrade_data_rsp { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 offset; + __be32 size; + } upgrade_data; + } u; +}; + +enum fun_ktls_version { + FUN_KTLS_TLSV2 = 0x20, + FUN_KTLS_TLSV3 = 0x30, +}; + +enum fun_ktls_cipher { + FUN_KTLS_CIPHER_AES_GCM_128 = 0x33, + FUN_KTLS_CIPHER_AES_GCM_256 = 0x34, + FUN_KTLS_CIPHER_AES_CCM_128 = 0x35, + FUN_KTLS_CIPHER_CHACHA20_POLY1305 = 0x36, +}; + +enum fun_ktls_modify_flags { + FUN_KTLS_MODIFY_REMOVE = 0x1, +}; + +struct fun_admin_ktls_create_req { + struct fun_admin_req_common common; + + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; +}; + +#define FUN_ADMIN_KTLS_CREATE_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_ktls_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +struct fun_admin_ktls_create_rsp { + struct fun_admin_rsp_common common; + + __u8 subop; + __u8 rsvd0[3]; + __be32 id; +}; + +struct fun_admin_ktls_modify_req { + struct fun_admin_req_common common; + + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be64 tlsid; + + __be32 tcp_seq; + __u8 version; + __u8 cipher; + __u8 rsvd1[2]; + + __u8 record_seq[8]; + + __u8 key[32]; + + __u8 iv[16]; + + __u8 salt[8]; +}; + +#define FUN_ADMIN_KTLS_MODIFY_REQ_INIT(_subop, _flags, _id, _tlsid, _tcp_seq, \ + _version, _cipher) \ + (struct fun_admin_ktls_modify_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .tlsid = cpu_to_be64(_tlsid), \ + .tcp_seq = cpu_to_be32(_tcp_seq), .version = _version, \ + .cipher = _cipher, \ + } + +struct fun_admin_ktls_modify_rsp { + struct fun_admin_rsp_common common; + + __u8 subop; + __u8 rsvd0[3]; + __be32 id; + + __be64 tlsid; +}; + +struct fun_req_common { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 rsvd0; + __be16 cid; +}; + +struct fun_rsp_common { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 ret; + __be16 cid; +}; + +struct fun_cqe_info { + __be16 sqhd; + __be16 sqid; + __be16 cid; + __be16 sf_p; +}; + +enum fun_eprq_def { + FUN_EPRQ_PKT_ALIGN = 0x80, +}; + +struct fun_eprq_rqbuf { + __be64 bufaddr; +}; + +#define FUN_EPRQ_RQBUF_INIT(_bufaddr) \ + (struct fun_eprq_rqbuf) { \ + .bufaddr = cpu_to_be64(_bufaddr), \ + } + +enum fun_eth_op { + FUN_ETH_OP_TX = 0x1, + FUN_ETH_OP_RX = 0x2, +}; + +enum { + FUN_ETH_OFFLOAD_EN = 0x8000, + FUN_ETH_OUTER_EN = 0x4000, + FUN_ETH_INNER_LSO = 0x2000, + FUN_ETH_INNER_TSO = 0x1000, + FUN_ETH_OUTER_IPV6 = 0x800, + FUN_ETH_OUTER_UDP = 0x400, + FUN_ETH_INNER_IPV6 = 0x200, + FUN_ETH_INNER_UDP = 0x100, + FUN_ETH_UPDATE_OUTER_L3_LEN = 0x80, + FUN_ETH_UPDATE_OUTER_L3_CKSUM = 0x40, + FUN_ETH_UPDATE_OUTER_L4_LEN = 0x20, + FUN_ETH_UPDATE_OUTER_L4_CKSUM = 0x10, + FUN_ETH_UPDATE_INNER_L3_LEN = 0x8, + FUN_ETH_UPDATE_INNER_L3_CKSUM = 0x4, + FUN_ETH_UPDATE_INNER_L4_LEN = 0x2, + FUN_ETH_UPDATE_INNER_L4_CKSUM = 0x1, +}; + +struct fun_eth_offload { + __be16 flags; /* combination of above flags */ + __be16 mss; /* TSO max seg size */ + __be16 tcp_doff_flags; /* TCP data offset + flags 16b word */ + __be16 vlan; + + __be16 inner_l3_off; /* Inner L3 header offset */ + __be16 inner_l4_off; /* Inner L4 header offset */ + __be16 outer_l3_off; /* Outer L3 header offset */ + __be16 outer_l4_off; /* Outer L4 header offset */ +}; + +static inline void fun_eth_offload_init(struct fun_eth_offload *s, u16 flags, + u16 mss, __be16 tcp_doff_flags, + __be16 vlan, u16 inner_l3_off, + u16 inner_l4_off, u16 outer_l3_off, + u16 outer_l4_off) +{ + s->flags = cpu_to_be16(flags); + s->mss = cpu_to_be16(mss); + s->tcp_doff_flags = tcp_doff_flags; + s->vlan = vlan; + s->inner_l3_off = cpu_to_be16(inner_l3_off); + s->inner_l4_off = cpu_to_be16(inner_l4_off); + s->outer_l3_off = cpu_to_be16(outer_l3_off); + s->outer_l4_off = cpu_to_be16(outer_l4_off); +} + +struct fun_eth_tls { + __be64 tlsid; +}; + +enum { + FUN_ETH_TX_TLS = 0x8000, +}; + +struct fun_eth_tx_req { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 repr_idn; + __be16 encap_proto; + + struct fun_eth_offload offload; + + struct fun_dataop_hdr dataop; +}; + +struct fun_eth_rx_cv { + __be16 il4_prot_to_l2_type; +}; + +#define FUN_ETH_RX_CV_IL4_PROT_S 13U +#define FUN_ETH_RX_CV_IL4_PROT_M 0x3 + +#define FUN_ETH_RX_CV_IL3_PROT_S 11U +#define FUN_ETH_RX_CV_IL3_PROT_M 0x3 + +#define FUN_ETH_RX_CV_OL4_PROT_S 8U +#define FUN_ETH_RX_CV_OL4_PROT_M 0x7 + +#define FUN_ETH_RX_CV_ENCAP_TYPE_S 6U +#define FUN_ETH_RX_CV_ENCAP_TYPE_M 0x3 + +#define FUN_ETH_RX_CV_OL3_PROT_S 4U +#define FUN_ETH_RX_CV_OL3_PROT_M 0x3 + +#define FUN_ETH_RX_CV_VLAN_TYPE_S 3U +#define FUN_ETH_RX_CV_VLAN_TYPE_M 0x1 + +#define FUN_ETH_RX_CV_L2_TYPE_S 2U +#define FUN_ETH_RX_CV_L2_TYPE_M 0x1 + +enum fun_rx_cv { + FUN_RX_CV_NONE = 0x0, + FUN_RX_CV_IP = 0x2, + FUN_RX_CV_IP6 = 0x3, + FUN_RX_CV_TCP = 0x2, + FUN_RX_CV_UDP = 0x3, + FUN_RX_CV_VXLAN = 0x2, + FUN_RX_CV_MPLS = 0x3, +}; + +struct fun_eth_cqe { + __u8 op; + __u8 len8; + __u8 nsgl; + __u8 repr_idn; + __be32 pkt_len; + + __be64 timestamp; + + __be16 pkt_cv; + __be16 rsvd0; + __be32 hash; + + __be16 encap_proto; + __be16 vlan; + __be32 rsvd1; + + __be32 buf_offset; + __be16 headroom; + __be16 csum; +}; + +enum fun_admin_adi_attr { + FUN_ADMIN_ADI_ATTR_MACADDR = 0x1, + FUN_ADMIN_ADI_ATTR_VLAN = 0x2, + FUN_ADMIN_ADI_ATTR_RATE = 0x3, +}; + +struct fun_adi_param { + union adi_param { + struct fun_adi_mac { + __be64 addr; + } mac; + struct fun_adi_vlan { + __be32 rsvd; + __be16 eth_type; + __be16 tci; + } vlan; + struct fun_adi_rate { + __be32 rsvd; + __be32 tx_mbps; + } rate; + } u; +}; + +#define FUN_ADI_MAC_INIT(_addr) \ + (struct fun_adi_mac) { \ + .addr = cpu_to_be64(_addr), \ + } + +#define FUN_ADI_VLAN_INIT(_eth_type, _tci) \ + (struct fun_adi_vlan) { \ + .eth_type = cpu_to_be16(_eth_type), .tci = cpu_to_be16(_tci), \ + } + +#define FUN_ADI_RATE_INIT(_tx_mbps) \ + (struct fun_adi_rate) { \ + .tx_mbps = cpu_to_be32(_tx_mbps), \ + } + +struct fun_admin_adi_req { + struct fun_admin_req_common common; + + union adi_req_subop { + struct fun_admin_adi_write_req { + __u8 subop; + __u8 attribute; + __be16 rsvd; + __be32 id; + + struct fun_adi_param param; + } write; + } u; +}; + +#define FUN_ADMIN_ADI_WRITE_REQ_INIT(_subop, _attribute, _id) \ + (struct fun_admin_adi_write_req) { \ + .subop = (_subop), .attribute = (_attribute), \ + .id = cpu_to_be32(_id), \ + } + +#endif /* __FUN_HCI_H */ diff --git a/drivers/net/ethernet/fungible/funcore/fun_queue.c b/drivers/net/ethernet/fungible/funcore/fun_queue.c new file mode 100644 index 000000000000..8ab9f68434f5 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_queue.c @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/log2.h> +#include <linux/mm.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <linux/slab.h> + +#include "fun_dev.h" +#include "fun_queue.h" + +/* Allocate memory for a queue. This includes the memory for the HW descriptor + * ring, an optional 64b HW write-back area, and an optional SW state ring. + * Returns the virtual and DMA addresses of the HW ring, the VA of the SW ring, + * and the VA of the write-back area. + */ +void *fun_alloc_ring_mem(struct device *dma_dev, size_t depth, + size_t hw_desc_sz, size_t sw_desc_sz, bool wb, + int numa_node, dma_addr_t *dma_addr, void **sw_va, + volatile __be64 **wb_va) +{ + int dev_node = dev_to_node(dma_dev); + size_t dma_sz; + void *va; + + if (numa_node == NUMA_NO_NODE) + numa_node = dev_node; + + /* Place optional write-back area at end of descriptor ring. */ + dma_sz = hw_desc_sz * depth; + if (wb) + dma_sz += sizeof(u64); + + set_dev_node(dma_dev, numa_node); + va = dma_alloc_coherent(dma_dev, dma_sz, dma_addr, GFP_KERNEL); + set_dev_node(dma_dev, dev_node); + if (!va) + return NULL; + + if (sw_desc_sz) { + *sw_va = kvzalloc_node(sw_desc_sz * depth, GFP_KERNEL, + numa_node); + if (!*sw_va) { + dma_free_coherent(dma_dev, dma_sz, va, *dma_addr); + return NULL; + } + } + + if (wb) + *wb_va = va + dma_sz - sizeof(u64); + return va; +} +EXPORT_SYMBOL_GPL(fun_alloc_ring_mem); + +void fun_free_ring_mem(struct device *dma_dev, size_t depth, size_t hw_desc_sz, + bool wb, void *hw_va, dma_addr_t dma_addr, void *sw_va) +{ + if (hw_va) { + size_t sz = depth * hw_desc_sz; + + if (wb) + sz += sizeof(u64); + dma_free_coherent(dma_dev, sz, hw_va, dma_addr); + } + kvfree(sw_va); +} +EXPORT_SYMBOL_GPL(fun_free_ring_mem); + +/* Prepare and issue an admin command to create an SQ on the device with the + * provided parameters. If the queue ID is auto-allocated by the device it is + * returned in *sqidp. + */ +int fun_sq_create(struct fun_dev *fdev, u16 flags, u32 sqid, u32 cqid, + u8 sqe_size_log2, u32 sq_depth, dma_addr_t dma_addr, + u8 coal_nentries, u8 coal_usec, u32 irq_num, + u32 scan_start_id, u32 scan_end_id, + u32 rq_buf_size_log2, u32 *sqidp, u32 __iomem **dbp) +{ + union { + struct fun_admin_epsq_req req; + struct fun_admin_generic_create_rsp rsp; + } cmd; + dma_addr_t wb_addr; + u32 hw_qid; + int rc; + + if (sq_depth > fdev->q_depth) + return -EINVAL; + if (flags & FUN_ADMIN_EPSQ_CREATE_FLAG_RQ) + sqe_size_log2 = ilog2(sizeof(struct fun_eprq_rqbuf)); + + wb_addr = dma_addr + (sq_depth << sqe_size_log2); + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPSQ, + sizeof(cmd.req)); + cmd.req.u.create = + FUN_ADMIN_EPSQ_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, flags, + sqid, cqid, sqe_size_log2, + sq_depth - 1, dma_addr, 0, + coal_nentries, coal_usec, + irq_num, scan_start_id, + scan_end_id, 0, + rq_buf_size_log2, + ilog2(sizeof(u64)), wb_addr); + + rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common, + &cmd.rsp, sizeof(cmd.rsp), 0); + if (rc) + return rc; + + hw_qid = be32_to_cpu(cmd.rsp.id); + *dbp = fun_sq_db_addr(fdev, hw_qid); + if (flags & FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR) + *sqidp = hw_qid; + return rc; +} +EXPORT_SYMBOL_GPL(fun_sq_create); + +/* Prepare and issue an admin command to create a CQ on the device with the + * provided parameters. If the queue ID is auto-allocated by the device it is + * returned in *cqidp. + */ +int fun_cq_create(struct fun_dev *fdev, u16 flags, u32 cqid, u32 rqid, + u8 cqe_size_log2, u32 cq_depth, dma_addr_t dma_addr, + u16 headroom, u16 tailroom, u8 coal_nentries, u8 coal_usec, + u32 irq_num, u32 scan_start_id, u32 scan_end_id, u32 *cqidp, + u32 __iomem **dbp) +{ + union { + struct fun_admin_epcq_req req; + struct fun_admin_generic_create_rsp rsp; + } cmd; + u32 hw_qid; + int rc; + + if (cq_depth > fdev->q_depth) + return -EINVAL; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ, + sizeof(cmd.req)); + cmd.req.u.create = + FUN_ADMIN_EPCQ_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, flags, + cqid, rqid, cqe_size_log2, + cq_depth - 1, dma_addr, tailroom, + headroom / 2, 0, coal_nentries, + coal_usec, irq_num, + scan_start_id, scan_end_id, 0); + + rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common, + &cmd.rsp, sizeof(cmd.rsp), 0); + if (rc) + return rc; + + hw_qid = be32_to_cpu(cmd.rsp.id); + *dbp = fun_cq_db_addr(fdev, hw_qid); + if (flags & FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR) + *cqidp = hw_qid; + return rc; +} +EXPORT_SYMBOL_GPL(fun_cq_create); + +static bool fun_sq_is_head_wb(const struct fun_queue *funq) +{ + return funq->sq_flags & FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS; +} + +static void fun_clean_rq(struct fun_queue *funq) +{ + struct fun_dev *fdev = funq->fdev; + struct fun_rq_info *rqinfo; + unsigned int i; + + for (i = 0; i < funq->rq_depth; i++) { + rqinfo = &funq->rq_info[i]; + if (rqinfo->page) { + dma_unmap_page(fdev->dev, rqinfo->dma, PAGE_SIZE, + DMA_FROM_DEVICE); + put_page(rqinfo->page); + rqinfo->page = NULL; + } + } +} + +static int fun_fill_rq(struct fun_queue *funq) +{ + struct device *dev = funq->fdev->dev; + int i, node = dev_to_node(dev); + struct fun_rq_info *rqinfo; + + for (i = 0; i < funq->rq_depth; i++) { + rqinfo = &funq->rq_info[i]; + rqinfo->page = alloc_pages_node(node, GFP_KERNEL, 0); + if (unlikely(!rqinfo->page)) + return -ENOMEM; + + rqinfo->dma = dma_map_page(dev, rqinfo->page, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, rqinfo->dma))) { + put_page(rqinfo->page); + rqinfo->page = NULL; + return -ENOMEM; + } + + funq->rqes[i] = FUN_EPRQ_RQBUF_INIT(rqinfo->dma); + } + + funq->rq_tail = funq->rq_depth - 1; + return 0; +} + +static void fun_rq_update_pos(struct fun_queue *funq, int buf_offset) +{ + if (buf_offset <= funq->rq_buf_offset) { + struct fun_rq_info *rqinfo = &funq->rq_info[funq->rq_buf_idx]; + struct device *dev = funq->fdev->dev; + + dma_sync_single_for_device(dev, rqinfo->dma, PAGE_SIZE, + DMA_FROM_DEVICE); + funq->num_rqe_to_fill++; + if (++funq->rq_buf_idx == funq->rq_depth) + funq->rq_buf_idx = 0; + } + funq->rq_buf_offset = buf_offset; +} + +/* Given a command response with data scattered across >= 1 RQ buffers return + * a pointer to a contiguous buffer containing all the data. If the data is in + * one RQ buffer the start address within that buffer is returned, otherwise a + * new buffer is allocated and the data is gathered into it. + */ +static void *fun_data_from_rq(struct fun_queue *funq, + const struct fun_rsp_common *rsp, bool *need_free) +{ + u32 bufoff, total_len, remaining, fragsize, dataoff; + struct device *dma_dev = funq->fdev->dev; + const struct fun_dataop_rqbuf *databuf; + const struct fun_dataop_hdr *dataop; + const struct fun_rq_info *rqinfo; + void *data; + + dataop = (void *)rsp + rsp->suboff8 * 8; + total_len = be32_to_cpu(dataop->total_len); + + if (likely(dataop->nsgl == 1)) { + databuf = (struct fun_dataop_rqbuf *)dataop->imm; + bufoff = be32_to_cpu(databuf->bufoff); + fun_rq_update_pos(funq, bufoff); + rqinfo = &funq->rq_info[funq->rq_buf_idx]; + dma_sync_single_for_cpu(dma_dev, rqinfo->dma + bufoff, + total_len, DMA_FROM_DEVICE); + *need_free = false; + return page_address(rqinfo->page) + bufoff; + } + + /* For scattered completions gather the fragments into one buffer. */ + + data = kmalloc(total_len, GFP_ATOMIC); + /* NULL is OK here. In case of failure we still need to consume the data + * for proper buffer accounting but indicate an error in the response. + */ + if (likely(data)) + *need_free = true; + + dataoff = 0; + for (remaining = total_len; remaining; remaining -= fragsize) { + fun_rq_update_pos(funq, 0); + fragsize = min_t(unsigned int, PAGE_SIZE, remaining); + if (data) { + rqinfo = &funq->rq_info[funq->rq_buf_idx]; + dma_sync_single_for_cpu(dma_dev, rqinfo->dma, fragsize, + DMA_FROM_DEVICE); + memcpy(data + dataoff, page_address(rqinfo->page), + fragsize); + dataoff += fragsize; + } + } + return data; +} + +unsigned int __fun_process_cq(struct fun_queue *funq, unsigned int max) +{ + const struct fun_cqe_info *info; + struct fun_rsp_common *rsp; + unsigned int new_cqes; + u16 sf_p, flags; + bool need_free; + void *cqe; + + if (!max) + max = funq->cq_depth - 1; + + for (new_cqes = 0; new_cqes < max; new_cqes++) { + cqe = funq->cqes + (funq->cq_head << funq->cqe_size_log2); + info = funq_cqe_info(funq, cqe); + sf_p = be16_to_cpu(info->sf_p); + + if ((sf_p & 1) != funq->cq_phase) + break; + + /* ensure the phase tag is read before other CQE fields */ + dma_rmb(); + + if (++funq->cq_head == funq->cq_depth) { + funq->cq_head = 0; + funq->cq_phase = !funq->cq_phase; + } + + rsp = cqe; + flags = be16_to_cpu(rsp->flags); + + need_free = false; + if (unlikely(flags & FUN_REQ_COMMON_FLAG_CQE_IN_RQBUF)) { + rsp = fun_data_from_rq(funq, rsp, &need_free); + if (!rsp) { + rsp = cqe; + rsp->len8 = 1; + if (rsp->ret == 0) + rsp->ret = ENOMEM; + } + } + + if (funq->cq_cb) + funq->cq_cb(funq, funq->cb_data, rsp, info); + if (need_free) + kfree(rsp); + } + + dev_dbg(funq->fdev->dev, "CQ %u, new CQEs %u/%u, head %u, phase %u\n", + funq->cqid, new_cqes, max, funq->cq_head, funq->cq_phase); + return new_cqes; +} + +unsigned int fun_process_cq(struct fun_queue *funq, unsigned int max) +{ + unsigned int processed; + u32 db; + + processed = __fun_process_cq(funq, max); + + if (funq->num_rqe_to_fill) { + funq->rq_tail = (funq->rq_tail + funq->num_rqe_to_fill) % + funq->rq_depth; + funq->num_rqe_to_fill = 0; + writel(funq->rq_tail, funq->rq_db); + } + + db = funq->cq_head | FUN_DB_IRQ_ARM_F; + writel(db, funq->cq_db); + return processed; +} + +static int fun_alloc_sqes(struct fun_queue *funq) +{ + funq->sq_cmds = fun_alloc_ring_mem(funq->fdev->dev, funq->sq_depth, + 1 << funq->sqe_size_log2, 0, + fun_sq_is_head_wb(funq), + NUMA_NO_NODE, &funq->sq_dma_addr, + NULL, &funq->sq_head); + return funq->sq_cmds ? 0 : -ENOMEM; +} + +static int fun_alloc_cqes(struct fun_queue *funq) +{ + funq->cqes = fun_alloc_ring_mem(funq->fdev->dev, funq->cq_depth, + 1 << funq->cqe_size_log2, 0, false, + NUMA_NO_NODE, &funq->cq_dma_addr, NULL, + NULL); + return funq->cqes ? 0 : -ENOMEM; +} + +static int fun_alloc_rqes(struct fun_queue *funq) +{ + funq->rqes = fun_alloc_ring_mem(funq->fdev->dev, funq->rq_depth, + sizeof(*funq->rqes), + sizeof(*funq->rq_info), false, + NUMA_NO_NODE, &funq->rq_dma_addr, + (void **)&funq->rq_info, NULL); + return funq->rqes ? 0 : -ENOMEM; +} + +/* Free a queue's structures. */ +void fun_free_queue(struct fun_queue *funq) +{ + struct device *dev = funq->fdev->dev; + + fun_free_ring_mem(dev, funq->cq_depth, 1 << funq->cqe_size_log2, false, + funq->cqes, funq->cq_dma_addr, NULL); + fun_free_ring_mem(dev, funq->sq_depth, 1 << funq->sqe_size_log2, + fun_sq_is_head_wb(funq), funq->sq_cmds, + funq->sq_dma_addr, NULL); + + if (funq->rqes) { + fun_clean_rq(funq); + fun_free_ring_mem(dev, funq->rq_depth, sizeof(*funq->rqes), + false, funq->rqes, funq->rq_dma_addr, + funq->rq_info); + } + + kfree(funq); +} + +/* Allocate and initialize a funq's structures. */ +struct fun_queue *fun_alloc_queue(struct fun_dev *fdev, int qid, + const struct fun_queue_alloc_req *req) +{ + struct fun_queue *funq = kzalloc(sizeof(*funq), GFP_KERNEL); + + if (!funq) + return NULL; + + funq->fdev = fdev; + spin_lock_init(&funq->sq_lock); + + funq->qid = qid; + + /* Initial CQ/SQ/RQ ids */ + if (req->rq_depth) { + funq->cqid = 2 * qid; + if (funq->qid) { + /* I/O Q: use rqid = cqid, sqid = +1 */ + funq->rqid = funq->cqid; + funq->sqid = funq->rqid + 1; + } else { + /* Admin Q: sqid is always 0, use ID 1 for RQ */ + funq->sqid = 0; + funq->rqid = 1; + } + } else { + funq->cqid = qid; + funq->sqid = qid; + } + + funq->cq_flags = req->cq_flags; + funq->sq_flags = req->sq_flags; + + funq->cqe_size_log2 = req->cqe_size_log2; + funq->sqe_size_log2 = req->sqe_size_log2; + + funq->cq_depth = req->cq_depth; + funq->sq_depth = req->sq_depth; + + funq->cq_intcoal_nentries = req->cq_intcoal_nentries; + funq->cq_intcoal_usec = req->cq_intcoal_usec; + + funq->sq_intcoal_nentries = req->sq_intcoal_nentries; + funq->sq_intcoal_usec = req->sq_intcoal_usec; + + if (fun_alloc_cqes(funq)) + goto free_funq; + + funq->cq_phase = 1; + + if (fun_alloc_sqes(funq)) + goto free_funq; + + if (req->rq_depth) { + funq->rq_flags = req->rq_flags | FUN_ADMIN_EPSQ_CREATE_FLAG_RQ; + funq->rq_depth = req->rq_depth; + funq->rq_buf_offset = -1; + + if (fun_alloc_rqes(funq) || fun_fill_rq(funq)) + goto free_funq; + } + + funq->cq_vector = -1; + funq->cqe_info_offset = (1 << funq->cqe_size_log2) - sizeof(struct fun_cqe_info); + + /* SQ/CQ 0 are implicitly created, assign their doorbells now. + * Other queues are assigned doorbells at their explicit creation. + */ + if (funq->sqid == 0) + funq->sq_db = fun_sq_db_addr(fdev, 0); + if (funq->cqid == 0) + funq->cq_db = fun_cq_db_addr(fdev, 0); + + return funq; + +free_funq: + fun_free_queue(funq); + return NULL; +} + +/* Create a funq's CQ on the device. */ +static int fun_create_cq(struct fun_queue *funq) +{ + struct fun_dev *fdev = funq->fdev; + unsigned int rqid; + int rc; + + rqid = funq->cq_flags & FUN_ADMIN_EPCQ_CREATE_FLAG_RQ ? + funq->rqid : FUN_HCI_ID_INVALID; + rc = fun_cq_create(fdev, funq->cq_flags, funq->cqid, rqid, + funq->cqe_size_log2, funq->cq_depth, + funq->cq_dma_addr, 0, 0, funq->cq_intcoal_nentries, + funq->cq_intcoal_usec, funq->cq_vector, 0, 0, + &funq->cqid, &funq->cq_db); + if (!rc) + dev_dbg(fdev->dev, "created CQ %u\n", funq->cqid); + + return rc; +} + +/* Create a funq's SQ on the device. */ +static int fun_create_sq(struct fun_queue *funq) +{ + struct fun_dev *fdev = funq->fdev; + int rc; + + rc = fun_sq_create(fdev, funq->sq_flags, funq->sqid, funq->cqid, + funq->sqe_size_log2, funq->sq_depth, + funq->sq_dma_addr, funq->sq_intcoal_nentries, + funq->sq_intcoal_usec, funq->cq_vector, 0, 0, + 0, &funq->sqid, &funq->sq_db); + if (!rc) + dev_dbg(fdev->dev, "created SQ %u\n", funq->sqid); + + return rc; +} + +/* Create a funq's RQ on the device. */ +int fun_create_rq(struct fun_queue *funq) +{ + struct fun_dev *fdev = funq->fdev; + int rc; + + rc = fun_sq_create(fdev, funq->rq_flags, funq->rqid, funq->cqid, 0, + funq->rq_depth, funq->rq_dma_addr, 0, 0, + funq->cq_vector, 0, 0, PAGE_SHIFT, &funq->rqid, + &funq->rq_db); + if (!rc) + dev_dbg(fdev->dev, "created RQ %u\n", funq->rqid); + + return rc; +} + +static unsigned int funq_irq(struct fun_queue *funq) +{ + return pci_irq_vector(to_pci_dev(funq->fdev->dev), funq->cq_vector); +} + +int fun_request_irq(struct fun_queue *funq, const char *devname, + irq_handler_t handler, void *data) +{ + int rc; + + if (funq->cq_vector < 0) + return -EINVAL; + + funq->irq_handler = handler; + funq->irq_data = data; + + snprintf(funq->irqname, sizeof(funq->irqname), + funq->qid ? "%s-q[%d]" : "%s-adminq", devname, funq->qid); + + rc = request_irq(funq_irq(funq), handler, 0, funq->irqname, data); + if (rc) + funq->irq_handler = NULL; + + return rc; +} + +/* Create all component queues of a funq on the device. */ +int fun_create_queue(struct fun_queue *funq) +{ + int rc; + + rc = fun_create_cq(funq); + if (rc) + return rc; + + if (funq->rq_depth) { + rc = fun_create_rq(funq); + if (rc) + goto release_cq; + } + + rc = fun_create_sq(funq); + if (rc) + goto release_rq; + + return 0; + +release_rq: + fun_destroy_sq(funq->fdev, funq->rqid); +release_cq: + fun_destroy_cq(funq->fdev, funq->cqid); + return rc; +} + +void fun_free_irq(struct fun_queue *funq) +{ + if (funq->irq_handler) { + unsigned int vector = funq_irq(funq); + + free_irq(vector, funq->irq_data); + funq->irq_handler = NULL; + funq->irq_data = NULL; + } +} diff --git a/drivers/net/ethernet/fungible/funcore/fun_queue.h b/drivers/net/ethernet/fungible/funcore/fun_queue.h new file mode 100644 index 000000000000..7fb53d0ae8b0 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_queue.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUN_QEUEUE_H +#define _FUN_QEUEUE_H + +#include <linux/interrupt.h> +#include <linux/io.h> + +struct device; +struct fun_dev; +struct fun_queue; +struct fun_cqe_info; +struct fun_rsp_common; + +typedef void (*cq_callback_t)(struct fun_queue *funq, void *data, void *msg, + const struct fun_cqe_info *info); + +struct fun_rq_info { + dma_addr_t dma; + struct page *page; +}; + +/* A queue group consisting of an SQ, a CQ, and an optional RQ. */ +struct fun_queue { + struct fun_dev *fdev; + spinlock_t sq_lock; + + dma_addr_t cq_dma_addr; + dma_addr_t sq_dma_addr; + dma_addr_t rq_dma_addr; + + u32 __iomem *cq_db; + u32 __iomem *sq_db; + u32 __iomem *rq_db; + + void *cqes; + void *sq_cmds; + struct fun_eprq_rqbuf *rqes; + struct fun_rq_info *rq_info; + + u32 cqid; + u32 sqid; + u32 rqid; + + u32 cq_depth; + u32 sq_depth; + u32 rq_depth; + + u16 cq_head; + u16 sq_tail; + u16 rq_tail; + + u8 cqe_size_log2; + u8 sqe_size_log2; + + u16 cqe_info_offset; + + u16 rq_buf_idx; + int rq_buf_offset; + u16 num_rqe_to_fill; + + u8 cq_intcoal_usec; + u8 cq_intcoal_nentries; + u8 sq_intcoal_usec; + u8 sq_intcoal_nentries; + + u16 cq_flags; + u16 sq_flags; + u16 rq_flags; + + /* SQ head writeback */ + u16 sq_comp; + + volatile __be64 *sq_head; + + cq_callback_t cq_cb; + void *cb_data; + + irq_handler_t irq_handler; + void *irq_data; + s16 cq_vector; + u8 cq_phase; + + /* I/O q index */ + u16 qid; + + char irqname[24]; +}; + +static inline void *fun_sqe_at(const struct fun_queue *funq, unsigned int pos) +{ + return funq->sq_cmds + (pos << funq->sqe_size_log2); +} + +static inline void funq_sq_post_tail(struct fun_queue *funq, u16 tail) +{ + if (++tail == funq->sq_depth) + tail = 0; + funq->sq_tail = tail; + writel(tail, funq->sq_db); +} + +static inline struct fun_cqe_info *funq_cqe_info(const struct fun_queue *funq, + void *cqe) +{ + return cqe + funq->cqe_info_offset; +} + +static inline void funq_rq_post(struct fun_queue *funq) +{ + writel(funq->rq_tail, funq->rq_db); +} + +struct fun_queue_alloc_req { + u8 cqe_size_log2; + u8 sqe_size_log2; + + u16 cq_flags; + u16 sq_flags; + u16 rq_flags; + + u32 cq_depth; + u32 sq_depth; + u32 rq_depth; + + u8 cq_intcoal_usec; + u8 cq_intcoal_nentries; + u8 sq_intcoal_usec; + u8 sq_intcoal_nentries; +}; + +int fun_sq_create(struct fun_dev *fdev, u16 flags, u32 sqid, u32 cqid, + u8 sqe_size_log2, u32 sq_depth, dma_addr_t dma_addr, + u8 coal_nentries, u8 coal_usec, u32 irq_num, + u32 scan_start_id, u32 scan_end_id, + u32 rq_buf_size_log2, u32 *sqidp, u32 __iomem **dbp); +int fun_cq_create(struct fun_dev *fdev, u16 flags, u32 cqid, u32 rqid, + u8 cqe_size_log2, u32 cq_depth, dma_addr_t dma_addr, + u16 headroom, u16 tailroom, u8 coal_nentries, u8 coal_usec, + u32 irq_num, u32 scan_start_id, u32 scan_end_id, + u32 *cqidp, u32 __iomem **dbp); +void *fun_alloc_ring_mem(struct device *dma_dev, size_t depth, + size_t hw_desc_sz, size_t sw_desc_size, bool wb, + int numa_node, dma_addr_t *dma_addr, void **sw_va, + volatile __be64 **wb_va); +void fun_free_ring_mem(struct device *dma_dev, size_t depth, size_t hw_desc_sz, + bool wb, void *hw_va, dma_addr_t dma_addr, void *sw_va); + +#define fun_destroy_sq(fdev, sqid) \ + fun_res_destroy((fdev), FUN_ADMIN_OP_EPSQ, 0, (sqid)) +#define fun_destroy_cq(fdev, cqid) \ + fun_res_destroy((fdev), FUN_ADMIN_OP_EPCQ, 0, (cqid)) + +struct fun_queue *fun_alloc_queue(struct fun_dev *fdev, int qid, + const struct fun_queue_alloc_req *req); +void fun_free_queue(struct fun_queue *funq); + +static inline void fun_set_cq_callback(struct fun_queue *funq, cq_callback_t cb, + void *cb_data) +{ + funq->cq_cb = cb; + funq->cb_data = cb_data; +} + +int fun_create_rq(struct fun_queue *funq); +int fun_create_queue(struct fun_queue *funq); + +void fun_free_irq(struct fun_queue *funq); +int fun_request_irq(struct fun_queue *funq, const char *devname, + irq_handler_t handler, void *data); + +unsigned int __fun_process_cq(struct fun_queue *funq, unsigned int max); +unsigned int fun_process_cq(struct fun_queue *funq, unsigned int max); + +#endif /* _FUN_QEUEUE_H */ diff --git a/drivers/net/ethernet/fungible/funeth/Kconfig b/drivers/net/ethernet/fungible/funeth/Kconfig new file mode 100644 index 000000000000..c72ad9386400 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/Kconfig @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Fungible Ethernet driver configuration +# + +config FUN_ETH + tristate "Fungible Ethernet device driver" + depends on PCI && PCI_MSI + depends on TLS && TLS_DEVICE || TLS_DEVICE=n + select NET_DEVLINK + select FUN_CORE + help + This driver supports the Ethernet functionality of Fungible adapters. + It works with both physical and virtual functions. + + To compile this driver as a module, choose M here. The module + will be called funeth. diff --git a/drivers/net/ethernet/fungible/funeth/Makefile b/drivers/net/ethernet/fungible/funeth/Makefile new file mode 100644 index 000000000000..646d69595b4f --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +ccflags-y += -I$(srctree)/$(src)/../funcore -I$(srctree)/$(src) + +obj-$(CONFIG_FUN_ETH) += funeth.o + +funeth-y := funeth_main.o funeth_rx.o funeth_tx.o funeth_devlink.o \ + funeth_ethtool.o + +funeth-$(CONFIG_TLS_DEVICE) += funeth_ktls.o diff --git a/drivers/net/ethernet/fungible/funeth/fun_port.h b/drivers/net/ethernet/fungible/funeth/fun_port.h new file mode 100644 index 000000000000..0f9da44e3786 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/fun_port.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUN_PORT_H +#define _FUN_PORT_H + +enum port_mac_rx_stats { + PORT_MAC_RX_etherStatsOctets = 0x0, + PORT_MAC_RX_OctetsReceivedOK = 0x1, + PORT_MAC_RX_aAlignmentErrors = 0x2, + PORT_MAC_RX_aPAUSEMACCtrlFramesReceived = 0x3, + PORT_MAC_RX_aFrameTooLongErrors = 0x4, + PORT_MAC_RX_aInRangeLengthErrors = 0x5, + PORT_MAC_RX_aFramesReceivedOK = 0x6, + PORT_MAC_RX_aFrameCheckSequenceErrors = 0x7, + PORT_MAC_RX_VLANReceivedOK = 0x8, + PORT_MAC_RX_ifInErrors = 0x9, + PORT_MAC_RX_ifInUcastPkts = 0xa, + PORT_MAC_RX_ifInMulticastPkts = 0xb, + PORT_MAC_RX_ifInBroadcastPkts = 0xc, + PORT_MAC_RX_etherStatsDropEvents = 0xd, + PORT_MAC_RX_etherStatsPkts = 0xe, + PORT_MAC_RX_etherStatsUndersizePkts = 0xf, + PORT_MAC_RX_etherStatsPkts64Octets = 0x10, + PORT_MAC_RX_etherStatsPkts65to127Octets = 0x11, + PORT_MAC_RX_etherStatsPkts128to255Octets = 0x12, + PORT_MAC_RX_etherStatsPkts256to511Octets = 0x13, + PORT_MAC_RX_etherStatsPkts512to1023Octets = 0x14, + PORT_MAC_RX_etherStatsPkts1024to1518Octets = 0x15, + PORT_MAC_RX_etherStatsPkts1519toMaxOctets = 0x16, + PORT_MAC_RX_etherStatsOversizePkts = 0x17, + PORT_MAC_RX_etherStatsJabbers = 0x18, + PORT_MAC_RX_etherStatsFragments = 0x19, + PORT_MAC_RX_CBFCPAUSEFramesReceived_0 = 0x1a, + PORT_MAC_RX_CBFCPAUSEFramesReceived_1 = 0x1b, + PORT_MAC_RX_CBFCPAUSEFramesReceived_2 = 0x1c, + PORT_MAC_RX_CBFCPAUSEFramesReceived_3 = 0x1d, + PORT_MAC_RX_CBFCPAUSEFramesReceived_4 = 0x1e, + PORT_MAC_RX_CBFCPAUSEFramesReceived_5 = 0x1f, + PORT_MAC_RX_CBFCPAUSEFramesReceived_6 = 0x20, + PORT_MAC_RX_CBFCPAUSEFramesReceived_7 = 0x21, + PORT_MAC_RX_CBFCPAUSEFramesReceived_8 = 0x22, + PORT_MAC_RX_CBFCPAUSEFramesReceived_9 = 0x23, + PORT_MAC_RX_CBFCPAUSEFramesReceived_10 = 0x24, + PORT_MAC_RX_CBFCPAUSEFramesReceived_11 = 0x25, + PORT_MAC_RX_CBFCPAUSEFramesReceived_12 = 0x26, + PORT_MAC_RX_CBFCPAUSEFramesReceived_13 = 0x27, + PORT_MAC_RX_CBFCPAUSEFramesReceived_14 = 0x28, + PORT_MAC_RX_CBFCPAUSEFramesReceived_15 = 0x29, + PORT_MAC_RX_MACControlFramesReceived = 0x2a, + PORT_MAC_RX_STATS_MAX = 0x2b, +}; + +enum port_mac_tx_stats { + PORT_MAC_TX_etherStatsOctets = 0x0, + PORT_MAC_TX_OctetsTransmittedOK = 0x1, + PORT_MAC_TX_aPAUSEMACCtrlFramesTransmitted = 0x2, + PORT_MAC_TX_aFramesTransmittedOK = 0x3, + PORT_MAC_TX_VLANTransmittedOK = 0x4, + PORT_MAC_TX_ifOutErrors = 0x5, + PORT_MAC_TX_ifOutUcastPkts = 0x6, + PORT_MAC_TX_ifOutMulticastPkts = 0x7, + PORT_MAC_TX_ifOutBroadcastPkts = 0x8, + PORT_MAC_TX_etherStatsPkts64Octets = 0x9, + PORT_MAC_TX_etherStatsPkts65to127Octets = 0xa, + PORT_MAC_TX_etherStatsPkts128to255Octets = 0xb, + PORT_MAC_TX_etherStatsPkts256to511Octets = 0xc, + PORT_MAC_TX_etherStatsPkts512to1023Octets = 0xd, + PORT_MAC_TX_etherStatsPkts1024to1518Octets = 0xe, + PORT_MAC_TX_etherStatsPkts1519toMaxOctets = 0xf, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_0 = 0x10, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_1 = 0x11, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_2 = 0x12, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_3 = 0x13, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_4 = 0x14, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_5 = 0x15, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_6 = 0x16, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_7 = 0x17, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_8 = 0x18, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_9 = 0x19, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_10 = 0x1a, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_11 = 0x1b, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_12 = 0x1c, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_13 = 0x1d, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_14 = 0x1e, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_15 = 0x1f, + PORT_MAC_TX_MACControlFramesTransmitted = 0x20, + PORT_MAC_TX_etherStatsPkts = 0x21, + PORT_MAC_TX_STATS_MAX = 0x22, +}; + +enum port_mac_fec_stats { + PORT_MAC_FEC_Correctable = 0x0, + PORT_MAC_FEC_Uncorrectable = 0x1, + PORT_MAC_FEC_STATS_MAX = 0x2, +}; + +#endif /* _FUN_PORT_H */ diff --git a/drivers/net/ethernet/fungible/funeth/funeth.h b/drivers/net/ethernet/fungible/funeth/funeth.h new file mode 100644 index 000000000000..1250e10d21db --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUNETH_H +#define _FUNETH_H + +#include <uapi/linux/if_ether.h> +#include <uapi/linux/net_tstamp.h> +#include <linux/mutex.h> +#include <linux/seqlock.h> +#include <linux/xarray.h> +#include <net/devlink.h> +#include "fun_dev.h" + +#define ADMIN_SQE_SIZE SZ_128 +#define ADMIN_CQE_SIZE SZ_64 +#define ADMIN_RSP_MAX_LEN (ADMIN_CQE_SIZE - sizeof(struct fun_cqe_info)) + +#define FUN_MAX_MTU 9024 + +#define SQ_DEPTH 512U +#define CQ_DEPTH 1024U +#define RQ_DEPTH (512U / (PAGE_SIZE / 4096)) + +#define CQ_INTCOAL_USEC 10 +#define CQ_INTCOAL_NPKT 16 +#define SQ_INTCOAL_USEC 10 +#define SQ_INTCOAL_NPKT 16 + +#define INVALID_LPORT 0xffff + +#define FUN_PORT_CAP_PAUSE_MASK (FUN_PORT_CAP_TX_PAUSE | FUN_PORT_CAP_RX_PAUSE) + +struct fun_vport_info { + u8 mac[ETH_ALEN]; + u16 vlan; + __be16 vlan_proto; + u8 qos; + u8 spoofchk:1; + u8 trusted:1; + unsigned int max_rate; +}; + +/* "subclass" of fun_dev for Ethernet functions */ +struct fun_ethdev { + struct fun_dev fdev; + + /* the function's network ports */ + struct net_device **netdevs; + unsigned int num_ports; + + /* configuration for the function's virtual ports */ + unsigned int num_vports; + struct fun_vport_info *vport_info; + + struct mutex state_mutex; /* nests inside RTNL if both taken */ + + unsigned int nsqs_per_port; +}; + +static inline struct fun_ethdev *to_fun_ethdev(struct fun_dev *p) +{ + return container_of(p, struct fun_ethdev, fdev); +} + +struct fun_qset { + struct funeth_rxq **rxqs; + struct funeth_txq **txqs; + struct funeth_txq **xdpqs; + unsigned int nrxqs; + unsigned int ntxqs; + unsigned int nxdpqs; + unsigned int rxq_start; + unsigned int txq_start; + unsigned int xdpq_start; + unsigned int cq_depth; + unsigned int rq_depth; + unsigned int sq_depth; + int state; +}; + +/* Per netdevice driver state, i.e., netdev_priv. */ +struct funeth_priv { + struct fun_dev *fdev; + struct pci_dev *pdev; + struct net_device *netdev; + + struct funeth_rxq * __rcu *rxqs; + struct funeth_txq **txqs; + struct funeth_txq * __rcu *xdpqs; + + struct xarray irqs; + unsigned int num_tx_irqs; + unsigned int num_rx_irqs; + unsigned int rx_irq_ofst; + + unsigned int lane_attrs; + u16 lport; + + /* link settings */ + u64 port_caps; + u64 advertising; + u64 lp_advertising; + unsigned int link_speed; + u8 xcvr_type; + u8 active_fc; + u8 active_fec; + u8 link_down_reason; + seqcount_t link_seq; + + u32 msg_enable; + + unsigned int num_xdpqs; + + /* ethtool, etc. config parameters */ + unsigned int sq_depth; + unsigned int rq_depth; + unsigned int cq_depth; + unsigned int cq_irq_db; + u8 tx_coal_usec; + u8 tx_coal_count; + u8 rx_coal_usec; + u8 rx_coal_count; + + struct hwtstamp_config hwtstamp_cfg; + + /* cumulative queue stats from earlier queue instances */ + u64 tx_packets; + u64 tx_bytes; + u64 tx_dropped; + u64 rx_packets; + u64 rx_bytes; + u64 rx_dropped; + + /* RSS */ + unsigned int rss_hw_id; + enum fun_eth_hash_alg hash_algo; + u8 rss_key[FUN_ETH_RSS_MAX_KEY_SIZE]; + unsigned int indir_table_nentries; + u32 indir_table[FUN_ETH_RSS_MAX_INDIR_ENT]; + dma_addr_t rss_dma_addr; + void *rss_cfg; + + /* DMA area for port stats */ + dma_addr_t stats_dma_addr; + __be64 *stats; + + struct bpf_prog *xdp_prog; + + struct devlink_port dl_port; + + /* kTLS state */ + unsigned int ktls_id; + atomic64_t tx_tls_add; + atomic64_t tx_tls_del; + atomic64_t tx_tls_resync; +}; + +void fun_set_ethtool_ops(struct net_device *netdev); +int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data); +int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data); +int fun_create_and_bind_tx(struct funeth_priv *fp, u32 sqid); +int fun_replace_queues(struct net_device *dev, struct fun_qset *newqs, + struct netlink_ext_ack *extack); +int fun_change_num_queues(struct net_device *dev, unsigned int ntx, + unsigned int nrx); +void fun_set_ring_count(struct net_device *netdev, unsigned int ntx, + unsigned int nrx); +int fun_config_rss(struct net_device *dev, int algo, const u8 *key, + const u32 *qtable, u8 op); + +#endif /* _FUNETH_H */ diff --git a/drivers/net/ethernet/fungible/funeth/funeth_devlink.c b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c new file mode 100644 index 000000000000..a849b3c6b01f --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include "funeth.h" +#include "funeth_devlink.h" + +static int fun_dl_info_get(struct devlink *dl, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + int err; + + err = devlink_info_driver_name_put(req, KBUILD_MODNAME); + if (err) + return err; + + return 0; +} + +static const struct devlink_ops fun_dl_ops = { + .info_get = fun_dl_info_get, +}; + +struct devlink *fun_devlink_alloc(struct device *dev) +{ + return devlink_alloc(&fun_dl_ops, sizeof(struct fun_ethdev), dev); +} + +void fun_devlink_free(struct devlink *devlink) +{ + devlink_free(devlink); +} + +void fun_devlink_register(struct devlink *devlink) +{ + devlink_register(devlink); +} + +void fun_devlink_unregister(struct devlink *devlink) +{ + devlink_unregister(devlink); +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_devlink.h b/drivers/net/ethernet/fungible/funeth/funeth_devlink.h new file mode 100644 index 000000000000..e40464d57ff4 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_devlink.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef __FUNETH_DEVLINK_H +#define __FUNETH_DEVLINK_H + +#include <net/devlink.h> + +struct devlink *fun_devlink_alloc(struct device *dev); +void fun_devlink_free(struct devlink *devlink); +void fun_devlink_register(struct devlink *devlink); +void fun_devlink_unregister(struct devlink *devlink); + +#endif /* __FUNETH_DEVLINK_H */ diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c new file mode 100644 index 000000000000..d081168c95fa --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c @@ -0,0 +1,1162 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/ethtool.h> +#include <linux/linkmode.h> +#include <linux/netdevice.h> +#include <linux/nvme.h> +#include <linux/io.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/pci.h> +#include <linux/rtnetlink.h> +#include "funeth.h" +#include "fun_port.h" +#include "funeth_txrx.h" + +/* Min queue depth. The smallest power-of-2 supporting jumbo frames with 4K + * pages is 8. Require it for all types of queues though some could work with + * fewer entries. + */ +#define FUNETH_MIN_QDEPTH 8 + +static const char mac_tx_stat_names[][ETH_GSTRING_LEN] = { + "mac_tx_octets_total", + "mac_tx_frames_total", + "mac_tx_vlan_frames_ok", + "mac_tx_unicast_frames", + "mac_tx_multicast_frames", + "mac_tx_broadcast_frames", + "mac_tx_errors", + "mac_tx_CBFCPAUSE0", + "mac_tx_CBFCPAUSE1", + "mac_tx_CBFCPAUSE2", + "mac_tx_CBFCPAUSE3", + "mac_tx_CBFCPAUSE4", + "mac_tx_CBFCPAUSE5", + "mac_tx_CBFCPAUSE6", + "mac_tx_CBFCPAUSE7", + "mac_tx_CBFCPAUSE8", + "mac_tx_CBFCPAUSE9", + "mac_tx_CBFCPAUSE10", + "mac_tx_CBFCPAUSE11", + "mac_tx_CBFCPAUSE12", + "mac_tx_CBFCPAUSE13", + "mac_tx_CBFCPAUSE14", + "mac_tx_CBFCPAUSE15", +}; + +static const char mac_rx_stat_names[][ETH_GSTRING_LEN] = { + "mac_rx_octets_total", + "mac_rx_frames_total", + "mac_rx_VLAN_frames_ok", + "mac_rx_unicast_frames", + "mac_rx_multicast_frames", + "mac_rx_broadcast_frames", + "mac_rx_drop_events", + "mac_rx_errors", + "mac_rx_alignment_errors", + "mac_rx_CBFCPAUSE0", + "mac_rx_CBFCPAUSE1", + "mac_rx_CBFCPAUSE2", + "mac_rx_CBFCPAUSE3", + "mac_rx_CBFCPAUSE4", + "mac_rx_CBFCPAUSE5", + "mac_rx_CBFCPAUSE6", + "mac_rx_CBFCPAUSE7", + "mac_rx_CBFCPAUSE8", + "mac_rx_CBFCPAUSE9", + "mac_rx_CBFCPAUSE10", + "mac_rx_CBFCPAUSE11", + "mac_rx_CBFCPAUSE12", + "mac_rx_CBFCPAUSE13", + "mac_rx_CBFCPAUSE14", + "mac_rx_CBFCPAUSE15", +}; + +static const char * const txq_stat_names[] = { + "tx_pkts", + "tx_bytes", + "tx_cso", + "tx_tso", + "tx_encapsulated_tso", + "tx_more", + "tx_queue_stops", + "tx_queue_restarts", + "tx_mapping_errors", + "tx_tls_encrypted_packets", + "tx_tls_encrypted_bytes", + "tx_tls_ooo", + "tx_tls_drop_no_sync_data", +}; + +static const char * const xdpq_stat_names[] = { + "tx_xdp_pkts", + "tx_xdp_bytes", + "tx_xdp_full", + "tx_xdp_mapping_errors", +}; + +static const char * const rxq_stat_names[] = { + "rx_pkts", + "rx_bytes", + "rx_cso", + "gro_pkts", + "gro_merged", + "rx_xdp_tx", + "rx_xdp_redir", + "rx_xdp_drops", + "rx_buffers", + "rx_page_allocs", + "rx_drops", + "rx_budget_exhausted", + "rx_mapping_errors", +}; + +static const char * const tls_stat_names[] = { + "tx_tls_ctx", + "tx_tls_del", + "tx_tls_resync", +}; + +static void fun_link_modes_to_ethtool(u64 modes, + unsigned long *ethtool_modes_map) +{ +#define ADD_LINK_MODE(mode) \ + __set_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, ethtool_modes_map) + + if (modes & FUN_PORT_CAP_AUTONEG) + ADD_LINK_MODE(Autoneg); + if (modes & FUN_PORT_CAP_1000_X) + ADD_LINK_MODE(1000baseX_Full); + if (modes & FUN_PORT_CAP_10G_R) { + ADD_LINK_MODE(10000baseCR_Full); + ADD_LINK_MODE(10000baseSR_Full); + ADD_LINK_MODE(10000baseLR_Full); + ADD_LINK_MODE(10000baseER_Full); + } + if (modes & FUN_PORT_CAP_25G_R) { + ADD_LINK_MODE(25000baseCR_Full); + ADD_LINK_MODE(25000baseSR_Full); + } + if (modes & FUN_PORT_CAP_40G_R4) { + ADD_LINK_MODE(40000baseCR4_Full); + ADD_LINK_MODE(40000baseSR4_Full); + ADD_LINK_MODE(40000baseLR4_Full); + } + if (modes & FUN_PORT_CAP_50G_R2) { + ADD_LINK_MODE(50000baseCR2_Full); + ADD_LINK_MODE(50000baseSR2_Full); + } + if (modes & FUN_PORT_CAP_50G_R) { + ADD_LINK_MODE(50000baseCR_Full); + ADD_LINK_MODE(50000baseSR_Full); + ADD_LINK_MODE(50000baseLR_ER_FR_Full); + } + if (modes & FUN_PORT_CAP_100G_R4) { + ADD_LINK_MODE(100000baseCR4_Full); + ADD_LINK_MODE(100000baseSR4_Full); + ADD_LINK_MODE(100000baseLR4_ER4_Full); + } + if (modes & FUN_PORT_CAP_100G_R2) { + ADD_LINK_MODE(100000baseCR2_Full); + ADD_LINK_MODE(100000baseSR2_Full); + ADD_LINK_MODE(100000baseLR2_ER2_FR2_Full); + } + if (modes & FUN_PORT_CAP_FEC_NONE) + ADD_LINK_MODE(FEC_NONE); + if (modes & FUN_PORT_CAP_FEC_FC) + ADD_LINK_MODE(FEC_BASER); + if (modes & FUN_PORT_CAP_FEC_RS) + ADD_LINK_MODE(FEC_RS); + if (modes & FUN_PORT_CAP_RX_PAUSE) + ADD_LINK_MODE(Pause); + +#undef ADD_LINK_MODE +} + +static void set_asym_pause(u64 advertising, struct ethtool_link_ksettings *ks) +{ + bool rx_pause, tx_pause; + + rx_pause = advertising & FUN_PORT_CAP_RX_PAUSE; + tx_pause = advertising & FUN_PORT_CAP_TX_PAUSE; + if (tx_pause ^ rx_pause) + ethtool_link_ksettings_add_link_mode(ks, advertising, + Asym_Pause); +} + +static unsigned int fun_port_type(unsigned int xcvr) +{ + if (!xcvr) + return PORT_NONE; + + switch (xcvr & 7) { + case FUN_XCVR_BASET: + return PORT_TP; + case FUN_XCVR_CU: + return PORT_DA; + default: + return PORT_FIBRE; + } +} + +static int fun_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *ks) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + unsigned int seq, speed, xcvr; + u64 lp_advertising; + bool link_up; + + ethtool_link_ksettings_zero_link_mode(ks, supported); + ethtool_link_ksettings_zero_link_mode(ks, advertising); + ethtool_link_ksettings_zero_link_mode(ks, lp_advertising); + + /* Link settings change asynchronously, take a consistent snapshot */ + do { + seq = read_seqcount_begin(&fp->link_seq); + link_up = netif_carrier_ok(netdev); + speed = fp->link_speed; + xcvr = fp->xcvr_type; + lp_advertising = fp->lp_advertising; + } while (read_seqcount_retry(&fp->link_seq, seq)); + + if (link_up) { + ks->base.speed = speed; + ks->base.duplex = DUPLEX_FULL; + fun_link_modes_to_ethtool(lp_advertising, + ks->link_modes.lp_advertising); + } else { + ks->base.speed = SPEED_UNKNOWN; + ks->base.duplex = DUPLEX_UNKNOWN; + } + + ks->base.autoneg = (fp->advertising & FUN_PORT_CAP_AUTONEG) ? + AUTONEG_ENABLE : AUTONEG_DISABLE; + ks->base.port = fun_port_type(xcvr); + + fun_link_modes_to_ethtool(fp->port_caps, ks->link_modes.supported); + if (fp->port_caps & (FUN_PORT_CAP_RX_PAUSE | FUN_PORT_CAP_TX_PAUSE)) + ethtool_link_ksettings_add_link_mode(ks, supported, Asym_Pause); + + fun_link_modes_to_ethtool(fp->advertising, ks->link_modes.advertising); + set_asym_pause(fp->advertising, ks); + return 0; +} + +static u64 fun_advert_modes(const struct ethtool_link_ksettings *ks) +{ + u64 modes = 0; + +#define HAS_MODE(mode) \ + ethtool_link_ksettings_test_link_mode(ks, advertising, mode) + + if (HAS_MODE(1000baseX_Full)) + modes |= FUN_PORT_CAP_1000_X; + if (HAS_MODE(10000baseCR_Full) || HAS_MODE(10000baseSR_Full) || + HAS_MODE(10000baseLR_Full) || HAS_MODE(10000baseER_Full)) + modes |= FUN_PORT_CAP_10G_R; + if (HAS_MODE(25000baseCR_Full) || HAS_MODE(25000baseSR_Full)) + modes |= FUN_PORT_CAP_25G_R; + if (HAS_MODE(40000baseCR4_Full) || HAS_MODE(40000baseSR4_Full) || + HAS_MODE(40000baseLR4_Full)) + modes |= FUN_PORT_CAP_40G_R4; + if (HAS_MODE(50000baseCR2_Full) || HAS_MODE(50000baseSR2_Full)) + modes |= FUN_PORT_CAP_50G_R2; + if (HAS_MODE(50000baseCR_Full) || HAS_MODE(50000baseSR_Full) || + HAS_MODE(50000baseLR_ER_FR_Full)) + modes |= FUN_PORT_CAP_50G_R; + if (HAS_MODE(100000baseCR4_Full) || HAS_MODE(100000baseSR4_Full) || + HAS_MODE(100000baseLR4_ER4_Full)) + modes |= FUN_PORT_CAP_100G_R4; + if (HAS_MODE(100000baseCR2_Full) || HAS_MODE(100000baseSR2_Full) || + HAS_MODE(100000baseLR2_ER2_FR2_Full)) + modes |= FUN_PORT_CAP_100G_R2; + + return modes; +#undef HAS_MODE +} + +static u64 fun_speed_to_link_mode(unsigned int speed) +{ + switch (speed) { + case SPEED_100000: + return FUN_PORT_CAP_100G_R4 | FUN_PORT_CAP_100G_R2; + case SPEED_50000: + return FUN_PORT_CAP_50G_R | FUN_PORT_CAP_50G_R2; + case SPEED_40000: + return FUN_PORT_CAP_40G_R4; + case SPEED_25000: + return FUN_PORT_CAP_25G_R; + case SPEED_10000: + return FUN_PORT_CAP_10G_R; + case SPEED_1000: + return FUN_PORT_CAP_1000_X; + default: + return 0; + } +} + +static int fun_change_advert(struct funeth_priv *fp, u64 new_advert) +{ + int err; + + if (new_advert == fp->advertising) + return 0; + + err = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_ADVERT, new_advert); + if (!err) + fp->advertising = new_advert; + return err; +} + +#define FUN_PORT_CAP_FEC_MASK \ + (FUN_PORT_CAP_FEC_NONE | FUN_PORT_CAP_FEC_FC | FUN_PORT_CAP_FEC_RS) + +static int fun_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *ks) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = {}; + struct funeth_priv *fp = netdev_priv(netdev); + u64 new_advert; + + /* eswitch ports don't support mode changes */ + if (fp->port_caps & FUN_PORT_CAP_VPORT) + return -EOPNOTSUPP; + + if (ks->base.duplex == DUPLEX_HALF) + return -EINVAL; + if (ks->base.autoneg == AUTONEG_ENABLE && + !(fp->port_caps & FUN_PORT_CAP_AUTONEG)) + return -EINVAL; + + if (ks->base.autoneg == AUTONEG_ENABLE) { + if (linkmode_empty(ks->link_modes.advertising)) + return -EINVAL; + + fun_link_modes_to_ethtool(fp->port_caps, supported); + if (!linkmode_subset(ks->link_modes.advertising, supported)) + return -EINVAL; + + new_advert = fun_advert_modes(ks) | FUN_PORT_CAP_AUTONEG; + } else { + new_advert = fun_speed_to_link_mode(ks->base.speed); + new_advert &= fp->port_caps; + if (!new_advert) + return -EINVAL; + } + new_advert |= fp->advertising & + (FUN_PORT_CAP_PAUSE_MASK | FUN_PORT_CAP_FEC_MASK); + + return fun_change_advert(fp, new_advert); +} + +static void fun_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + u8 active_pause = fp->active_fc; + + pause->rx_pause = !!(active_pause & FUN_PORT_CAP_RX_PAUSE); + pause->tx_pause = !!(active_pause & FUN_PORT_CAP_TX_PAUSE); + pause->autoneg = !!(fp->advertising & FUN_PORT_CAP_AUTONEG); +} + +static int fun_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct funeth_priv *fp = netdev_priv(netdev); + u64 new_advert; + + if (fp->port_caps & FUN_PORT_CAP_VPORT) + return -EOPNOTSUPP; + /* Forcing PAUSE settings with AN enabled is unsupported. */ + if (!pause->autoneg && (fp->advertising & FUN_PORT_CAP_AUTONEG)) + return -EOPNOTSUPP; + if (pause->autoneg && !(fp->advertising & FUN_PORT_CAP_AUTONEG)) + return -EINVAL; + if (pause->tx_pause && !(fp->port_caps & FUN_PORT_CAP_TX_PAUSE)) + return -EINVAL; + if (pause->rx_pause && !(fp->port_caps & FUN_PORT_CAP_RX_PAUSE)) + return -EINVAL; + + new_advert = fp->advertising & ~FUN_PORT_CAP_PAUSE_MASK; + if (pause->tx_pause) + new_advert |= FUN_PORT_CAP_TX_PAUSE; + if (pause->rx_pause) + new_advert |= FUN_PORT_CAP_RX_PAUSE; + + return fun_change_advert(fp, new_advert); +} + +static int fun_restart_an(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->advertising & FUN_PORT_CAP_AUTONEG)) + return -EOPNOTSUPP; + + return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_ADVERT, + FUN_PORT_CAP_AUTONEG); +} + +static int fun_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct funeth_priv *fp = netdev_priv(netdev); + unsigned int beacon; + + if (fp->port_caps & FUN_PORT_CAP_VPORT) + return -EOPNOTSUPP; + if (state != ETHTOOL_ID_ACTIVE && state != ETHTOOL_ID_INACTIVE) + return -EOPNOTSUPP; + + beacon = state == ETHTOOL_ID_ACTIVE ? FUN_PORT_LED_BEACON_ON : + FUN_PORT_LED_BEACON_OFF; + return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_LED, beacon); +} + +static void fun_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(fp->pdev), sizeof(info->bus_info)); +} + +static u32 fun_get_msglevel(struct net_device *netdev) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + return fp->msg_enable; +} + +static void fun_set_msglevel(struct net_device *netdev, u32 value) +{ + struct funeth_priv *fp = netdev_priv(netdev); + + fp->msg_enable = value; +} + +static int fun_get_regs_len(struct net_device *dev) +{ + return NVME_REG_ACQ + sizeof(u64); +} + +static void fun_get_regs(struct net_device *dev, struct ethtool_regs *regs, + void *buf) +{ + const struct funeth_priv *fp = netdev_priv(dev); + void __iomem *bar = fp->fdev->bar; + + regs->version = 0; + *(u64 *)(buf + NVME_REG_CAP) = readq(bar + NVME_REG_CAP); + *(u32 *)(buf + NVME_REG_VS) = readl(bar + NVME_REG_VS); + *(u32 *)(buf + NVME_REG_INTMS) = readl(bar + NVME_REG_INTMS); + *(u32 *)(buf + NVME_REG_INTMC) = readl(bar + NVME_REG_INTMC); + *(u32 *)(buf + NVME_REG_CC) = readl(bar + NVME_REG_CC); + *(u32 *)(buf + NVME_REG_CSTS) = readl(bar + NVME_REG_CSTS); + *(u32 *)(buf + NVME_REG_AQA) = readl(bar + NVME_REG_AQA); + *(u64 *)(buf + NVME_REG_ASQ) = readq(bar + NVME_REG_ASQ); + *(u64 *)(buf + NVME_REG_ACQ) = readq(bar + NVME_REG_ACQ); +} + +static int fun_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kcoal, + struct netlink_ext_ack *ext_ack) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + coal->rx_coalesce_usecs = fp->rx_coal_usec; + coal->rx_max_coalesced_frames = fp->rx_coal_count; + coal->use_adaptive_rx_coalesce = !fp->cq_irq_db; + coal->tx_coalesce_usecs = fp->tx_coal_usec; + coal->tx_max_coalesced_frames = fp->tx_coal_count; + return 0; +} + +static int fun_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kcoal, + struct netlink_ext_ack *ext_ack) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct funeth_rxq **rxqs; + unsigned int i, db_val; + + if (coal->rx_coalesce_usecs > FUN_DB_INTCOAL_USEC_M || + coal->rx_max_coalesced_frames > FUN_DB_INTCOAL_ENTRIES_M || + (coal->rx_coalesce_usecs | coal->rx_max_coalesced_frames) == 0 || + coal->tx_coalesce_usecs > FUN_DB_INTCOAL_USEC_M || + coal->tx_max_coalesced_frames > FUN_DB_INTCOAL_ENTRIES_M || + (coal->tx_coalesce_usecs | coal->tx_max_coalesced_frames) == 0) + return -EINVAL; + + /* a timer is required if there's any coalescing */ + if ((coal->rx_max_coalesced_frames > 1 && !coal->rx_coalesce_usecs) || + (coal->tx_max_coalesced_frames > 1 && !coal->tx_coalesce_usecs)) + return -EINVAL; + + fp->rx_coal_usec = coal->rx_coalesce_usecs; + fp->rx_coal_count = coal->rx_max_coalesced_frames; + fp->tx_coal_usec = coal->tx_coalesce_usecs; + fp->tx_coal_count = coal->tx_max_coalesced_frames; + + db_val = FUN_IRQ_CQ_DB(fp->rx_coal_usec, fp->rx_coal_count); + WRITE_ONCE(fp->cq_irq_db, db_val); + + rxqs = rtnl_dereference(fp->rxqs); + if (!rxqs) + return 0; + + for (i = 0; i < netdev->real_num_rx_queues; i++) + WRITE_ONCE(rxqs[i]->irq_db_val, db_val); + + db_val = FUN_IRQ_SQ_DB(fp->tx_coal_usec, fp->tx_coal_count); + for (i = 0; i < netdev->real_num_tx_queues; i++) + WRITE_ONCE(fp->txqs[i]->irq_db_val, db_val); + + return 0; +} + +static void fun_get_channels(struct net_device *netdev, + struct ethtool_channels *chan) +{ + chan->max_rx = netdev->num_rx_queues; + chan->rx_count = netdev->real_num_rx_queues; + + chan->max_tx = netdev->num_tx_queues; + chan->tx_count = netdev->real_num_tx_queues; +} + +static int fun_set_channels(struct net_device *netdev, + struct ethtool_channels *chan) +{ + if (!chan->tx_count || !chan->rx_count) + return -EINVAL; + + if (chan->tx_count == netdev->real_num_tx_queues && + chan->rx_count == netdev->real_num_rx_queues) + return 0; + + if (netif_running(netdev)) + return fun_change_num_queues(netdev, chan->tx_count, + chan->rx_count); + + fun_set_ring_count(netdev, chan->tx_count, chan->rx_count); + return 0; +} + +static void fun_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kring, + struct netlink_ext_ack *extack) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + unsigned int max_depth = fp->fdev->q_depth; + + /* We size CQs to be twice the RQ depth so max RQ depth is half the + * max queue depth. + */ + ring->rx_max_pending = max_depth / 2; + ring->tx_max_pending = max_depth; + + ring->rx_pending = fp->rq_depth; + ring->tx_pending = fp->sq_depth; + + kring->rx_buf_len = PAGE_SIZE; + kring->cqe_size = FUNETH_CQE_SIZE; +} + +static int fun_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kring, + struct netlink_ext_ack *extack) +{ + struct funeth_priv *fp = netdev_priv(netdev); + int rc; + + if (ring->rx_mini_pending || ring->rx_jumbo_pending) + return -EINVAL; + + /* queue depths must be powers-of-2 */ + if (!is_power_of_2(ring->rx_pending) || + !is_power_of_2(ring->tx_pending)) + return -EINVAL; + + if (ring->rx_pending < FUNETH_MIN_QDEPTH || + ring->tx_pending < FUNETH_MIN_QDEPTH) + return -EINVAL; + + if (fp->sq_depth == ring->tx_pending && + fp->rq_depth == ring->rx_pending) + return 0; + + if (netif_running(netdev)) { + struct fun_qset req = { + .cq_depth = 2 * ring->rx_pending, + .rq_depth = ring->rx_pending, + .sq_depth = ring->tx_pending + }; + + rc = fun_replace_queues(netdev, &req, extack); + if (rc) + return rc; + } + + fp->sq_depth = ring->tx_pending; + fp->rq_depth = ring->rx_pending; + fp->cq_depth = 2 * fp->rq_depth; + return 0; +} + +static int fun_get_sset_count(struct net_device *dev, int sset) +{ + const struct funeth_priv *fp = netdev_priv(dev); + int n; + + switch (sset) { + case ETH_SS_STATS: + n = (dev->real_num_tx_queues + 1) * ARRAY_SIZE(txq_stat_names) + + (dev->real_num_rx_queues + 1) * ARRAY_SIZE(rxq_stat_names) + + (fp->num_xdpqs + 1) * ARRAY_SIZE(xdpq_stat_names) + + ARRAY_SIZE(tls_stat_names); + if (fp->port_caps & FUN_PORT_CAP_STATS) { + n += ARRAY_SIZE(mac_tx_stat_names) + + ARRAY_SIZE(mac_rx_stat_names); + } + return n; + default: + break; + } + return 0; +} + +static void fun_get_strings(struct net_device *netdev, u32 sset, u8 *data) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + unsigned int i, j; + u8 *p = data; + + switch (sset) { + case ETH_SS_STATS: + if (fp->port_caps & FUN_PORT_CAP_STATS) { + memcpy(p, mac_tx_stat_names, sizeof(mac_tx_stat_names)); + p += sizeof(mac_tx_stat_names); + memcpy(p, mac_rx_stat_names, sizeof(mac_rx_stat_names)); + p += sizeof(mac_rx_stat_names); + } + + for (i = 0; i < netdev->real_num_tx_queues; i++) { + for (j = 0; j < ARRAY_SIZE(txq_stat_names); j++) + ethtool_sprintf(&p, "%s[%u]", txq_stat_names[j], + i); + } + for (j = 0; j < ARRAY_SIZE(txq_stat_names); j++) + ethtool_sprintf(&p, txq_stat_names[j]); + + for (i = 0; i < fp->num_xdpqs; i++) { + for (j = 0; j < ARRAY_SIZE(xdpq_stat_names); j++) + ethtool_sprintf(&p, "%s[%u]", + xdpq_stat_names[j], i); + } + for (j = 0; j < ARRAY_SIZE(xdpq_stat_names); j++) + ethtool_sprintf(&p, xdpq_stat_names[j]); + + for (i = 0; i < netdev->real_num_rx_queues; i++) { + for (j = 0; j < ARRAY_SIZE(rxq_stat_names); j++) + ethtool_sprintf(&p, "%s[%u]", rxq_stat_names[j], + i); + } + for (j = 0; j < ARRAY_SIZE(rxq_stat_names); j++) + ethtool_sprintf(&p, rxq_stat_names[j]); + + for (j = 0; j < ARRAY_SIZE(tls_stat_names); j++) + ethtool_sprintf(&p, tls_stat_names[j]); + break; + default: + break; + } +} + +static u64 *get_mac_stats(const struct funeth_priv *fp, u64 *data) +{ +#define TX_STAT(s) \ + *data++ = be64_to_cpu(fp->stats[PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_##s]) + + TX_STAT(etherStatsOctets); + TX_STAT(etherStatsPkts); + TX_STAT(VLANTransmittedOK); + TX_STAT(ifOutUcastPkts); + TX_STAT(ifOutMulticastPkts); + TX_STAT(ifOutBroadcastPkts); + TX_STAT(ifOutErrors); + TX_STAT(CBFCPAUSEFramesTransmitted_0); + TX_STAT(CBFCPAUSEFramesTransmitted_1); + TX_STAT(CBFCPAUSEFramesTransmitted_2); + TX_STAT(CBFCPAUSEFramesTransmitted_3); + TX_STAT(CBFCPAUSEFramesTransmitted_4); + TX_STAT(CBFCPAUSEFramesTransmitted_5); + TX_STAT(CBFCPAUSEFramesTransmitted_6); + TX_STAT(CBFCPAUSEFramesTransmitted_7); + TX_STAT(CBFCPAUSEFramesTransmitted_8); + TX_STAT(CBFCPAUSEFramesTransmitted_9); + TX_STAT(CBFCPAUSEFramesTransmitted_10); + TX_STAT(CBFCPAUSEFramesTransmitted_11); + TX_STAT(CBFCPAUSEFramesTransmitted_12); + TX_STAT(CBFCPAUSEFramesTransmitted_13); + TX_STAT(CBFCPAUSEFramesTransmitted_14); + TX_STAT(CBFCPAUSEFramesTransmitted_15); + +#define RX_STAT(s) *data++ = be64_to_cpu(fp->stats[PORT_MAC_RX_##s]) + + RX_STAT(etherStatsOctets); + RX_STAT(etherStatsPkts); + RX_STAT(VLANReceivedOK); + RX_STAT(ifInUcastPkts); + RX_STAT(ifInMulticastPkts); + RX_STAT(ifInBroadcastPkts); + RX_STAT(etherStatsDropEvents); + RX_STAT(ifInErrors); + RX_STAT(aAlignmentErrors); + RX_STAT(CBFCPAUSEFramesReceived_0); + RX_STAT(CBFCPAUSEFramesReceived_1); + RX_STAT(CBFCPAUSEFramesReceived_2); + RX_STAT(CBFCPAUSEFramesReceived_3); + RX_STAT(CBFCPAUSEFramesReceived_4); + RX_STAT(CBFCPAUSEFramesReceived_5); + RX_STAT(CBFCPAUSEFramesReceived_6); + RX_STAT(CBFCPAUSEFramesReceived_7); + RX_STAT(CBFCPAUSEFramesReceived_8); + RX_STAT(CBFCPAUSEFramesReceived_9); + RX_STAT(CBFCPAUSEFramesReceived_10); + RX_STAT(CBFCPAUSEFramesReceived_11); + RX_STAT(CBFCPAUSEFramesReceived_12); + RX_STAT(CBFCPAUSEFramesReceived_13); + RX_STAT(CBFCPAUSEFramesReceived_14); + RX_STAT(CBFCPAUSEFramesReceived_15); + + return data; + +#undef TX_STAT +#undef RX_STAT +} + +static void fun_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + struct funeth_txq_stats txs; + struct funeth_rxq_stats rxs; + struct funeth_txq **xdpqs; + struct funeth_rxq **rxqs; + unsigned int i, start; + u64 *totals, *tot; + + if (fp->port_caps & FUN_PORT_CAP_STATS) + data = get_mac_stats(fp, data); + + rxqs = rtnl_dereference(fp->rxqs); + if (!rxqs) + return; + +#define ADD_STAT(cnt) do { \ + *data = (cnt); *tot++ += *data++; \ +} while (0) + + /* Tx queues */ + totals = data + netdev->real_num_tx_queues * ARRAY_SIZE(txq_stat_names); + + for (i = 0; i < netdev->real_num_tx_queues; i++) { + tot = totals; + + FUN_QSTAT_READ(fp->txqs[i], start, txs); + + ADD_STAT(txs.tx_pkts); + ADD_STAT(txs.tx_bytes); + ADD_STAT(txs.tx_cso); + ADD_STAT(txs.tx_tso); + ADD_STAT(txs.tx_encap_tso); + ADD_STAT(txs.tx_more); + ADD_STAT(txs.tx_nstops); + ADD_STAT(txs.tx_nrestarts); + ADD_STAT(txs.tx_map_err); + ADD_STAT(txs.tx_tls_pkts); + ADD_STAT(txs.tx_tls_bytes); + ADD_STAT(txs.tx_tls_fallback); + ADD_STAT(txs.tx_tls_drops); + } + data += ARRAY_SIZE(txq_stat_names); + + /* XDP Tx queues */ + xdpqs = rtnl_dereference(fp->xdpqs); + totals = data + fp->num_xdpqs * ARRAY_SIZE(xdpq_stat_names); + + for (i = 0; i < fp->num_xdpqs; i++) { + tot = totals; + + FUN_QSTAT_READ(xdpqs[i], start, txs); + + ADD_STAT(txs.tx_pkts); + ADD_STAT(txs.tx_bytes); + ADD_STAT(txs.tx_xdp_full); + ADD_STAT(txs.tx_map_err); + } + data += ARRAY_SIZE(xdpq_stat_names); + + /* Rx queues */ + totals = data + netdev->real_num_rx_queues * ARRAY_SIZE(rxq_stat_names); + + for (i = 0; i < netdev->real_num_rx_queues; i++) { + tot = totals; + + FUN_QSTAT_READ(rxqs[i], start, rxs); + + ADD_STAT(rxs.rx_pkts); + ADD_STAT(rxs.rx_bytes); + ADD_STAT(rxs.rx_cso); + ADD_STAT(rxs.gro_pkts); + ADD_STAT(rxs.gro_merged); + ADD_STAT(rxs.xdp_tx); + ADD_STAT(rxs.xdp_redir); + ADD_STAT(rxs.xdp_drops); + ADD_STAT(rxs.rx_bufs); + ADD_STAT(rxs.rx_page_alloc); + ADD_STAT(rxs.rx_mem_drops + rxs.xdp_err); + ADD_STAT(rxs.rx_budget); + ADD_STAT(rxs.rx_map_err); + } + data += ARRAY_SIZE(rxq_stat_names); +#undef ADD_STAT + + *data++ = atomic64_read(&fp->tx_tls_add); + *data++ = atomic64_read(&fp->tx_tls_del); + *data++ = atomic64_read(&fp->tx_tls_resync); +} + +#define RX_STAT(fp, s) be64_to_cpu((fp)->stats[PORT_MAC_RX_##s]) +#define TX_STAT(fp, s) \ + be64_to_cpu((fp)->stats[PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_##s]) +#define FEC_STAT(fp, s) \ + be64_to_cpu((fp)->stats[PORT_MAC_RX_STATS_MAX + \ + PORT_MAC_TX_STATS_MAX + PORT_MAC_FEC_##s]) + +static void fun_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *stats) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->tx_pause_frames = TX_STAT(fp, aPAUSEMACCtrlFramesTransmitted); + stats->rx_pause_frames = RX_STAT(fp, aPAUSEMACCtrlFramesReceived); +} + +static void fun_get_802_3_stats(struct net_device *netdev, + struct ethtool_eth_mac_stats *stats) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->FramesTransmittedOK = TX_STAT(fp, aFramesTransmittedOK); + stats->FramesReceivedOK = RX_STAT(fp, aFramesReceivedOK); + stats->FrameCheckSequenceErrors = RX_STAT(fp, aFrameCheckSequenceErrors); + stats->OctetsTransmittedOK = TX_STAT(fp, OctetsTransmittedOK); + stats->OctetsReceivedOK = RX_STAT(fp, OctetsReceivedOK); + stats->InRangeLengthErrors = RX_STAT(fp, aInRangeLengthErrors); + stats->FrameTooLongErrors = RX_STAT(fp, aFrameTooLongErrors); +} + +static void fun_get_802_3_ctrl_stats(struct net_device *netdev, + struct ethtool_eth_ctrl_stats *stats) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->MACControlFramesTransmitted = TX_STAT(fp, MACControlFramesTransmitted); + stats->MACControlFramesReceived = RX_STAT(fp, MACControlFramesReceived); +} + +static void fun_get_rmon_stats(struct net_device *netdev, + struct ethtool_rmon_stats *stats, + const struct ethtool_rmon_hist_range **ranges) +{ + static const struct ethtool_rmon_hist_range rmon_ranges[] = { + { 64, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 32767 }, + {} + }; + + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->undersize_pkts = RX_STAT(fp, etherStatsUndersizePkts); + stats->oversize_pkts = RX_STAT(fp, etherStatsOversizePkts); + stats->fragments = RX_STAT(fp, etherStatsFragments); + stats->jabbers = RX_STAT(fp, etherStatsJabbers); + + stats->hist[0] = RX_STAT(fp, etherStatsPkts64Octets); + stats->hist[1] = RX_STAT(fp, etherStatsPkts65to127Octets); + stats->hist[2] = RX_STAT(fp, etherStatsPkts128to255Octets); + stats->hist[3] = RX_STAT(fp, etherStatsPkts256to511Octets); + stats->hist[4] = RX_STAT(fp, etherStatsPkts512to1023Octets); + stats->hist[5] = RX_STAT(fp, etherStatsPkts1024to1518Octets); + stats->hist[6] = RX_STAT(fp, etherStatsPkts1519toMaxOctets); + + stats->hist_tx[0] = TX_STAT(fp, etherStatsPkts64Octets); + stats->hist_tx[1] = TX_STAT(fp, etherStatsPkts65to127Octets); + stats->hist_tx[2] = TX_STAT(fp, etherStatsPkts128to255Octets); + stats->hist_tx[3] = TX_STAT(fp, etherStatsPkts256to511Octets); + stats->hist_tx[4] = TX_STAT(fp, etherStatsPkts512to1023Octets); + stats->hist_tx[5] = TX_STAT(fp, etherStatsPkts1024to1518Octets); + stats->hist_tx[6] = TX_STAT(fp, etherStatsPkts1519toMaxOctets); + + *ranges = rmon_ranges; +} + +static void fun_get_fec_stats(struct net_device *netdev, + struct ethtool_fec_stats *stats) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->corrected_blocks.total = FEC_STAT(fp, Correctable); + stats->uncorrectable_blocks.total = FEC_STAT(fp, Uncorrectable); +} + +#undef RX_STAT +#undef TX_STAT +#undef FEC_STAT + +static int fun_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = netdev->real_num_rx_queues; + return 0; + default: + break; + } + return -EOPNOTSUPP; +} + +static int fun_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info) +{ + return 0; +} + +static u32 fun_get_rxfh_indir_size(struct net_device *netdev) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + return fp->indir_table_nentries; +} + +static u32 fun_get_rxfh_key_size(struct net_device *netdev) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + return sizeof(fp->rss_key); +} + +static int fun_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!fp->rss_cfg) + return -EOPNOTSUPP; + + if (indir) + memcpy(indir, fp->indir_table, + sizeof(u32) * fp->indir_table_nentries); + + if (key) + memcpy(key, fp->rss_key, sizeof(fp->rss_key)); + + if (hfunc) + *hfunc = fp->hash_algo == FUN_ETH_RSS_ALG_TOEPLITZ ? + ETH_RSS_HASH_TOP : ETH_RSS_HASH_CRC32; + + return 0; +} + +static int fun_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct funeth_priv *fp = netdev_priv(netdev); + const u32 *rss_indir = indir ? indir : fp->indir_table; + const u8 *rss_key = key ? key : fp->rss_key; + enum fun_eth_hash_alg algo; + + if (!fp->rss_cfg) + return -EOPNOTSUPP; + + if (hfunc == ETH_RSS_HASH_NO_CHANGE) + algo = fp->hash_algo; + else if (hfunc == ETH_RSS_HASH_CRC32) + algo = FUN_ETH_RSS_ALG_CRC32; + else if (hfunc == ETH_RSS_HASH_TOP) + algo = FUN_ETH_RSS_ALG_TOEPLITZ; + else + return -EINVAL; + + /* If the port is enabled try to reconfigure RSS and keep the new + * settings if successful. If it is down we update the RSS settings + * and apply them at the next UP time. + */ + if (netif_running(netdev)) { + int rc = fun_config_rss(netdev, algo, rss_key, rss_indir, + FUN_ADMIN_SUBOP_MODIFY); + if (rc) + return rc; + } + + fp->hash_algo = algo; + if (key) + memcpy(fp->rss_key, key, sizeof(fp->rss_key)); + if (indir) + memcpy(fp->indir_table, indir, + sizeof(u32) * fp->indir_table_nentries); + return 0; +} + +static int fun_get_ts_info(struct net_device *netdev, + struct ethtool_ts_info *info) +{ + info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + info->phc_index = -1; + info->tx_types = BIT(HWTSTAMP_TX_OFF); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL); + return 0; +} + +static unsigned int to_ethtool_fec(unsigned int fun_fec) +{ + unsigned int fec = 0; + + if (fun_fec == FUN_PORT_FEC_NA) + fec |= ETHTOOL_FEC_NONE; + if (fun_fec & FUN_PORT_FEC_OFF) + fec |= ETHTOOL_FEC_OFF; + if (fun_fec & FUN_PORT_FEC_RS) + fec |= ETHTOOL_FEC_RS; + if (fun_fec & FUN_PORT_FEC_FC) + fec |= ETHTOOL_FEC_BASER; + if (fun_fec & FUN_PORT_FEC_AUTO) + fec |= ETHTOOL_FEC_AUTO; + return fec; +} + +static int fun_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fec) +{ + struct funeth_priv *fp = netdev_priv(netdev); + u64 fec_data; + int rc; + + rc = fun_port_read_cmd(fp, FUN_ADMIN_PORT_KEY_FEC, &fec_data); + if (rc) + return rc; + + fec->active_fec = to_ethtool_fec(fec_data & 0xff); + fec->fec = to_ethtool_fec(fec_data >> 8); + return 0; +} + +static int fun_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fec) +{ + struct funeth_priv *fp = netdev_priv(netdev); + u64 fec_mode; + + switch (fec->fec) { + case ETHTOOL_FEC_AUTO: + fec_mode = FUN_PORT_FEC_AUTO; + break; + case ETHTOOL_FEC_OFF: + if (!(fp->port_caps & FUN_PORT_CAP_FEC_NONE)) + return -EINVAL; + fec_mode = FUN_PORT_FEC_OFF; + break; + case ETHTOOL_FEC_BASER: + if (!(fp->port_caps & FUN_PORT_CAP_FEC_FC)) + return -EINVAL; + fec_mode = FUN_PORT_FEC_FC; + break; + case ETHTOOL_FEC_RS: + if (!(fp->port_caps & FUN_PORT_CAP_FEC_RS)) + return -EINVAL; + fec_mode = FUN_PORT_FEC_RS; + break; + default: + return -EINVAL; + } + + return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_FEC, fec_mode); +} + +static const struct ethtool_ops fun_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, + .get_link_ksettings = fun_get_link_ksettings, + .set_link_ksettings = fun_set_link_ksettings, + .set_phys_id = fun_set_phys_id, + .get_drvinfo = fun_get_drvinfo, + .get_msglevel = fun_get_msglevel, + .set_msglevel = fun_set_msglevel, + .get_regs_len = fun_get_regs_len, + .get_regs = fun_get_regs, + .get_link = ethtool_op_get_link, + .get_coalesce = fun_get_coalesce, + .set_coalesce = fun_set_coalesce, + .get_ts_info = fun_get_ts_info, + .get_ringparam = fun_get_ringparam, + .set_ringparam = fun_set_ringparam, + .get_sset_count = fun_get_sset_count, + .get_strings = fun_get_strings, + .get_ethtool_stats = fun_get_ethtool_stats, + .get_rxnfc = fun_get_rxnfc, + .set_rxnfc = fun_set_rxnfc, + .get_rxfh_indir_size = fun_get_rxfh_indir_size, + .get_rxfh_key_size = fun_get_rxfh_key_size, + .get_rxfh = fun_get_rxfh, + .set_rxfh = fun_set_rxfh, + .get_channels = fun_get_channels, + .set_channels = fun_set_channels, + .get_fecparam = fun_get_fecparam, + .set_fecparam = fun_set_fecparam, + .get_pauseparam = fun_get_pauseparam, + .set_pauseparam = fun_set_pauseparam, + .nway_reset = fun_restart_an, + .get_pause_stats = fun_get_pause_stats, + .get_fec_stats = fun_get_fec_stats, + .get_eth_mac_stats = fun_get_802_3_stats, + .get_eth_ctrl_stats = fun_get_802_3_ctrl_stats, + .get_rmon_stats = fun_get_rmon_stats, +}; + +void fun_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &fun_ethtool_ops; +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ktls.c b/drivers/net/ethernet/fungible/funeth/funeth_ktls.c new file mode 100644 index 000000000000..f871def70d70 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_ktls.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include "funeth.h" +#include "funeth_ktls.h" + +static int fun_admin_ktls_create(struct funeth_priv *fp, unsigned int id) +{ + struct fun_admin_ktls_create_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS, + sizeof(req)), + .subop = FUN_ADMIN_SUBOP_CREATE, + .id = cpu_to_be32(id), + }; + + return fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0); +} + +static int fun_ktls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct fun_admin_ktls_modify_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS, + sizeof(req)), + .subop = FUN_ADMIN_SUBOP_MODIFY, + .id = cpu_to_be32(fp->ktls_id), + .tcp_seq = cpu_to_be32(start_offload_tcp_sn), + }; + struct fun_admin_ktls_modify_rsp rsp; + struct fun_ktls_tx_ctx *tx_ctx; + int rc; + + if (direction != TLS_OFFLOAD_CTX_DIR_TX) + return -EOPNOTSUPP; + + if (crypto_info->version == TLS_1_2_VERSION) + req.version = FUN_KTLS_TLSV2; + else + return -EOPNOTSUPP; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *c = (void *)crypto_info; + + req.cipher = FUN_KTLS_CIPHER_AES_GCM_128; + memcpy(req.key, c->key, sizeof(c->key)); + memcpy(req.iv, c->iv, sizeof(c->iv)); + memcpy(req.salt, c->salt, sizeof(c->salt)); + memcpy(req.record_seq, c->rec_seq, sizeof(c->rec_seq)); + break; + } + default: + return -EOPNOTSUPP; + } + + rc = fun_submit_admin_sync_cmd(fp->fdev, &req.common, &rsp, + sizeof(rsp), 0); + memzero_explicit(&req, sizeof(req)); + if (rc) + return rc; + + tx_ctx = tls_driver_ctx(sk, direction); + tx_ctx->tlsid = rsp.tlsid; + tx_ctx->next_seq = start_offload_tcp_sn; + atomic64_inc(&fp->tx_tls_add); + return 0; +} + +static void fun_ktls_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct fun_admin_ktls_modify_req req; + struct fun_ktls_tx_ctx *tx_ctx; + + if (direction != TLS_OFFLOAD_CTX_DIR_TX) + return; + + tx_ctx = __tls_driver_ctx(tls_ctx, direction); + + req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS, + offsetof(struct fun_admin_ktls_modify_req, tcp_seq)); + req.subop = FUN_ADMIN_SUBOP_MODIFY; + req.flags = cpu_to_be16(FUN_KTLS_MODIFY_REMOVE); + req.id = cpu_to_be32(fp->ktls_id); + req.tlsid = tx_ctx->tlsid; + + fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0); + atomic64_inc(&fp->tx_tls_del); +} + +static int fun_ktls_resync(struct net_device *netdev, struct sock *sk, u32 seq, + u8 *rcd_sn, enum tls_offload_ctx_dir direction) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct fun_admin_ktls_modify_req req; + struct fun_ktls_tx_ctx *tx_ctx; + int rc; + + if (direction != TLS_OFFLOAD_CTX_DIR_TX) + return -EOPNOTSUPP; + + tx_ctx = tls_driver_ctx(sk, direction); + + req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS, + offsetof(struct fun_admin_ktls_modify_req, key)); + req.subop = FUN_ADMIN_SUBOP_MODIFY; + req.flags = 0; + req.id = cpu_to_be32(fp->ktls_id); + req.tlsid = tx_ctx->tlsid; + req.tcp_seq = cpu_to_be32(seq); + req.version = 0; + req.cipher = 0; + memcpy(req.record_seq, rcd_sn, sizeof(req.record_seq)); + + atomic64_inc(&fp->tx_tls_resync); + rc = fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0); + if (!rc) + tx_ctx->next_seq = seq; + return rc; +} + +static const struct tlsdev_ops fun_ktls_ops = { + .tls_dev_add = fun_ktls_add, + .tls_dev_del = fun_ktls_del, + .tls_dev_resync = fun_ktls_resync, +}; + +int fun_ktls_init(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + int rc; + + rc = fun_admin_ktls_create(fp, netdev->dev_port); + if (rc) + return rc; + + fp->ktls_id = netdev->dev_port; + netdev->tlsdev_ops = &fun_ktls_ops; + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->features |= NETIF_F_HW_TLS_TX; + return 0; +} + +void fun_ktls_cleanup(struct funeth_priv *fp) +{ + if (fp->ktls_id == FUN_HCI_ID_INVALID) + return; + + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_KTLS, 0, fp->ktls_id); + fp->ktls_id = FUN_HCI_ID_INVALID; +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ktls.h b/drivers/net/ethernet/fungible/funeth/funeth_ktls.h new file mode 100644 index 000000000000..9d6f2141a959 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_ktls.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUN_KTLS_H +#define _FUN_KTLS_H + +#include <net/tls.h> + +struct funeth_priv; + +struct fun_ktls_tx_ctx { + __be64 tlsid; + u32 next_seq; +}; + +#if IS_ENABLED(CONFIG_TLS_DEVICE) +int fun_ktls_init(struct net_device *netdev); +void fun_ktls_cleanup(struct funeth_priv *fp); + +#else + +static inline void fun_ktls_init(struct net_device *netdev) +{ +} + +static inline void fun_ktls_cleanup(struct funeth_priv *fp) +{ +} +#endif + +#endif /* _FUN_KTLS_H */ diff --git a/drivers/net/ethernet/fungible/funeth/funeth_main.c b/drivers/net/ethernet/fungible/funeth/funeth_main.c new file mode 100644 index 000000000000..67dd02ed1fa3 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_main.c @@ -0,0 +1,2091 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/bpf.h> +#include <linux/crash_dump.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/filter.h> +#include <linux/idr.h> +#include <linux/if_vlan.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <linux/rtnetlink.h> +#include <linux/inetdevice.h> + +#include "funeth.h" +#include "funeth_devlink.h" +#include "funeth_ktls.h" +#include "fun_port.h" +#include "fun_queue.h" +#include "funeth_txrx.h" + +#define ADMIN_SQ_DEPTH 32 +#define ADMIN_CQ_DEPTH 64 +#define ADMIN_RQ_DEPTH 16 + +/* Default number of Tx/Rx queues. */ +#define FUN_DFLT_QUEUES 16U + +enum { + FUN_SERV_RES_CHANGE = FUN_SERV_FIRST_AVAIL, + FUN_SERV_DEL_PORTS, +}; + +static const struct pci_device_id funeth_id_table[] = { + { PCI_VDEVICE(FUNGIBLE, 0x0101) }, + { PCI_VDEVICE(FUNGIBLE, 0x0181) }, + { 0, } +}; + +/* Issue a port write admin command with @n key/value pairs. */ +static int fun_port_write_cmds(struct funeth_priv *fp, unsigned int n, + const int *keys, const u64 *data) +{ + unsigned int cmd_size, i; + union { + struct fun_admin_port_req req; + struct fun_admin_port_rsp rsp; + u8 v[ADMIN_SQE_SIZE]; + } cmd; + + cmd_size = offsetof(struct fun_admin_port_req, u.write.write48) + + n * sizeof(struct fun_admin_write48_req); + if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN) + return -EINVAL; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT, + cmd_size); + cmd.req.u.write = + FUN_ADMIN_PORT_WRITE_REQ_INIT(FUN_ADMIN_SUBOP_WRITE, 0, + fp->netdev->dev_port); + for (i = 0; i < n; i++) + cmd.req.u.write.write48[i] = + FUN_ADMIN_WRITE48_REQ_INIT(keys[i], data[i]); + + return fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, + &cmd.rsp, cmd_size, 0); +} + +int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data) +{ + return fun_port_write_cmds(fp, 1, &key, &data); +} + +/* Issue a port read admin command with @n key/value pairs. */ +static int fun_port_read_cmds(struct funeth_priv *fp, unsigned int n, + const int *keys, u64 *data) +{ + const struct fun_admin_read48_rsp *r48rsp; + unsigned int cmd_size, i; + int rc; + union { + struct fun_admin_port_req req; + struct fun_admin_port_rsp rsp; + u8 v[ADMIN_SQE_SIZE]; + } cmd; + + cmd_size = offsetof(struct fun_admin_port_req, u.read.read48) + + n * sizeof(struct fun_admin_read48_req); + if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN) + return -EINVAL; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT, + cmd_size); + cmd.req.u.read = + FUN_ADMIN_PORT_READ_REQ_INIT(FUN_ADMIN_SUBOP_READ, 0, + fp->netdev->dev_port); + for (i = 0; i < n; i++) + cmd.req.u.read.read48[i] = FUN_ADMIN_READ48_REQ_INIT(keys[i]); + + rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, + &cmd.rsp, cmd_size, 0); + if (rc) + return rc; + + for (r48rsp = cmd.rsp.u.read.read48, i = 0; i < n; i++, r48rsp++) { + data[i] = FUN_ADMIN_READ48_RSP_DATA_G(r48rsp->key_to_data); + dev_dbg(fp->fdev->dev, + "port_read_rsp lport=%u (key_to_data=0x%llx) key=%d data:%lld retval:%lld", + fp->lport, r48rsp->key_to_data, keys[i], data[i], + FUN_ADMIN_READ48_RSP_RET_G(r48rsp->key_to_data)); + } + return 0; +} + +int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data) +{ + return fun_port_read_cmds(fp, 1, &key, data); +} + +static void fun_report_link(struct net_device *netdev) +{ + if (netif_carrier_ok(netdev)) { + const struct funeth_priv *fp = netdev_priv(netdev); + const char *fec = "", *pause = ""; + int speed = fp->link_speed; + char unit = 'M'; + + if (fp->link_speed >= SPEED_1000) { + speed /= 1000; + unit = 'G'; + } + + if (fp->active_fec & FUN_PORT_FEC_RS) + fec = ", RS-FEC"; + else if (fp->active_fec & FUN_PORT_FEC_FC) + fec = ", BASER-FEC"; + + if ((fp->active_fc & FUN_PORT_CAP_PAUSE_MASK) == FUN_PORT_CAP_PAUSE_MASK) + pause = ", Tx/Rx PAUSE"; + else if (fp->active_fc & FUN_PORT_CAP_RX_PAUSE) + pause = ", Rx PAUSE"; + else if (fp->active_fc & FUN_PORT_CAP_TX_PAUSE) + pause = ", Tx PAUSE"; + + netdev_info(netdev, "Link up at %d %cb/s full-duplex%s%s\n", + speed, unit, pause, fec); + } else { + netdev_info(netdev, "Link down\n"); + } +} + +static int fun_adi_write(struct fun_dev *fdev, enum fun_admin_adi_attr attr, + unsigned int adi_id, const struct fun_adi_param *param) +{ + struct fun_admin_adi_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ADI, + sizeof(req)), + .u.write.subop = FUN_ADMIN_SUBOP_WRITE, + .u.write.attribute = attr, + .u.write.id = cpu_to_be32(adi_id), + .u.write.param = *param + }; + + return fun_submit_admin_sync_cmd(fdev, &req.common, NULL, 0, 0); +} + +/* Configure RSS for the given port. @op determines whether a new RSS context + * is to be created or whether an existing one should be reconfigured. The + * remaining parameters specify the hashing algorithm, key, and indirection + * table. + * + * This initiates packet delivery to the Rx queues set in the indirection + * table. + */ +int fun_config_rss(struct net_device *dev, int algo, const u8 *key, + const u32 *qtable, u8 op) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int table_len = fp->indir_table_nentries; + unsigned int len = FUN_ETH_RSS_MAX_KEY_SIZE + sizeof(u32) * table_len; + struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs); + union { + struct { + struct fun_admin_rss_req req; + struct fun_dataop_gl gl; + }; + struct fun_admin_generic_create_rsp rsp; + } cmd; + __be32 *indir_tab; + u16 flags; + int rc; + + if (op != FUN_ADMIN_SUBOP_CREATE && fp->rss_hw_id == FUN_HCI_ID_INVALID) + return -EINVAL; + + flags = op == FUN_ADMIN_SUBOP_CREATE ? + FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR : 0; + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_RSS, + sizeof(cmd)); + cmd.req.u.create = + FUN_ADMIN_RSS_CREATE_REQ_INIT(op, flags, fp->rss_hw_id, + dev->dev_port, algo, + FUN_ETH_RSS_MAX_KEY_SIZE, + table_len, 0, + FUN_ETH_RSS_MAX_KEY_SIZE); + cmd.req.u.create.dataop = FUN_DATAOP_HDR_INIT(1, 0, 1, 0, len); + fun_dataop_gl_init(&cmd.gl, 0, 0, len, fp->rss_dma_addr); + + /* write the key and indirection table into the RSS DMA area */ + memcpy(fp->rss_cfg, key, FUN_ETH_RSS_MAX_KEY_SIZE); + indir_tab = fp->rss_cfg + FUN_ETH_RSS_MAX_KEY_SIZE; + for (rc = 0; rc < table_len; rc++) + *indir_tab++ = cpu_to_be32(rxqs[*qtable++]->hw_cqid); + + rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, + &cmd.rsp, sizeof(cmd.rsp), 0); + if (!rc && op == FUN_ADMIN_SUBOP_CREATE) + fp->rss_hw_id = be32_to_cpu(cmd.rsp.id); + return rc; +} + +/* Destroy the HW RSS conntext associated with the given port. This also stops + * all packet delivery to our Rx queues. + */ +static void fun_destroy_rss(struct funeth_priv *fp) +{ + if (fp->rss_hw_id != FUN_HCI_ID_INVALID) { + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_RSS, 0, fp->rss_hw_id); + fp->rss_hw_id = FUN_HCI_ID_INVALID; + } +} + +static void fun_irq_aff_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct fun_irq *p = container_of(notify, struct fun_irq, aff_notify); + + cpumask_copy(&p->affinity_mask, mask); +} + +static void fun_irq_aff_release(struct kref __always_unused *ref) +{ +} + +/* Allocate an IRQ structure, assign an MSI-X index and initial affinity to it, + * and add it to the IRQ XArray. + */ +static struct fun_irq *fun_alloc_qirq(struct funeth_priv *fp, unsigned int idx, + int node, unsigned int xa_idx_offset) +{ + struct fun_irq *irq; + int cpu, res; + + cpu = cpumask_local_spread(idx, node); + node = cpu_to_mem(cpu); + + irq = kzalloc_node(sizeof(*irq), GFP_KERNEL, node); + if (!irq) + return ERR_PTR(-ENOMEM); + + res = fun_reserve_irqs(fp->fdev, 1, &irq->irq_idx); + if (res != 1) + goto free_irq; + + res = xa_insert(&fp->irqs, idx + xa_idx_offset, irq, GFP_KERNEL); + if (res) + goto release_irq; + + irq->irq = pci_irq_vector(fp->pdev, irq->irq_idx); + cpumask_set_cpu(cpu, &irq->affinity_mask); + irq->aff_notify.notify = fun_irq_aff_notify; + irq->aff_notify.release = fun_irq_aff_release; + irq->state = FUN_IRQ_INIT; + return irq; + +release_irq: + fun_release_irqs(fp->fdev, 1, &irq->irq_idx); +free_irq: + kfree(irq); + return ERR_PTR(res); +} + +static void fun_free_qirq(struct funeth_priv *fp, struct fun_irq *irq) +{ + netif_napi_del(&irq->napi); + fun_release_irqs(fp->fdev, 1, &irq->irq_idx); + kfree(irq); +} + +/* Release the IRQs reserved for Tx/Rx queues that aren't being used. */ +static void fun_prune_queue_irqs(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int nreleased = 0; + struct fun_irq *irq; + unsigned long idx; + + xa_for_each(&fp->irqs, idx, irq) { + if (irq->txq || irq->rxq) /* skip those in use */ + continue; + + xa_erase(&fp->irqs, idx); + fun_free_qirq(fp, irq); + nreleased++; + if (idx < fp->rx_irq_ofst) + fp->num_tx_irqs--; + else + fp->num_rx_irqs--; + } + netif_info(fp, intr, dev, "Released %u queue IRQs\n", nreleased); +} + +/* Reserve IRQs, one per queue, to acommodate the requested queue numbers @ntx + * and @nrx. IRQs are added incrementally to those we already have. + * We hold on to allocated IRQs until garbage collection of unused IRQs is + * separately requested. + */ +static int fun_alloc_queue_irqs(struct net_device *dev, unsigned int ntx, + unsigned int nrx) +{ + struct funeth_priv *fp = netdev_priv(dev); + int node = dev_to_node(&fp->pdev->dev); + struct fun_irq *irq; + unsigned int i; + + for (i = fp->num_tx_irqs; i < ntx; i++) { + irq = fun_alloc_qirq(fp, i, node, 0); + if (IS_ERR(irq)) + return PTR_ERR(irq); + + fp->num_tx_irqs++; + netif_tx_napi_add(dev, &irq->napi, fun_txq_napi_poll, + NAPI_POLL_WEIGHT); + } + + for (i = fp->num_rx_irqs; i < nrx; i++) { + irq = fun_alloc_qirq(fp, i, node, fp->rx_irq_ofst); + if (IS_ERR(irq)) + return PTR_ERR(irq); + + fp->num_rx_irqs++; + netif_napi_add(dev, &irq->napi, fun_rxq_napi_poll, + NAPI_POLL_WEIGHT); + } + + netif_info(fp, intr, dev, "Reserved %u/%u IRQs for Tx/Rx queues\n", + ntx, nrx); + return 0; +} + +static void free_txqs(struct funeth_txq **txqs, unsigned int nqs, + unsigned int start, int state) +{ + unsigned int i; + + for (i = start; i < nqs && txqs[i]; i++) + txqs[i] = funeth_txq_free(txqs[i], state); +} + +static int alloc_txqs(struct net_device *dev, struct funeth_txq **txqs, + unsigned int nqs, unsigned int depth, unsigned int start, + int state) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int i; + int err; + + for (i = start; i < nqs; i++) { + err = funeth_txq_create(dev, i, depth, xa_load(&fp->irqs, i), + state, &txqs[i]); + if (err) { + free_txqs(txqs, nqs, start, FUN_QSTATE_DESTROYED); + return err; + } + } + return 0; +} + +static void free_rxqs(struct funeth_rxq **rxqs, unsigned int nqs, + unsigned int start, int state) +{ + unsigned int i; + + for (i = start; i < nqs && rxqs[i]; i++) + rxqs[i] = funeth_rxq_free(rxqs[i], state); +} + +static int alloc_rxqs(struct net_device *dev, struct funeth_rxq **rxqs, + unsigned int nqs, unsigned int ncqe, unsigned int nrqe, + unsigned int start, int state) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int i; + int err; + + for (i = start; i < nqs; i++) { + err = funeth_rxq_create(dev, i, ncqe, nrqe, + xa_load(&fp->irqs, i + fp->rx_irq_ofst), + state, &rxqs[i]); + if (err) { + free_rxqs(rxqs, nqs, start, FUN_QSTATE_DESTROYED); + return err; + } + } + return 0; +} + +static void free_xdpqs(struct funeth_txq **xdpqs, unsigned int nqs, + unsigned int start, int state) +{ + unsigned int i; + + for (i = start; i < nqs && xdpqs[i]; i++) + xdpqs[i] = funeth_txq_free(xdpqs[i], state); + + if (state == FUN_QSTATE_DESTROYED) + kfree(xdpqs); +} + +static struct funeth_txq **alloc_xdpqs(struct net_device *dev, unsigned int nqs, + unsigned int depth, unsigned int start, + int state) +{ + struct funeth_txq **xdpqs; + unsigned int i; + int err; + + xdpqs = kcalloc(nqs, sizeof(*xdpqs), GFP_KERNEL); + if (!xdpqs) + return ERR_PTR(-ENOMEM); + + for (i = start; i < nqs; i++) { + err = funeth_txq_create(dev, i, depth, NULL, state, &xdpqs[i]); + if (err) { + free_xdpqs(xdpqs, nqs, start, FUN_QSTATE_DESTROYED); + return ERR_PTR(err); + } + } + return xdpqs; +} + +static void fun_free_rings(struct net_device *netdev, struct fun_qset *qset) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct funeth_txq **xdpqs = qset->xdpqs; + struct funeth_rxq **rxqs = qset->rxqs; + + /* qset may not specify any queues to operate on. In that case the + * currently installed queues are implied. + */ + if (!rxqs) { + rxqs = rtnl_dereference(fp->rxqs); + xdpqs = rtnl_dereference(fp->xdpqs); + qset->txqs = fp->txqs; + qset->nrxqs = netdev->real_num_rx_queues; + qset->ntxqs = netdev->real_num_tx_queues; + qset->nxdpqs = fp->num_xdpqs; + } + if (!rxqs) + return; + + if (rxqs == rtnl_dereference(fp->rxqs)) { + rcu_assign_pointer(fp->rxqs, NULL); + rcu_assign_pointer(fp->xdpqs, NULL); + synchronize_net(); + fp->txqs = NULL; + } + + free_rxqs(rxqs, qset->nrxqs, qset->rxq_start, qset->state); + free_txqs(qset->txqs, qset->ntxqs, qset->txq_start, qset->state); + free_xdpqs(xdpqs, qset->nxdpqs, qset->xdpq_start, qset->state); + if (qset->state == FUN_QSTATE_DESTROYED) + kfree(rxqs); + + /* Tell the caller which queues were operated on. */ + qset->rxqs = rxqs; + qset->xdpqs = xdpqs; +} + +static int fun_alloc_rings(struct net_device *netdev, struct fun_qset *qset) +{ + struct funeth_txq **xdpqs = NULL, **txqs; + struct funeth_rxq **rxqs; + int err; + + err = fun_alloc_queue_irqs(netdev, qset->ntxqs, qset->nrxqs); + if (err) + return err; + + rxqs = kcalloc(qset->ntxqs + qset->nrxqs, sizeof(*rxqs), GFP_KERNEL); + if (!rxqs) + return -ENOMEM; + + if (qset->nxdpqs) { + xdpqs = alloc_xdpqs(netdev, qset->nxdpqs, qset->sq_depth, + qset->xdpq_start, qset->state); + if (IS_ERR(xdpqs)) { + err = PTR_ERR(xdpqs); + goto free_qvec; + } + } + + txqs = (struct funeth_txq **)&rxqs[qset->nrxqs]; + err = alloc_txqs(netdev, txqs, qset->ntxqs, qset->sq_depth, + qset->txq_start, qset->state); + if (err) + goto free_xdpqs; + + err = alloc_rxqs(netdev, rxqs, qset->nrxqs, qset->cq_depth, + qset->rq_depth, qset->rxq_start, qset->state); + if (err) + goto free_txqs; + + qset->rxqs = rxqs; + qset->txqs = txqs; + qset->xdpqs = xdpqs; + return 0; + +free_txqs: + free_txqs(txqs, qset->ntxqs, qset->txq_start, FUN_QSTATE_DESTROYED); +free_xdpqs: + free_xdpqs(xdpqs, qset->nxdpqs, qset->xdpq_start, FUN_QSTATE_DESTROYED); +free_qvec: + kfree(rxqs); + return err; +} + +/* Take queues to the next level. Presently this means creating them on the + * device. + */ +static int fun_advance_ring_state(struct net_device *dev, struct fun_qset *qset) +{ + struct funeth_priv *fp = netdev_priv(dev); + int i, err; + + for (i = 0; i < qset->nrxqs; i++) { + err = fun_rxq_create_dev(qset->rxqs[i], + xa_load(&fp->irqs, + i + fp->rx_irq_ofst)); + if (err) + goto out; + } + + for (i = 0; i < qset->ntxqs; i++) { + err = fun_txq_create_dev(qset->txqs[i], xa_load(&fp->irqs, i)); + if (err) + goto out; + } + + for (i = 0; i < qset->nxdpqs; i++) { + err = fun_txq_create_dev(qset->xdpqs[i], NULL); + if (err) + goto out; + } + + return 0; + +out: + fun_free_rings(dev, qset); + return err; +} + +static int fun_port_create(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + union { + struct fun_admin_port_req req; + struct fun_admin_port_rsp rsp; + } cmd; + int rc; + + if (fp->lport != INVALID_LPORT) + return 0; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT, + sizeof(cmd.req)); + cmd.req.u.create = + FUN_ADMIN_PORT_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, 0, + netdev->dev_port); + + rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp, + sizeof(cmd.rsp), 0); + + if (!rc) + fp->lport = be16_to_cpu(cmd.rsp.u.create.lport); + return rc; +} + +static int fun_port_destroy(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + + if (fp->lport == INVALID_LPORT) + return 0; + + fp->lport = INVALID_LPORT; + return fun_res_destroy(fp->fdev, FUN_ADMIN_OP_PORT, 0, + netdev->dev_port); +} + +static int fun_eth_create(struct funeth_priv *fp) +{ + union { + struct fun_admin_eth_req req; + struct fun_admin_generic_create_rsp rsp; + } cmd; + int rc; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ETH, + sizeof(cmd.req)); + cmd.req.u.create = FUN_ADMIN_ETH_CREATE_REQ_INIT( + FUN_ADMIN_SUBOP_CREATE, + FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR, + 0, fp->netdev->dev_port); + + rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp, + sizeof(cmd.rsp), 0); + return rc ? rc : be32_to_cpu(cmd.rsp.id); +} + +static int fun_vi_create(struct funeth_priv *fp) +{ + struct fun_admin_vi_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_VI, + sizeof(req)), + .u.create = FUN_ADMIN_VI_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, + 0, + fp->netdev->dev_port, + fp->netdev->dev_port) + }; + + return fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0); +} + +/* Helper to create an ETH flow and bind an SQ to it. + * Returns the ETH id (>= 0) on success or a negative error. + */ +int fun_create_and_bind_tx(struct funeth_priv *fp, u32 sqid) +{ + int rc, ethid; + + ethid = fun_eth_create(fp); + if (ethid >= 0) { + rc = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_EPSQ, sqid, + FUN_ADMIN_BIND_TYPE_ETH, ethid); + if (rc) { + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, ethid); + ethid = rc; + } + } + return ethid; +} + +static irqreturn_t fun_queue_irq_handler(int irq, void *data) +{ + struct fun_irq *p = data; + + if (p->rxq) { + prefetch(p->rxq->next_cqe_info); + p->rxq->irq_cnt++; + } + napi_schedule_irqoff(&p->napi); + return IRQ_HANDLED; +} + +static int fun_enable_irqs(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned long idx, last; + unsigned int qidx; + struct fun_irq *p; + const char *qtype; + int err; + + xa_for_each(&fp->irqs, idx, p) { + if (p->txq) { + qtype = "tx"; + qidx = p->txq->qidx; + } else if (p->rxq) { + qtype = "rx"; + qidx = p->rxq->qidx; + } else { + continue; + } + + if (p->state != FUN_IRQ_INIT) + continue; + + snprintf(p->name, sizeof(p->name) - 1, "%s-%s-%u", dev->name, + qtype, qidx); + err = request_irq(p->irq, fun_queue_irq_handler, 0, p->name, p); + if (err) { + netdev_err(dev, "Failed to allocate IRQ %u, err %d\n", + p->irq, err); + goto unroll; + } + p->state = FUN_IRQ_REQUESTED; + } + + xa_for_each(&fp->irqs, idx, p) { + if (p->state != FUN_IRQ_REQUESTED) + continue; + irq_set_affinity_notifier(p->irq, &p->aff_notify); + irq_set_affinity_and_hint(p->irq, &p->affinity_mask); + napi_enable(&p->napi); + p->state = FUN_IRQ_ENABLED; + } + + return 0; + +unroll: + last = idx - 1; + xa_for_each_range(&fp->irqs, idx, p, 0, last) + if (p->state == FUN_IRQ_REQUESTED) { + free_irq(p->irq, p); + p->state = FUN_IRQ_INIT; + } + + return err; +} + +static void fun_disable_one_irq(struct fun_irq *irq) +{ + napi_disable(&irq->napi); + irq_set_affinity_notifier(irq->irq, NULL); + irq_update_affinity_hint(irq->irq, NULL); + free_irq(irq->irq, irq); + irq->state = FUN_IRQ_INIT; +} + +static void fun_disable_irqs(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_irq *p; + unsigned long idx; + + xa_for_each(&fp->irqs, idx, p) + if (p->state == FUN_IRQ_ENABLED) + fun_disable_one_irq(p); +} + +static void fun_down(struct net_device *dev, struct fun_qset *qset) +{ + struct funeth_priv *fp = netdev_priv(dev); + + /* If we don't have queues the data path is already down. + * Note netif_running(dev) may be true. + */ + if (!rcu_access_pointer(fp->rxqs)) + return; + + /* It is also down if the queues aren't on the device. */ + if (fp->txqs[0]->init_state >= FUN_QSTATE_INIT_FULL) { + netif_info(fp, ifdown, dev, + "Tearing down data path on device\n"); + fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_DISABLE, 0); + + netif_carrier_off(dev); + netif_tx_disable(dev); + + fun_destroy_rss(fp); + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, dev->dev_port); + fun_disable_irqs(dev); + } + + fun_free_rings(dev, qset); +} + +static int fun_up(struct net_device *dev, struct fun_qset *qset) +{ + static const int port_keys[] = { + FUN_ADMIN_PORT_KEY_STATS_DMA_LOW, + FUN_ADMIN_PORT_KEY_STATS_DMA_HIGH, + FUN_ADMIN_PORT_KEY_ENABLE + }; + + struct funeth_priv *fp = netdev_priv(dev); + u64 vals[] = { + lower_32_bits(fp->stats_dma_addr), + upper_32_bits(fp->stats_dma_addr), + FUN_PORT_FLAG_ENABLE_NOTIFY + }; + int err; + + netif_info(fp, ifup, dev, "Setting up data path on device\n"); + + if (qset->rxqs[0]->init_state < FUN_QSTATE_INIT_FULL) { + err = fun_advance_ring_state(dev, qset); + if (err) + return err; + } + + err = fun_vi_create(fp); + if (err) + goto free_queues; + + fp->txqs = qset->txqs; + rcu_assign_pointer(fp->rxqs, qset->rxqs); + rcu_assign_pointer(fp->xdpqs, qset->xdpqs); + + err = fun_enable_irqs(dev); + if (err) + goto destroy_vi; + + if (fp->rss_cfg) { + err = fun_config_rss(dev, fp->hash_algo, fp->rss_key, + fp->indir_table, FUN_ADMIN_SUBOP_CREATE); + } else { + /* The non-RSS case has only 1 queue. */ + err = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_VI, dev->dev_port, + FUN_ADMIN_BIND_TYPE_EPCQ, + qset->rxqs[0]->hw_cqid); + } + if (err) + goto disable_irqs; + + err = fun_port_write_cmds(fp, 3, port_keys, vals); + if (err) + goto free_rss; + + netif_tx_start_all_queues(dev); + return 0; + +free_rss: + fun_destroy_rss(fp); +disable_irqs: + fun_disable_irqs(dev); +destroy_vi: + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, dev->dev_port); +free_queues: + fun_free_rings(dev, qset); + return err; +} + +static int funeth_open(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct fun_qset qset = { + .nrxqs = netdev->real_num_rx_queues, + .ntxqs = netdev->real_num_tx_queues, + .nxdpqs = fp->num_xdpqs, + .cq_depth = fp->cq_depth, + .rq_depth = fp->rq_depth, + .sq_depth = fp->sq_depth, + .state = FUN_QSTATE_INIT_FULL, + }; + int rc; + + rc = fun_alloc_rings(netdev, &qset); + if (rc) + return rc; + + rc = fun_up(netdev, &qset); + if (rc) { + qset.state = FUN_QSTATE_DESTROYED; + fun_free_rings(netdev, &qset); + } + + return rc; +} + +static int funeth_close(struct net_device *netdev) +{ + struct fun_qset qset = { .state = FUN_QSTATE_DESTROYED }; + + fun_down(netdev, &qset); + return 0; +} + +static void fun_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct funeth_txq **xdpqs; + struct funeth_rxq **rxqs; + unsigned int i, start; + + stats->tx_packets = fp->tx_packets; + stats->tx_bytes = fp->tx_bytes; + stats->tx_dropped = fp->tx_dropped; + + stats->rx_packets = fp->rx_packets; + stats->rx_bytes = fp->rx_bytes; + stats->rx_dropped = fp->rx_dropped; + + rcu_read_lock(); + rxqs = rcu_dereference(fp->rxqs); + if (!rxqs) + goto unlock; + + for (i = 0; i < netdev->real_num_tx_queues; i++) { + struct funeth_txq_stats txs; + + FUN_QSTAT_READ(fp->txqs[i], start, txs); + stats->tx_packets += txs.tx_pkts; + stats->tx_bytes += txs.tx_bytes; + stats->tx_dropped += txs.tx_map_err; + } + + for (i = 0; i < netdev->real_num_rx_queues; i++) { + struct funeth_rxq_stats rxs; + + FUN_QSTAT_READ(rxqs[i], start, rxs); + stats->rx_packets += rxs.rx_pkts; + stats->rx_bytes += rxs.rx_bytes; + stats->rx_dropped += rxs.rx_map_err + rxs.rx_mem_drops; + } + + xdpqs = rcu_dereference(fp->xdpqs); + if (!xdpqs) + goto unlock; + + for (i = 0; i < fp->num_xdpqs; i++) { + struct funeth_txq_stats txs; + + FUN_QSTAT_READ(xdpqs[i], start, txs); + stats->tx_packets += txs.tx_pkts; + stats->tx_bytes += txs.tx_bytes; + } +unlock: + rcu_read_unlock(); +} + +static int fun_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct funeth_priv *fp = netdev_priv(netdev); + int rc; + + rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MTU, new_mtu); + if (!rc) + netdev->mtu = new_mtu; + return rc; +} + +static int fun_set_macaddr(struct net_device *netdev, void *addr) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct sockaddr *saddr = addr; + int rc; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + if (ether_addr_equal(netdev->dev_addr, saddr->sa_data)) + return 0; + + rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR, + ether_addr_to_u64(saddr->sa_data)); + if (!rc) + eth_hw_addr_set(netdev, saddr->sa_data); + return rc; +} + +static int fun_get_port_attributes(struct net_device *netdev) +{ + static const int keys[] = { + FUN_ADMIN_PORT_KEY_MACADDR, FUN_ADMIN_PORT_KEY_CAPABILITIES, + FUN_ADMIN_PORT_KEY_ADVERT, FUN_ADMIN_PORT_KEY_MTU + }; + static const int phys_keys[] = { + FUN_ADMIN_PORT_KEY_LANE_ATTRS, + }; + + struct funeth_priv *fp = netdev_priv(netdev); + u64 data[ARRAY_SIZE(keys)]; + u8 mac[ETH_ALEN]; + int i, rc; + + rc = fun_port_read_cmds(fp, ARRAY_SIZE(keys), keys, data); + if (rc) + return rc; + + for (i = 0; i < ARRAY_SIZE(keys); i++) { + switch (keys[i]) { + case FUN_ADMIN_PORT_KEY_MACADDR: + u64_to_ether_addr(data[i], mac); + if (is_zero_ether_addr(mac)) { + eth_hw_addr_random(netdev); + } else if (is_valid_ether_addr(mac)) { + eth_hw_addr_set(netdev, mac); + } else { + netdev_err(netdev, + "device provided a bad MAC address %pM\n", + mac); + return -EINVAL; + } + break; + + case FUN_ADMIN_PORT_KEY_CAPABILITIES: + fp->port_caps = data[i]; + break; + + case FUN_ADMIN_PORT_KEY_ADVERT: + fp->advertising = data[i]; + break; + + case FUN_ADMIN_PORT_KEY_MTU: + netdev->mtu = data[i]; + break; + } + } + + if (!(fp->port_caps & FUN_PORT_CAP_VPORT)) { + rc = fun_port_read_cmds(fp, ARRAY_SIZE(phys_keys), phys_keys, + data); + if (rc) + return rc; + + fp->lane_attrs = data[0]; + } + + if (netdev->addr_assign_type == NET_ADDR_RANDOM) + return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR, + ether_addr_to_u64(netdev->dev_addr)); + return 0; +} + +static int fun_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) +{ + const struct funeth_priv *fp = netdev_priv(dev); + + return copy_to_user(ifr->ifr_data, &fp->hwtstamp_cfg, + sizeof(fp->hwtstamp_cfg)) ? -EFAULT : 0; +} + +static int fun_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct hwtstamp_config cfg; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + /* no TX HW timestamps */ + cfg.tx_type = HWTSTAMP_TX_OFF; + + switch (cfg.rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + cfg.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + + fp->hwtstamp_cfg = cfg; + return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; +} + +static int fun_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCSHWTSTAMP: + return fun_hwtstamp_set(dev, ifr); + case SIOCGHWTSTAMP: + return fun_hwtstamp_get(dev, ifr); + default: + return -EOPNOTSUPP; + } +} + +/* Prepare the queues for XDP. */ +static int fun_enter_xdp(struct net_device *dev, struct bpf_prog *prog) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int i, nqs = num_online_cpus(); + struct funeth_txq **xdpqs; + struct funeth_rxq **rxqs; + int err; + + xdpqs = alloc_xdpqs(dev, nqs, fp->sq_depth, 0, FUN_QSTATE_INIT_FULL); + if (IS_ERR(xdpqs)) + return PTR_ERR(xdpqs); + + rxqs = rtnl_dereference(fp->rxqs); + for (i = 0; i < dev->real_num_rx_queues; i++) { + err = fun_rxq_set_bpf(rxqs[i], prog); + if (err) + goto out; + } + + fp->num_xdpqs = nqs; + rcu_assign_pointer(fp->xdpqs, xdpqs); + return 0; +out: + while (i--) + fun_rxq_set_bpf(rxqs[i], NULL); + + free_xdpqs(xdpqs, nqs, 0, FUN_QSTATE_DESTROYED); + return err; +} + +/* Set the queues for non-XDP operation. */ +static void fun_end_xdp(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct funeth_txq **xdpqs; + struct funeth_rxq **rxqs; + unsigned int i; + + xdpqs = rtnl_dereference(fp->xdpqs); + rcu_assign_pointer(fp->xdpqs, NULL); + synchronize_net(); + /* at this point both Rx and Tx XDP processing has ended */ + + free_xdpqs(xdpqs, fp->num_xdpqs, 0, FUN_QSTATE_DESTROYED); + fp->num_xdpqs = 0; + + rxqs = rtnl_dereference(fp->rxqs); + for (i = 0; i < dev->real_num_rx_queues; i++) + fun_rxq_set_bpf(rxqs[i], NULL); +} + +#define XDP_MAX_MTU \ + (PAGE_SIZE - FUN_XDP_HEADROOM - VLAN_ETH_HLEN - FUN_RX_TAILROOM) + +static int fun_xdp_setup(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct bpf_prog *old_prog, *prog = xdp->prog; + struct funeth_priv *fp = netdev_priv(dev); + int i, err; + + /* XDP uses at most one buffer */ + if (prog && dev->mtu > XDP_MAX_MTU) { + netdev_err(dev, "device MTU %u too large for XDP\n", dev->mtu); + NL_SET_ERR_MSG_MOD(xdp->extack, + "Device MTU too large for XDP"); + return -EINVAL; + } + + if (!netif_running(dev)) { + fp->num_xdpqs = prog ? num_online_cpus() : 0; + } else if (prog && !fp->xdp_prog) { + err = fun_enter_xdp(dev, prog); + if (err) { + NL_SET_ERR_MSG_MOD(xdp->extack, + "Failed to set queues for XDP."); + return err; + } + } else if (!prog && fp->xdp_prog) { + fun_end_xdp(dev); + } else { + struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs); + + for (i = 0; i < dev->real_num_rx_queues; i++) + WRITE_ONCE(rxqs[i]->xdp_prog, prog); + } + + dev->max_mtu = prog ? XDP_MAX_MTU : FUN_MAX_MTU; + old_prog = xchg(&fp->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static int fun_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return fun_xdp_setup(dev, xdp); + default: + return -EINVAL; + } +} + +static struct devlink_port *fun_get_devlink_port(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + + return &fp->dl_port; +} + +static int fun_init_vports(struct fun_ethdev *ed, unsigned int n) +{ + if (ed->num_vports) + return -EINVAL; + + ed->vport_info = kvcalloc(n, sizeof(*ed->vport_info), GFP_KERNEL); + if (!ed->vport_info) + return -ENOMEM; + ed->num_vports = n; + return 0; +} + +static void fun_free_vports(struct fun_ethdev *ed) +{ + kvfree(ed->vport_info); + ed->vport_info = NULL; + ed->num_vports = 0; +} + +static struct fun_vport_info *fun_get_vport(struct fun_ethdev *ed, + unsigned int vport) +{ + if (!ed->vport_info || vport >= ed->num_vports) + return NULL; + + return ed->vport_info + vport; +} + +static int fun_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_adi_param mac_param = {}; + struct fun_dev *fdev = fp->fdev; + struct fun_ethdev *ed = to_fun_ethdev(fdev); + struct fun_vport_info *vi; + int rc = -EINVAL; + + if (is_multicast_ether_addr(mac)) + return -EINVAL; + + mutex_lock(&ed->state_mutex); + vi = fun_get_vport(ed, vf); + if (!vi) + goto unlock; + + mac_param.u.mac = FUN_ADI_MAC_INIT(ether_addr_to_u64(mac)); + rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_MACADDR, vf + 1, + &mac_param); + if (!rc) + ether_addr_copy(vi->mac, mac); +unlock: + mutex_unlock(&ed->state_mutex); + return rc; +} + +static int fun_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_adi_param vlan_param = {}; + struct fun_dev *fdev = fp->fdev; + struct fun_ethdev *ed = to_fun_ethdev(fdev); + struct fun_vport_info *vi; + int rc = -EINVAL; + + if (vlan > 4095 || qos > 7) + return -EINVAL; + if (vlan_proto && vlan_proto != htons(ETH_P_8021Q) && + vlan_proto != htons(ETH_P_8021AD)) + return -EINVAL; + + mutex_lock(&ed->state_mutex); + vi = fun_get_vport(ed, vf); + if (!vi) + goto unlock; + + vlan_param.u.vlan = FUN_ADI_VLAN_INIT(be16_to_cpu(vlan_proto), + ((u16)qos << VLAN_PRIO_SHIFT) | vlan); + rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_VLAN, vf + 1, &vlan_param); + if (!rc) { + vi->vlan = vlan; + vi->qos = qos; + vi->vlan_proto = vlan_proto; + } +unlock: + mutex_unlock(&ed->state_mutex); + return rc; +} + +static int fun_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_adi_param rate_param = {}; + struct fun_dev *fdev = fp->fdev; + struct fun_ethdev *ed = to_fun_ethdev(fdev); + struct fun_vport_info *vi; + int rc = -EINVAL; + + if (min_tx_rate) + return -EINVAL; + + mutex_lock(&ed->state_mutex); + vi = fun_get_vport(ed, vf); + if (!vi) + goto unlock; + + rate_param.u.rate = FUN_ADI_RATE_INIT(max_tx_rate); + rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_RATE, vf + 1, &rate_param); + if (!rc) + vi->max_rate = max_tx_rate; +unlock: + mutex_unlock(&ed->state_mutex); + return rc; +} + +static int fun_get_vf_config(struct net_device *dev, int vf, + struct ifla_vf_info *ivi) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_ethdev *ed = to_fun_ethdev(fp->fdev); + const struct fun_vport_info *vi; + + mutex_lock(&ed->state_mutex); + vi = fun_get_vport(ed, vf); + if (!vi) + goto unlock; + + memset(ivi, 0, sizeof(*ivi)); + ivi->vf = vf; + ether_addr_copy(ivi->mac, vi->mac); + ivi->vlan = vi->vlan; + ivi->qos = vi->qos; + ivi->vlan_proto = vi->vlan_proto; + ivi->max_tx_rate = vi->max_rate; + ivi->spoofchk = vi->spoofchk; +unlock: + mutex_unlock(&ed->state_mutex); + return vi ? 0 : -EINVAL; +} + +static void fun_uninit(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + + fun_prune_queue_irqs(dev); + xa_destroy(&fp->irqs); +} + +static const struct net_device_ops fun_netdev_ops = { + .ndo_open = funeth_open, + .ndo_stop = funeth_close, + .ndo_start_xmit = fun_start_xmit, + .ndo_get_stats64 = fun_get_stats64, + .ndo_change_mtu = fun_change_mtu, + .ndo_set_mac_address = fun_set_macaddr, + .ndo_validate_addr = eth_validate_addr, + .ndo_eth_ioctl = fun_ioctl, + .ndo_uninit = fun_uninit, + .ndo_bpf = fun_xdp, + .ndo_xdp_xmit = fun_xdp_xmit_frames, + .ndo_set_vf_mac = fun_set_vf_mac, + .ndo_set_vf_vlan = fun_set_vf_vlan, + .ndo_set_vf_rate = fun_set_vf_rate, + .ndo_get_vf_config = fun_get_vf_config, + .ndo_get_devlink_port = fun_get_devlink_port, +}; + +#define GSO_ENCAP_FLAGS (NETIF_F_GSO_GRE | NETIF_F_GSO_IPXIP4 | \ + NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | \ + NETIF_F_GSO_UDP_TUNNEL_CSUM) +#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) +#define VLAN_FEAT (NETIF_F_SG | NETIF_F_HW_CSUM | TSO_FLAGS | \ + GSO_ENCAP_FLAGS | NETIF_F_HIGHDMA) + +static void fun_dflt_rss_indir(struct funeth_priv *fp, unsigned int nrx) +{ + unsigned int i; + + for (i = 0; i < fp->indir_table_nentries; i++) + fp->indir_table[i] = ethtool_rxfh_indir_default(i, nrx); +} + +/* Reset the RSS indirection table to equal distribution across the current + * number of Rx queues. Called at init time and whenever the number of Rx + * queues changes subsequently. Note that this may also resize the indirection + * table. + */ +static void fun_reset_rss_indir(struct net_device *dev, unsigned int nrx) +{ + struct funeth_priv *fp = netdev_priv(dev); + + if (!fp->rss_cfg) + return; + + /* Set the table size to the max possible that allows an equal number + * of occurrences of each CQ. + */ + fp->indir_table_nentries = rounddown(FUN_ETH_RSS_MAX_INDIR_ENT, nrx); + fun_dflt_rss_indir(fp, nrx); +} + +/* Update the RSS LUT to contain only queues in [0, nrx). Normally this will + * update the LUT to an equal distribution among nrx queues, If @only_if_needed + * is set the LUT is left unchanged if it already does not reference any queues + * >= nrx. + */ +static int fun_rss_set_qnum(struct net_device *dev, unsigned int nrx, + bool only_if_needed) +{ + struct funeth_priv *fp = netdev_priv(dev); + u32 old_lut[FUN_ETH_RSS_MAX_INDIR_ENT]; + unsigned int i, oldsz; + int err; + + if (!fp->rss_cfg) + return 0; + + if (only_if_needed) { + for (i = 0; i < fp->indir_table_nentries; i++) + if (fp->indir_table[i] >= nrx) + break; + + if (i >= fp->indir_table_nentries) + return 0; + } + + memcpy(old_lut, fp->indir_table, sizeof(old_lut)); + oldsz = fp->indir_table_nentries; + fun_reset_rss_indir(dev, nrx); + + err = fun_config_rss(dev, fp->hash_algo, fp->rss_key, + fp->indir_table, FUN_ADMIN_SUBOP_MODIFY); + if (!err) + return 0; + + memcpy(fp->indir_table, old_lut, sizeof(old_lut)); + fp->indir_table_nentries = oldsz; + return err; +} + +/* Allocate the DMA area for the RSS configuration commands to the device, and + * initialize the hash, hash key, indirection table size and its entries to + * their defaults. The indirection table defaults to equal distribution across + * the Rx queues. + */ +static int fun_init_rss(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + size_t size = sizeof(fp->rss_key) + sizeof(fp->indir_table); + + fp->rss_hw_id = FUN_HCI_ID_INVALID; + if (!(fp->port_caps & FUN_PORT_CAP_OFFLOADS)) + return 0; + + fp->rss_cfg = dma_alloc_coherent(&fp->pdev->dev, size, + &fp->rss_dma_addr, GFP_KERNEL); + if (!fp->rss_cfg) + return -ENOMEM; + + fp->hash_algo = FUN_ETH_RSS_ALG_TOEPLITZ; + netdev_rss_key_fill(fp->rss_key, sizeof(fp->rss_key)); + fun_reset_rss_indir(dev, dev->real_num_rx_queues); + return 0; +} + +static void fun_free_rss(struct funeth_priv *fp) +{ + if (fp->rss_cfg) { + dma_free_coherent(&fp->pdev->dev, + sizeof(fp->rss_key) + sizeof(fp->indir_table), + fp->rss_cfg, fp->rss_dma_addr); + fp->rss_cfg = NULL; + } +} + +void fun_set_ring_count(struct net_device *netdev, unsigned int ntx, + unsigned int nrx) +{ + netif_set_real_num_tx_queues(netdev, ntx); + if (nrx != netdev->real_num_rx_queues) { + netif_set_real_num_rx_queues(netdev, nrx); + fun_reset_rss_indir(netdev, nrx); + } +} + +static int fun_init_stats_area(struct funeth_priv *fp) +{ + unsigned int nstats; + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return 0; + + nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX + + PORT_MAC_FEC_STATS_MAX; + + fp->stats = dma_alloc_coherent(&fp->pdev->dev, nstats * sizeof(u64), + &fp->stats_dma_addr, GFP_KERNEL); + if (!fp->stats) + return -ENOMEM; + return 0; +} + +static void fun_free_stats_area(struct funeth_priv *fp) +{ + unsigned int nstats; + + if (fp->stats) { + nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX; + dma_free_coherent(&fp->pdev->dev, nstats * sizeof(u64), + fp->stats, fp->stats_dma_addr); + fp->stats = NULL; + } +} + +static int fun_dl_port_register(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct devlink *dl = priv_to_devlink(fp->fdev); + struct devlink_port_attrs attrs = {}; + unsigned int idx; + + if (fp->port_caps & FUN_PORT_CAP_VPORT) { + attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL; + idx = fp->lport; + } else { + idx = netdev->dev_port; + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.lanes = fp->lane_attrs & 7; + if (fp->lane_attrs & FUN_PORT_LANE_SPLIT) { + attrs.split = 1; + attrs.phys.port_number = fp->lport & ~3; + attrs.phys.split_subport_number = fp->lport & 3; + } else { + attrs.phys.port_number = fp->lport; + } + } + + devlink_port_attrs_set(&fp->dl_port, &attrs); + + return devlink_port_register(dl, &fp->dl_port, idx); +} + +/* Determine the max Tx/Rx queues for a port. */ +static int fun_max_qs(struct fun_ethdev *ed, unsigned int *ntx, + unsigned int *nrx) +{ + int neth; + + if (ed->num_ports > 1 || is_kdump_kernel()) { + *ntx = 1; + *nrx = 1; + return 0; + } + + neth = fun_get_res_count(&ed->fdev, FUN_ADMIN_OP_ETH); + if (neth < 0) + return neth; + + /* We determine the max number of queues based on the CPU + * cores, device interrupts and queues, RSS size, and device Tx flows. + * + * - At least 1 Rx and 1 Tx queues. + * - At most 1 Rx/Tx queue per core. + * - Each Rx/Tx queue needs 1 SQ. + */ + *ntx = min(ed->nsqs_per_port - 1, num_online_cpus()); + *nrx = *ntx; + if (*ntx > neth) + *ntx = neth; + if (*nrx > FUN_ETH_RSS_MAX_INDIR_ENT) + *nrx = FUN_ETH_RSS_MAX_INDIR_ENT; + return 0; +} + +static void fun_queue_defaults(struct net_device *dev, unsigned int nsqs) +{ + unsigned int ntx, nrx; + + ntx = min(dev->num_tx_queues, FUN_DFLT_QUEUES); + nrx = min(dev->num_rx_queues, FUN_DFLT_QUEUES); + if (ntx <= nrx) { + ntx = min(ntx, nsqs / 2); + nrx = min(nrx, nsqs - ntx); + } else { + nrx = min(nrx, nsqs / 2); + ntx = min(ntx, nsqs - nrx); + } + + netif_set_real_num_tx_queues(dev, ntx); + netif_set_real_num_rx_queues(dev, nrx); +} + +/* Replace the existing Rx/Tx/XDP queues with equal number of queues with + * different settings, e.g. depth. This is a disruptive replacement that + * temporarily shuts down the data path and should be limited to changes that + * can't be applied to live queues. The old queues are always discarded. + */ +int fun_replace_queues(struct net_device *dev, struct fun_qset *newqs, + struct netlink_ext_ack *extack) +{ + struct fun_qset oldqs = { .state = FUN_QSTATE_DESTROYED }; + struct funeth_priv *fp = netdev_priv(dev); + int err; + + newqs->nrxqs = dev->real_num_rx_queues; + newqs->ntxqs = dev->real_num_tx_queues; + newqs->nxdpqs = fp->num_xdpqs; + newqs->state = FUN_QSTATE_INIT_SW; + err = fun_alloc_rings(dev, newqs); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to allocate memory for new queues, keeping current settings"); + return err; + } + + fun_down(dev, &oldqs); + + err = fun_up(dev, newqs); + if (!err) + return 0; + + /* The new queues couldn't be installed. We do not retry the old queues + * as they are the same to the device as the new queues and would + * similarly fail. + */ + newqs->state = FUN_QSTATE_DESTROYED; + fun_free_rings(dev, newqs); + NL_SET_ERR_MSG_MOD(extack, "Unable to restore the data path with the new queues."); + return err; +} + +/* Change the number of Rx/Tx queues of a device while it is up. This is done + * by incrementally adding/removing queues to meet the new requirements while + * handling ongoing traffic. + */ +int fun_change_num_queues(struct net_device *dev, unsigned int ntx, + unsigned int nrx) +{ + unsigned int keep_tx = min(dev->real_num_tx_queues, ntx); + unsigned int keep_rx = min(dev->real_num_rx_queues, nrx); + struct funeth_priv *fp = netdev_priv(dev); + struct fun_qset oldqs = { + .rxqs = rtnl_dereference(fp->rxqs), + .txqs = fp->txqs, + .nrxqs = dev->real_num_rx_queues, + .ntxqs = dev->real_num_tx_queues, + .rxq_start = keep_rx, + .txq_start = keep_tx, + .state = FUN_QSTATE_DESTROYED + }; + struct fun_qset newqs = { + .nrxqs = nrx, + .ntxqs = ntx, + .rxq_start = keep_rx, + .txq_start = keep_tx, + .cq_depth = fp->cq_depth, + .rq_depth = fp->rq_depth, + .sq_depth = fp->sq_depth, + .state = FUN_QSTATE_INIT_FULL + }; + int i, err; + + err = fun_alloc_rings(dev, &newqs); + if (err) + goto free_irqs; + + err = fun_enable_irqs(dev); /* of any newly added queues */ + if (err) + goto free_rings; + + /* copy the queues we are keeping to the new set */ + memcpy(newqs.rxqs, oldqs.rxqs, keep_rx * sizeof(*oldqs.rxqs)); + memcpy(newqs.txqs, fp->txqs, keep_tx * sizeof(*fp->txqs)); + + if (nrx < dev->real_num_rx_queues) { + err = fun_rss_set_qnum(dev, nrx, true); + if (err) + goto disable_tx_irqs; + + for (i = nrx; i < dev->real_num_rx_queues; i++) + fun_disable_one_irq(container_of(oldqs.rxqs[i]->napi, + struct fun_irq, napi)); + + netif_set_real_num_rx_queues(dev, nrx); + } + + if (ntx < dev->real_num_tx_queues) + netif_set_real_num_tx_queues(dev, ntx); + + rcu_assign_pointer(fp->rxqs, newqs.rxqs); + fp->txqs = newqs.txqs; + synchronize_net(); + + if (ntx > dev->real_num_tx_queues) + netif_set_real_num_tx_queues(dev, ntx); + + if (nrx > dev->real_num_rx_queues) { + netif_set_real_num_rx_queues(dev, nrx); + fun_rss_set_qnum(dev, nrx, false); + } + + /* disable interrupts of any excess Tx queues */ + for (i = keep_tx; i < oldqs.ntxqs; i++) + fun_disable_one_irq(oldqs.txqs[i]->irq); + + fun_free_rings(dev, &oldqs); + fun_prune_queue_irqs(dev); + return 0; + +disable_tx_irqs: + for (i = oldqs.ntxqs; i < ntx; i++) + fun_disable_one_irq(newqs.txqs[i]->irq); +free_rings: + newqs.state = FUN_QSTATE_DESTROYED; + fun_free_rings(dev, &newqs); +free_irqs: + fun_prune_queue_irqs(dev); + return err; +} + +static int fun_create_netdev(struct fun_ethdev *ed, unsigned int portid) +{ + struct fun_dev *fdev = &ed->fdev; + struct net_device *netdev; + struct funeth_priv *fp; + unsigned int ntx, nrx; + int rc; + + rc = fun_max_qs(ed, &ntx, &nrx); + if (rc) + return rc; + + netdev = alloc_etherdev_mqs(sizeof(*fp), ntx, nrx); + if (!netdev) { + rc = -ENOMEM; + goto done; + } + + netdev->dev_port = portid; + fun_queue_defaults(netdev, ed->nsqs_per_port); + + fp = netdev_priv(netdev); + fp->fdev = fdev; + fp->pdev = to_pci_dev(fdev->dev); + fp->netdev = netdev; + xa_init(&fp->irqs); + fp->rx_irq_ofst = ntx; + seqcount_init(&fp->link_seq); + + fp->lport = INVALID_LPORT; + rc = fun_port_create(netdev); + if (rc) + goto free_netdev; + + /* bind port to admin CQ for async events */ + rc = fun_bind(fdev, FUN_ADMIN_BIND_TYPE_PORT, portid, + FUN_ADMIN_BIND_TYPE_EPCQ, 0); + if (rc) + goto destroy_port; + + rc = fun_get_port_attributes(netdev); + if (rc) + goto destroy_port; + + rc = fun_init_rss(netdev); + if (rc) + goto destroy_port; + + rc = fun_init_stats_area(fp); + if (rc) + goto free_rss; + + SET_NETDEV_DEV(netdev, fdev->dev); + netdev->netdev_ops = &fun_netdev_ops; + + netdev->hw_features = NETIF_F_SG | NETIF_F_RXHASH | NETIF_F_RXCSUM; + if (fp->port_caps & FUN_PORT_CAP_OFFLOADS) + netdev->hw_features |= NETIF_F_HW_CSUM | TSO_FLAGS; + if (fp->port_caps & FUN_PORT_CAP_ENCAP_OFFLOADS) + netdev->hw_features |= GSO_ENCAP_FLAGS; + + netdev->features |= netdev->hw_features | NETIF_F_HIGHDMA; + netdev->vlan_features = netdev->features & VLAN_FEAT; + netdev->mpls_features = netdev->vlan_features; + netdev->hw_enc_features = netdev->hw_features; + + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = FUN_MAX_MTU; + + fun_set_ethtool_ops(netdev); + + /* configurable parameters */ + fp->sq_depth = min(SQ_DEPTH, fdev->q_depth); + fp->cq_depth = min(CQ_DEPTH, fdev->q_depth); + fp->rq_depth = min_t(unsigned int, RQ_DEPTH, fdev->q_depth); + fp->rx_coal_usec = CQ_INTCOAL_USEC; + fp->rx_coal_count = CQ_INTCOAL_NPKT; + fp->tx_coal_usec = SQ_INTCOAL_USEC; + fp->tx_coal_count = SQ_INTCOAL_NPKT; + fp->cq_irq_db = FUN_IRQ_CQ_DB(fp->rx_coal_usec, fp->rx_coal_count); + + rc = fun_dl_port_register(netdev); + if (rc) + goto free_stats; + + fp->ktls_id = FUN_HCI_ID_INVALID; + fun_ktls_init(netdev); /* optional, failure OK */ + + netif_carrier_off(netdev); + ed->netdevs[portid] = netdev; + rc = register_netdev(netdev); + if (rc) + goto unreg_devlink; + + if (fp->dl_port.devlink) + devlink_port_type_eth_set(&fp->dl_port, netdev); + + return 0; + +unreg_devlink: + ed->netdevs[portid] = NULL; + fun_ktls_cleanup(fp); + if (fp->dl_port.devlink) + devlink_port_unregister(&fp->dl_port); +free_stats: + fun_free_stats_area(fp); +free_rss: + fun_free_rss(fp); +destroy_port: + fun_port_destroy(netdev); +free_netdev: + free_netdev(netdev); +done: + dev_err(fdev->dev, "couldn't allocate port %u, error %d", portid, rc); + return rc; +} + +static void fun_destroy_netdev(struct net_device *netdev) +{ + struct funeth_priv *fp; + + fp = netdev_priv(netdev); + if (fp->dl_port.devlink) { + devlink_port_type_clear(&fp->dl_port); + devlink_port_unregister(&fp->dl_port); + } + unregister_netdev(netdev); + fun_ktls_cleanup(fp); + fun_free_stats_area(fp); + fun_free_rss(fp); + fun_port_destroy(netdev); + free_netdev(netdev); +} + +static int fun_create_ports(struct fun_ethdev *ed, unsigned int nports) +{ + struct fun_dev *fd = &ed->fdev; + int i, rc; + + /* The admin queue takes 1 IRQ and 2 SQs. */ + ed->nsqs_per_port = min(fd->num_irqs - 1, + fd->kern_end_qid - 2) / nports; + if (ed->nsqs_per_port < 2) { + dev_err(fd->dev, "Too few SQs for %u ports", nports); + return -EINVAL; + } + + ed->netdevs = kcalloc(nports, sizeof(*ed->netdevs), GFP_KERNEL); + if (!ed->netdevs) + return -ENOMEM; + + ed->num_ports = nports; + for (i = 0; i < nports; i++) { + rc = fun_create_netdev(ed, i); + if (rc) + goto free_netdevs; + } + + return 0; + +free_netdevs: + while (i) + fun_destroy_netdev(ed->netdevs[--i]); + kfree(ed->netdevs); + ed->netdevs = NULL; + ed->num_ports = 0; + return rc; +} + +static void fun_destroy_ports(struct fun_ethdev *ed) +{ + unsigned int i; + + for (i = 0; i < ed->num_ports; i++) + fun_destroy_netdev(ed->netdevs[i]); + + kfree(ed->netdevs); + ed->netdevs = NULL; + ed->num_ports = 0; +} + +static void fun_update_link_state(const struct fun_ethdev *ed, + const struct fun_admin_port_notif *notif) +{ + unsigned int port_idx = be16_to_cpu(notif->id); + struct net_device *netdev; + struct funeth_priv *fp; + + if (port_idx >= ed->num_ports) + return; + + netdev = ed->netdevs[port_idx]; + fp = netdev_priv(netdev); + + write_seqcount_begin(&fp->link_seq); + fp->link_speed = be32_to_cpu(notif->speed) * 10; /* 10 Mbps->Mbps */ + fp->active_fc = notif->flow_ctrl; + fp->active_fec = notif->fec; + fp->xcvr_type = notif->xcvr_type; + fp->link_down_reason = notif->link_down_reason; + fp->lp_advertising = be64_to_cpu(notif->lp_advertising); + + if ((notif->link_state | notif->missed_events) & FUN_PORT_FLAG_MAC_DOWN) + netif_carrier_off(netdev); + if (notif->link_state & FUN_PORT_FLAG_MAC_UP) + netif_carrier_on(netdev); + + write_seqcount_end(&fp->link_seq); + fun_report_link(netdev); +} + +/* handler for async events delivered through the admin CQ */ +static void fun_event_cb(struct fun_dev *fdev, void *entry) +{ + u8 op = ((struct fun_admin_rsp_common *)entry)->op; + + if (op == FUN_ADMIN_OP_PORT) { + const struct fun_admin_port_notif *rsp = entry; + + if (rsp->subop == FUN_ADMIN_SUBOP_NOTIFY) { + fun_update_link_state(to_fun_ethdev(fdev), rsp); + } else if (rsp->subop == FUN_ADMIN_SUBOP_RES_COUNT) { + const struct fun_admin_res_count_rsp *r = entry; + + if (r->count.data) + set_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags); + else + set_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags); + fun_serv_sched(fdev); + } else { + dev_info(fdev->dev, "adminq event unexpected op %u subop %u", + op, rsp->subop); + } + } else { + dev_info(fdev->dev, "adminq event unexpected op %u", op); + } +} + +/* handler for pending work managed by the service task */ +static void fun_service_cb(struct fun_dev *fdev) +{ + struct fun_ethdev *ed = to_fun_ethdev(fdev); + int rc; + + if (test_and_clear_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags)) + fun_destroy_ports(ed); + + if (!test_and_clear_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags)) + return; + + rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT); + if (rc < 0 || rc == ed->num_ports) + return; + + if (ed->num_ports) + fun_destroy_ports(ed); + if (rc) + fun_create_ports(ed, rc); +} + +static int funeth_sriov_configure(struct pci_dev *pdev, int nvfs) +{ + struct fun_dev *fdev = pci_get_drvdata(pdev); + struct fun_ethdev *ed = to_fun_ethdev(fdev); + int rc; + + if (nvfs == 0) { + if (pci_vfs_assigned(pdev)) { + dev_warn(&pdev->dev, + "Cannot disable SR-IOV while VFs are assigned\n"); + return -EPERM; + } + + mutex_lock(&ed->state_mutex); + fun_free_vports(ed); + mutex_unlock(&ed->state_mutex); + pci_disable_sriov(pdev); + return 0; + } + + rc = pci_enable_sriov(pdev, nvfs); + if (rc) + return rc; + + mutex_lock(&ed->state_mutex); + rc = fun_init_vports(ed, nvfs); + mutex_unlock(&ed->state_mutex); + if (rc) { + pci_disable_sriov(pdev); + return rc; + } + + return nvfs; +} + +static int funeth_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct fun_dev_params aqreq = { + .cqe_size_log2 = ilog2(ADMIN_CQE_SIZE), + .sqe_size_log2 = ilog2(ADMIN_SQE_SIZE), + .cq_depth = ADMIN_CQ_DEPTH, + .sq_depth = ADMIN_SQ_DEPTH, + .rq_depth = ADMIN_RQ_DEPTH, + .min_msix = 2, /* 1 Rx + 1 Tx */ + .event_cb = fun_event_cb, + .serv_cb = fun_service_cb, + }; + struct devlink *devlink; + struct fun_ethdev *ed; + struct fun_dev *fdev; + int rc; + + devlink = fun_devlink_alloc(&pdev->dev); + if (!devlink) { + dev_err(&pdev->dev, "devlink alloc failed\n"); + return -ENOMEM; + } + + ed = devlink_priv(devlink); + mutex_init(&ed->state_mutex); + + fdev = &ed->fdev; + rc = fun_dev_enable(fdev, pdev, &aqreq, KBUILD_MODNAME); + if (rc) + goto free_devlink; + + rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT); + if (rc > 0) + rc = fun_create_ports(ed, rc); + if (rc < 0) + goto disable_dev; + + fun_serv_restart(fdev); + fun_devlink_register(devlink); + return 0; + +disable_dev: + fun_dev_disable(fdev); +free_devlink: + mutex_destroy(&ed->state_mutex); + fun_devlink_free(devlink); + return rc; +} + +static void funeth_remove(struct pci_dev *pdev) +{ + struct fun_dev *fdev = pci_get_drvdata(pdev); + struct devlink *devlink; + struct fun_ethdev *ed; + + ed = to_fun_ethdev(fdev); + devlink = priv_to_devlink(ed); + fun_devlink_unregister(devlink); + +#ifdef CONFIG_PCI_IOV + funeth_sriov_configure(pdev, 0); +#endif + + fun_serv_stop(fdev); + fun_destroy_ports(ed); + fun_dev_disable(fdev); + mutex_destroy(&ed->state_mutex); + + fun_devlink_free(devlink); +} + +static struct pci_driver funeth_driver = { + .name = KBUILD_MODNAME, + .id_table = funeth_id_table, + .probe = funeth_probe, + .remove = funeth_remove, + .shutdown = funeth_remove, + .sriov_configure = funeth_sriov_configure, +}; + +module_pci_driver(funeth_driver); + +MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>"); +MODULE_DESCRIPTION("Fungible Ethernet Network Driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DEVICE_TABLE(pci, funeth_id_table); diff --git a/drivers/net/ethernet/fungible/funeth/funeth_rx.c b/drivers/net/ethernet/fungible/funeth/funeth_rx.c new file mode 100644 index 000000000000..0f6a549b9f67 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_rx.c @@ -0,0 +1,826 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/bpf_trace.h> +#include <linux/dma-mapping.h> +#include <linux/etherdevice.h> +#include <linux/filter.h> +#include <linux/irq.h> +#include <linux/pci.h> +#include <linux/skbuff.h> +#include "funeth_txrx.h" +#include "funeth.h" +#include "fun_queue.h" + +#define CREATE_TRACE_POINTS +#include "funeth_trace.h" + +/* Given the device's max supported MTU and pages of at least 4KB a packet can + * be scattered into at most 4 buffers. + */ +#define RX_MAX_FRAGS 4 + +/* Per packet headroom in non-XDP mode. Present only for 1-frag packets. */ +#define FUN_RX_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) + +/* We try to reuse pages for our buffers. To avoid frequent page ref writes we + * take EXTRA_PAGE_REFS references at once and then hand them out one per packet + * occupying the buffer. + */ +#define EXTRA_PAGE_REFS 1000000 +#define MIN_PAGE_REFS 1000 + +enum { + FUN_XDP_FLUSH_REDIR = 1, + FUN_XDP_FLUSH_TX = 2, +}; + +/* See if a page is running low on refs we are holding and if so take more. */ +static void refresh_refs(struct funeth_rxbuf *buf) +{ + if (unlikely(buf->pg_refs < MIN_PAGE_REFS)) { + buf->pg_refs += EXTRA_PAGE_REFS; + page_ref_add(buf->page, EXTRA_PAGE_REFS); + } +} + +/* Offer a buffer to the Rx buffer cache. The cache will hold the buffer if its + * page is worth retaining and there's room for it. Otherwise the page is + * unmapped and our references released. + */ +static void cache_offer(struct funeth_rxq *q, const struct funeth_rxbuf *buf) +{ + struct funeth_rx_cache *c = &q->cache; + + if (c->prod_cnt - c->cons_cnt <= c->mask && buf->node == numa_mem_id()) { + c->bufs[c->prod_cnt & c->mask] = *buf; + c->prod_cnt++; + } else { + dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + __page_frag_cache_drain(buf->page, buf->pg_refs); + } +} + +/* Get a page from the Rx buffer cache. We only consider the next available + * page and return it if we own all its references. + */ +static bool cache_get(struct funeth_rxq *q, struct funeth_rxbuf *rb) +{ + struct funeth_rx_cache *c = &q->cache; + struct funeth_rxbuf *buf; + + if (c->prod_cnt == c->cons_cnt) + return false; /* empty cache */ + + buf = &c->bufs[c->cons_cnt & c->mask]; + if (page_ref_count(buf->page) == buf->pg_refs) { + dma_sync_single_for_device(q->dma_dev, buf->dma_addr, + PAGE_SIZE, DMA_FROM_DEVICE); + *rb = *buf; + buf->page = NULL; + refresh_refs(rb); + c->cons_cnt++; + return true; + } + + /* Page can't be reused. If the cache is full drop this page. */ + if (c->prod_cnt - c->cons_cnt > c->mask) { + dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + __page_frag_cache_drain(buf->page, buf->pg_refs); + buf->page = NULL; + c->cons_cnt++; + } + return false; +} + +/* Allocate and DMA-map a page for receive. */ +static int funeth_alloc_page(struct funeth_rxq *q, struct funeth_rxbuf *rb, + int node, gfp_t gfp) +{ + struct page *p; + + if (cache_get(q, rb)) + return 0; + + p = __alloc_pages_node(node, gfp | __GFP_NOWARN, 0); + if (unlikely(!p)) + return -ENOMEM; + + rb->dma_addr = dma_map_page(q->dma_dev, p, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(q->dma_dev, rb->dma_addr))) { + FUN_QSTAT_INC(q, rx_map_err); + __free_page(p); + return -ENOMEM; + } + + FUN_QSTAT_INC(q, rx_page_alloc); + + rb->page = p; + rb->pg_refs = 1; + refresh_refs(rb); + rb->node = page_is_pfmemalloc(p) ? -1 : page_to_nid(p); + return 0; +} + +static void funeth_free_page(struct funeth_rxq *q, struct funeth_rxbuf *rb) +{ + if (rb->page) { + dma_unmap_page(q->dma_dev, rb->dma_addr, PAGE_SIZE, + DMA_FROM_DEVICE); + __page_frag_cache_drain(rb->page, rb->pg_refs); + rb->page = NULL; + } +} + +/* Run the XDP program assigned to an Rx queue. + * Return %NULL if the buffer is consumed, or the virtual address of the packet + * to turn into an skb. + */ +static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va, + int ref_ok, struct funeth_txq *xdp_q) +{ + struct bpf_prog *xdp_prog; + struct xdp_buff xdp; + u32 act; + + /* VA includes the headroom, frag size includes headroom + tailroom */ + xdp_init_buff(&xdp, ALIGN(skb_frag_size(frags), FUN_EPRQ_PKT_ALIGN), + &q->xdp_rxq); + xdp_prepare_buff(&xdp, buf_va, FUN_XDP_HEADROOM, skb_frag_size(frags) - + (FUN_RX_TAILROOM + FUN_XDP_HEADROOM), false); + + xdp_prog = READ_ONCE(q->xdp_prog); + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + switch (act) { + case XDP_PASS: + /* remove headroom, which may not be FUN_XDP_HEADROOM now */ + skb_frag_size_set(frags, xdp.data_end - xdp.data); + skb_frag_off_add(frags, xdp.data - xdp.data_hard_start); + goto pass; + case XDP_TX: + if (unlikely(!ref_ok)) + goto pass; + if (!fun_xdp_tx(xdp_q, xdp.data, xdp.data_end - xdp.data)) + goto xdp_error; + FUN_QSTAT_INC(q, xdp_tx); + q->xdp_flush |= FUN_XDP_FLUSH_TX; + break; + case XDP_REDIRECT: + if (unlikely(!ref_ok)) + goto pass; + if (unlikely(xdp_do_redirect(q->netdev, &xdp, xdp_prog))) + goto xdp_error; + FUN_QSTAT_INC(q, xdp_redir); + q->xdp_flush |= FUN_XDP_FLUSH_REDIR; + break; + default: + bpf_warn_invalid_xdp_action(q->netdev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(q->netdev, xdp_prog, act); +xdp_error: + q->cur_buf->pg_refs++; /* return frags' page reference */ + FUN_QSTAT_INC(q, xdp_err); + break; + case XDP_DROP: + q->cur_buf->pg_refs++; + FUN_QSTAT_INC(q, xdp_drops); + break; + } + return NULL; + +pass: + return xdp.data; +} + +/* A CQE contains a fixed completion structure along with optional metadata and + * even packet data. Given the start address of a CQE return the start of the + * contained fixed structure, which lies at the end. + */ +static const void *cqe_to_info(const void *cqe) +{ + return cqe + FUNETH_CQE_INFO_OFFSET; +} + +/* The inverse of cqe_to_info(). */ +static const void *info_to_cqe(const void *cqe_info) +{ + return cqe_info - FUNETH_CQE_INFO_OFFSET; +} + +/* Return the type of hash provided by the device based on the L3 and L4 + * protocols it parsed for the packet. + */ +static enum pkt_hash_types cqe_to_pkt_hash_type(u16 pkt_parse) +{ + static const enum pkt_hash_types htype_map[] = { + PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3, + PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L4, + PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3, + PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3 + }; + u16 key; + + /* Build the key from the TCP/UDP and IP/IPv6 bits */ + key = ((pkt_parse >> FUN_ETH_RX_CV_OL4_PROT_S) & 6) | + ((pkt_parse >> (FUN_ETH_RX_CV_OL3_PROT_S + 1)) & 1); + + return htype_map[key]; +} + +/* Each received packet can be scattered across several Rx buffers or can + * share a buffer with previously received packets depending on the buffer + * and packet sizes and the room available in the most recently used buffer. + * + * The rules are: + * - If the buffer at the head of an RQ has not been used it gets (part of) the + * next incoming packet. + * - Otherwise, if the packet fully fits in the buffer's remaining space the + * packet is written there. + * - Otherwise, the packet goes into the next Rx buffer. + * + * This function returns the Rx buffer for a packet or fragment thereof of the + * given length. If it isn't @buf it either recycles or frees that buffer + * before advancing the queue to the next buffer. + * + * If called repeatedly with the remaining length of a packet it will walk + * through all the buffers containing the packet. + */ +static struct funeth_rxbuf * +get_buf(struct funeth_rxq *q, struct funeth_rxbuf *buf, unsigned int len) +{ + if (q->buf_offset + len <= PAGE_SIZE || !q->buf_offset) + return buf; /* @buf holds (part of) the packet */ + + /* The packet occupies part of the next buffer. Move there after + * replenishing the current buffer slot either with the spare page or + * by reusing the slot's existing page. Note that if a spare page isn't + * available and the current packet occupies @buf it is a multi-frag + * packet that will be dropped leaving @buf available for reuse. + */ + if ((page_ref_count(buf->page) == buf->pg_refs && + buf->node == numa_mem_id()) || !q->spare_buf.page) { + dma_sync_single_for_device(q->dma_dev, buf->dma_addr, + PAGE_SIZE, DMA_FROM_DEVICE); + refresh_refs(buf); + } else { + cache_offer(q, buf); + *buf = q->spare_buf; + q->spare_buf.page = NULL; + q->rqes[q->rq_cons & q->rq_mask] = + FUN_EPRQ_RQBUF_INIT(buf->dma_addr); + } + q->buf_offset = 0; + q->rq_cons++; + return &q->bufs[q->rq_cons & q->rq_mask]; +} + +/* Gather the page fragments making up the first Rx packet on @q. Its total + * length @tot_len includes optional head- and tail-rooms. + * + * Return 0 if the device retains ownership of at least some of the pages. + * In this case the caller may only copy the packet. + * + * A non-zero return value gives the caller permission to use references to the + * pages, e.g., attach them to skbs. Additionally, if the value is <0 at least + * one of the pages is PF_MEMALLOC. + * + * Regardless of outcome the caller is granted a reference to each of the pages. + */ +static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len, + skb_frag_t *frags) +{ + struct funeth_rxbuf *buf = q->cur_buf; + unsigned int frag_len; + int ref_ok = 1; + + for (;;) { + buf = get_buf(q, buf, tot_len); + + /* We always keep the RQ full of buffers so before we can give + * one of our pages to the stack we require that we can obtain + * a replacement page. If we can't the packet will either be + * copied or dropped so we can retain ownership of the page and + * reuse it. + */ + if (!q->spare_buf.page && + funeth_alloc_page(q, &q->spare_buf, numa_mem_id(), + GFP_ATOMIC | __GFP_MEMALLOC)) + ref_ok = 0; + + frag_len = min_t(unsigned int, tot_len, + PAGE_SIZE - q->buf_offset); + dma_sync_single_for_cpu(q->dma_dev, + buf->dma_addr + q->buf_offset, + frag_len, DMA_FROM_DEVICE); + buf->pg_refs--; + if (ref_ok) + ref_ok |= buf->node; + + __skb_frag_set_page(frags, buf->page); + skb_frag_off_set(frags, q->buf_offset); + skb_frag_size_set(frags++, frag_len); + + tot_len -= frag_len; + if (!tot_len) + break; + + q->buf_offset = PAGE_SIZE; + } + q->buf_offset = ALIGN(q->buf_offset + frag_len, FUN_EPRQ_PKT_ALIGN); + q->cur_buf = buf; + return ref_ok; +} + +static bool rx_hwtstamp_enabled(const struct net_device *dev) +{ + const struct funeth_priv *d = netdev_priv(dev); + + return d->hwtstamp_cfg.rx_filter == HWTSTAMP_FILTER_ALL; +} + +/* Advance the CQ pointers and phase tag to the next CQE. */ +static void advance_cq(struct funeth_rxq *q) +{ + if (unlikely(q->cq_head == q->cq_mask)) { + q->cq_head = 0; + q->phase ^= 1; + q->next_cqe_info = cqe_to_info(q->cqes); + } else { + q->cq_head++; + q->next_cqe_info += FUNETH_CQE_SIZE; + } + prefetch(q->next_cqe_info); +} + +/* Process the packet represented by the head CQE of @q. Gather the packet's + * fragments, run it through the optional XDP program, and if needed construct + * an skb and pass it to the stack. + */ +static void fun_handle_cqe_pkt(struct funeth_rxq *q, struct funeth_txq *xdp_q) +{ + const struct fun_eth_cqe *rxreq = info_to_cqe(q->next_cqe_info); + unsigned int i, tot_len, pkt_len = be32_to_cpu(rxreq->pkt_len); + struct net_device *ndev = q->netdev; + skb_frag_t frags[RX_MAX_FRAGS]; + struct skb_shared_info *si; + unsigned int headroom; + gro_result_t gro_res; + struct sk_buff *skb; + int ref_ok; + void *va; + u16 cv; + + u64_stats_update_begin(&q->syncp); + q->stats.rx_pkts++; + q->stats.rx_bytes += pkt_len; + u64_stats_update_end(&q->syncp); + + advance_cq(q); + + /* account for head- and tail-room, present only for 1-buffer packets */ + tot_len = pkt_len; + headroom = be16_to_cpu(rxreq->headroom); + if (likely(headroom)) + tot_len += FUN_RX_TAILROOM + headroom; + + ref_ok = fun_gather_pkt(q, tot_len, frags); + va = skb_frag_address(frags); + if (xdp_q && headroom == FUN_XDP_HEADROOM) { + va = fun_run_xdp(q, frags, va, ref_ok, xdp_q); + if (!va) + return; + headroom = 0; /* XDP_PASS trims it */ + } + if (unlikely(!ref_ok)) + goto no_mem; + + if (likely(headroom)) { + /* headroom is either FUN_RX_HEADROOM or FUN_XDP_HEADROOM */ + prefetch(va + headroom); + skb = napi_build_skb(va, ALIGN(tot_len, FUN_EPRQ_PKT_ALIGN)); + if (unlikely(!skb)) + goto no_mem; + + skb_reserve(skb, headroom); + __skb_put(skb, pkt_len); + skb->protocol = eth_type_trans(skb, ndev); + } else { + prefetch(va); + skb = napi_get_frags(q->napi); + if (unlikely(!skb)) + goto no_mem; + + if (ref_ok < 0) + skb->pfmemalloc = 1; + + si = skb_shinfo(skb); + si->nr_frags = rxreq->nsgl; + for (i = 0; i < si->nr_frags; i++) + si->frags[i] = frags[i]; + + skb->len = pkt_len; + skb->data_len = pkt_len; + skb->truesize += round_up(pkt_len, FUN_EPRQ_PKT_ALIGN); + } + + skb_record_rx_queue(skb, q->qidx); + cv = be16_to_cpu(rxreq->pkt_cv); + if (likely((q->netdev->features & NETIF_F_RXHASH) && rxreq->hash)) + skb_set_hash(skb, be32_to_cpu(rxreq->hash), + cqe_to_pkt_hash_type(cv)); + if (likely((q->netdev->features & NETIF_F_RXCSUM) && rxreq->csum)) { + FUN_QSTAT_INC(q, rx_cso); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = be16_to_cpu(rxreq->csum) - 1; + } + if (unlikely(rx_hwtstamp_enabled(q->netdev))) + skb_hwtstamps(skb)->hwtstamp = be64_to_cpu(rxreq->timestamp); + + trace_funeth_rx(q, rxreq->nsgl, pkt_len, skb->hash, cv); + + gro_res = skb->data_len ? napi_gro_frags(q->napi) : + napi_gro_receive(q->napi, skb); + if (gro_res == GRO_MERGED || gro_res == GRO_MERGED_FREE) + FUN_QSTAT_INC(q, gro_merged); + else if (gro_res == GRO_HELD) + FUN_QSTAT_INC(q, gro_pkts); + return; + +no_mem: + FUN_QSTAT_INC(q, rx_mem_drops); + + /* Release the references we've been granted for the frag pages. + * We return the ref of the last frag and free the rest. + */ + q->cur_buf->pg_refs++; + for (i = 0; i < rxreq->nsgl - 1; i++) + __free_page(skb_frag_page(frags + i)); +} + +/* Return 0 if the phase tag of the CQE at the CQ's head matches expectations + * indicating the CQE is new. + */ +static u16 cqe_phase_mismatch(const struct fun_cqe_info *ci, u16 phase) +{ + u16 sf_p = be16_to_cpu(ci->sf_p); + + return (sf_p & 1) ^ phase; +} + +/* Walk through a CQ identifying and processing fresh CQEs up to the given + * budget. Return the remaining budget. + */ +static int fun_process_cqes(struct funeth_rxq *q, int budget) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + struct funeth_txq **xdpqs, *xdp_q = NULL; + + xdpqs = rcu_dereference_bh(fp->xdpqs); + if (xdpqs) + xdp_q = xdpqs[smp_processor_id()]; + + while (budget && !cqe_phase_mismatch(q->next_cqe_info, q->phase)) { + /* access other descriptor fields after the phase check */ + dma_rmb(); + + fun_handle_cqe_pkt(q, xdp_q); + budget--; + } + + if (unlikely(q->xdp_flush)) { + if (q->xdp_flush & FUN_XDP_FLUSH_TX) + fun_txq_wr_db(xdp_q); + if (q->xdp_flush & FUN_XDP_FLUSH_REDIR) + xdp_do_flush(); + q->xdp_flush = 0; + } + + return budget; +} + +/* NAPI handler for Rx queues. Calls the CQE processing loop and writes RQ/CQ + * doorbells as needed. + */ +int fun_rxq_napi_poll(struct napi_struct *napi, int budget) +{ + struct fun_irq *irq = container_of(napi, struct fun_irq, napi); + struct funeth_rxq *q = irq->rxq; + int work_done = budget - fun_process_cqes(q, budget); + u32 cq_db_val = q->cq_head; + + if (unlikely(work_done >= budget)) + FUN_QSTAT_INC(q, rx_budget); + else if (napi_complete_done(napi, work_done)) + cq_db_val |= q->irq_db_val; + + /* check whether to post new Rx buffers */ + if (q->rq_cons - q->rq_cons_db >= q->rq_db_thres) { + u64_stats_update_begin(&q->syncp); + q->stats.rx_bufs += q->rq_cons - q->rq_cons_db; + u64_stats_update_end(&q->syncp); + q->rq_cons_db = q->rq_cons; + writel((q->rq_cons - 1) & q->rq_mask, q->rq_db); + } + + writel(cq_db_val, q->cq_db); + return work_done; +} + +/* Free the Rx buffers of an Rx queue. */ +static void fun_rxq_free_bufs(struct funeth_rxq *q) +{ + struct funeth_rxbuf *b = q->bufs; + unsigned int i; + + for (i = 0; i <= q->rq_mask; i++, b++) + funeth_free_page(q, b); + + funeth_free_page(q, &q->spare_buf); + q->cur_buf = NULL; +} + +/* Initially provision an Rx queue with Rx buffers. */ +static int fun_rxq_alloc_bufs(struct funeth_rxq *q, int node) +{ + struct funeth_rxbuf *b = q->bufs; + unsigned int i; + + for (i = 0; i <= q->rq_mask; i++, b++) { + if (funeth_alloc_page(q, b, node, GFP_KERNEL)) { + fun_rxq_free_bufs(q); + return -ENOMEM; + } + q->rqes[i] = FUN_EPRQ_RQBUF_INIT(b->dma_addr); + } + q->cur_buf = q->bufs; + return 0; +} + +/* Initialize a used-buffer cache of the given depth. */ +static int fun_rxq_init_cache(struct funeth_rx_cache *c, unsigned int depth, + int node) +{ + c->mask = depth - 1; + c->bufs = kvzalloc_node(depth * sizeof(*c->bufs), GFP_KERNEL, node); + return c->bufs ? 0 : -ENOMEM; +} + +/* Deallocate an Rx queue's used-buffer cache and its contents. */ +static void fun_rxq_free_cache(struct funeth_rxq *q) +{ + struct funeth_rxbuf *b = q->cache.bufs; + unsigned int i; + + for (i = 0; i <= q->cache.mask; i++, b++) + funeth_free_page(q, b); + + kvfree(q->cache.bufs); + q->cache.bufs = NULL; +} + +int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + struct fun_admin_epcq_req cmd; + u16 headroom; + int err; + + headroom = prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM; + if (headroom != q->headroom) { + cmd.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ, + sizeof(cmd)); + cmd.u.modify = + FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(FUN_ADMIN_SUBOP_MODIFY, + 0, q->hw_cqid, headroom); + err = fun_submit_admin_sync_cmd(fp->fdev, &cmd.common, NULL, 0, + 0); + if (err) + return err; + q->headroom = headroom; + } + + WRITE_ONCE(q->xdp_prog, prog); + return 0; +} + +/* Create an Rx queue, allocating the host memory it needs. */ +static struct funeth_rxq *fun_rxq_create_sw(struct net_device *dev, + unsigned int qidx, + unsigned int ncqe, + unsigned int nrqe, + struct fun_irq *irq) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct funeth_rxq *q; + int err = -ENOMEM; + int numa_node; + + numa_node = fun_irq_node(irq); + q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node); + if (!q) + goto err; + + q->qidx = qidx; + q->netdev = dev; + q->cq_mask = ncqe - 1; + q->rq_mask = nrqe - 1; + q->numa_node = numa_node; + q->rq_db_thres = nrqe / 4; + u64_stats_init(&q->syncp); + q->dma_dev = &fp->pdev->dev; + + q->rqes = fun_alloc_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), + sizeof(*q->bufs), false, numa_node, + &q->rq_dma_addr, (void **)&q->bufs, NULL); + if (!q->rqes) + goto free_q; + + q->cqes = fun_alloc_ring_mem(q->dma_dev, ncqe, FUNETH_CQE_SIZE, 0, + false, numa_node, &q->cq_dma_addr, NULL, + NULL); + if (!q->cqes) + goto free_rqes; + + err = fun_rxq_init_cache(&q->cache, nrqe, numa_node); + if (err) + goto free_cqes; + + err = fun_rxq_alloc_bufs(q, numa_node); + if (err) + goto free_cache; + + q->stats.rx_bufs = q->rq_mask; + q->init_state = FUN_QSTATE_INIT_SW; + return q; + +free_cache: + fun_rxq_free_cache(q); +free_cqes: + dma_free_coherent(q->dma_dev, ncqe * FUNETH_CQE_SIZE, q->cqes, + q->cq_dma_addr); +free_rqes: + fun_free_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), false, q->rqes, + q->rq_dma_addr, q->bufs); +free_q: + kfree(q); +err: + netdev_err(dev, "Unable to allocate memory for Rx queue %u\n", qidx); + return ERR_PTR(err); +} + +static void fun_rxq_free_sw(struct funeth_rxq *q) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + + fun_rxq_free_cache(q); + fun_rxq_free_bufs(q); + fun_free_ring_mem(q->dma_dev, q->rq_mask + 1, sizeof(*q->rqes), false, + q->rqes, q->rq_dma_addr, q->bufs); + dma_free_coherent(q->dma_dev, (q->cq_mask + 1) * FUNETH_CQE_SIZE, + q->cqes, q->cq_dma_addr); + + /* Before freeing the queue transfer key counters to the device. */ + fp->rx_packets += q->stats.rx_pkts; + fp->rx_bytes += q->stats.rx_bytes; + fp->rx_dropped += q->stats.rx_map_err + q->stats.rx_mem_drops; + + kfree(q); +} + +/* Create an Rx queue's resources on the device. */ +int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + unsigned int ncqe = q->cq_mask + 1; + unsigned int nrqe = q->rq_mask + 1; + int err; + + err = xdp_rxq_info_reg(&q->xdp_rxq, q->netdev, q->qidx, + irq->napi.napi_id); + if (err) + goto out; + + err = xdp_rxq_info_reg_mem_model(&q->xdp_rxq, MEM_TYPE_PAGE_SHARED, + NULL); + if (err) + goto xdp_unreg; + + q->phase = 1; + q->irq_cnt = 0; + q->cq_head = 0; + q->rq_cons = 0; + q->rq_cons_db = 0; + q->buf_offset = 0; + q->napi = &irq->napi; + q->irq_db_val = fp->cq_irq_db; + q->next_cqe_info = cqe_to_info(q->cqes); + + q->xdp_prog = fp->xdp_prog; + q->headroom = fp->xdp_prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM; + + err = fun_sq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR | + FUN_ADMIN_EPSQ_CREATE_FLAG_RQ, 0, + FUN_HCI_ID_INVALID, 0, nrqe, q->rq_dma_addr, 0, 0, + 0, 0, fp->fdev->kern_end_qid, PAGE_SHIFT, + &q->hw_sqid, &q->rq_db); + if (err) + goto xdp_unreg; + + err = fun_cq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR | + FUN_ADMIN_EPCQ_CREATE_FLAG_RQ, 0, + q->hw_sqid, ilog2(FUNETH_CQE_SIZE), ncqe, + q->cq_dma_addr, q->headroom, FUN_RX_TAILROOM, 0, 0, + irq->irq_idx, 0, fp->fdev->kern_end_qid, + &q->hw_cqid, &q->cq_db); + if (err) + goto free_rq; + + irq->rxq = q; + writel(q->rq_mask, q->rq_db); + q->init_state = FUN_QSTATE_INIT_FULL; + + netif_info(fp, ifup, q->netdev, + "Rx queue %u, depth %u/%u, HW qid %u/%u, IRQ idx %u, node %d, headroom %u\n", + q->qidx, ncqe, nrqe, q->hw_cqid, q->hw_sqid, irq->irq_idx, + q->numa_node, q->headroom); + return 0; + +free_rq: + fun_destroy_sq(fp->fdev, q->hw_sqid); +xdp_unreg: + xdp_rxq_info_unreg(&q->xdp_rxq); +out: + netdev_err(q->netdev, + "Failed to create Rx queue %u on device, error %d\n", + q->qidx, err); + return err; +} + +static void fun_rxq_free_dev(struct funeth_rxq *q) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + struct fun_irq *irq; + + if (q->init_state < FUN_QSTATE_INIT_FULL) + return; + + irq = container_of(q->napi, struct fun_irq, napi); + netif_info(fp, ifdown, q->netdev, + "Freeing Rx queue %u (id %u/%u), IRQ %u\n", + q->qidx, q->hw_cqid, q->hw_sqid, irq->irq_idx); + + irq->rxq = NULL; + xdp_rxq_info_unreg(&q->xdp_rxq); + fun_destroy_sq(fp->fdev, q->hw_sqid); + fun_destroy_cq(fp->fdev, q->hw_cqid); + q->init_state = FUN_QSTATE_INIT_SW; +} + +/* Create or advance an Rx queue, allocating all the host and device resources + * needed to reach the target state. + */ +int funeth_rxq_create(struct net_device *dev, unsigned int qidx, + unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq, + int state, struct funeth_rxq **qp) +{ + struct funeth_rxq *q = *qp; + int err; + + if (!q) { + q = fun_rxq_create_sw(dev, qidx, ncqe, nrqe, irq); + if (IS_ERR(q)) + return PTR_ERR(q); + } + + if (q->init_state >= state) + goto out; + + err = fun_rxq_create_dev(q, irq); + if (err) { + if (!*qp) + fun_rxq_free_sw(q); + return err; + } + +out: + *qp = q; + return 0; +} + +/* Free Rx queue resources until it reaches the target state. */ +struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state) +{ + if (state < FUN_QSTATE_INIT_FULL) + fun_rxq_free_dev(q); + + if (state == FUN_QSTATE_DESTROYED) { + fun_rxq_free_sw(q); + q = NULL; + } + + return q; +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_trace.h b/drivers/net/ethernet/fungible/funeth/funeth_trace.h new file mode 100644 index 000000000000..9e58dfec19d5 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_trace.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM funeth + +#if !defined(_TRACE_FUNETH_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FUNETH_H + +#include <linux/tracepoint.h> + +#include "funeth_txrx.h" + +TRACE_EVENT(funeth_tx, + + TP_PROTO(const struct funeth_txq *txq, + u32 len, + u32 sqe_idx, + u32 ngle), + + TP_ARGS(txq, len, sqe_idx, ngle), + + TP_STRUCT__entry( + __field(u32, qidx) + __field(u32, len) + __field(u32, sqe_idx) + __field(u32, ngle) + __string(devname, txq->netdev->name) + ), + + TP_fast_assign( + __entry->qidx = txq->qidx; + __entry->len = len; + __entry->sqe_idx = sqe_idx; + __entry->ngle = ngle; + __assign_str(devname, txq->netdev->name); + ), + + TP_printk("%s: Txq %u, SQE idx %u, len %u, num GLEs %u", + __get_str(devname), __entry->qidx, __entry->sqe_idx, + __entry->len, __entry->ngle) +); + +TRACE_EVENT(funeth_tx_free, + + TP_PROTO(const struct funeth_txq *txq, + u32 sqe_idx, + u32 num_sqes, + u32 hw_head), + + TP_ARGS(txq, sqe_idx, num_sqes, hw_head), + + TP_STRUCT__entry( + __field(u32, qidx) + __field(u32, sqe_idx) + __field(u32, num_sqes) + __field(u32, hw_head) + __string(devname, txq->netdev->name) + ), + + TP_fast_assign( + __entry->qidx = txq->qidx; + __entry->sqe_idx = sqe_idx; + __entry->num_sqes = num_sqes; + __entry->hw_head = hw_head; + __assign_str(devname, txq->netdev->name); + ), + + TP_printk("%s: Txq %u, SQE idx %u, SQEs %u, HW head %u", + __get_str(devname), __entry->qidx, __entry->sqe_idx, + __entry->num_sqes, __entry->hw_head) +); + +TRACE_EVENT(funeth_rx, + + TP_PROTO(const struct funeth_rxq *rxq, + u32 num_rqes, + u32 pkt_len, + u32 hash, + u32 cls_vec), + + TP_ARGS(rxq, num_rqes, pkt_len, hash, cls_vec), + + TP_STRUCT__entry( + __field(u32, qidx) + __field(u32, cq_head) + __field(u32, num_rqes) + __field(u32, len) + __field(u32, hash) + __field(u32, cls_vec) + __string(devname, rxq->netdev->name) + ), + + TP_fast_assign( + __entry->qidx = rxq->qidx; + __entry->cq_head = rxq->cq_head; + __entry->num_rqes = num_rqes; + __entry->len = pkt_len; + __entry->hash = hash; + __entry->cls_vec = cls_vec; + __assign_str(devname, rxq->netdev->name); + ), + + TP_printk("%s: Rxq %u, CQ head %u, RQEs %u, len %u, hash %u, CV %#x", + __get_str(devname), __entry->qidx, __entry->cq_head, + __entry->num_rqes, __entry->len, __entry->hash, + __entry->cls_vec) +); + +#endif /* _TRACE_FUNETH_H */ + +/* Below must be outside protection. */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE funeth_trace + +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c new file mode 100644 index 000000000000..ff6e29237253 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c @@ -0,0 +1,763 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/dma-mapping.h> +#include <linux/ip.h> +#include <linux/pci.h> +#include <linux/skbuff.h> +#include <linux/tcp.h> +#include <uapi/linux/udp.h> +#include "funeth.h" +#include "funeth_ktls.h" +#include "funeth_txrx.h" +#include "funeth_trace.h" +#include "fun_queue.h" + +#define FUN_XDP_CLEAN_THRES 32 +#define FUN_XDP_CLEAN_BATCH 16 + +/* DMA-map a packet and return the (length, DMA_address) pairs for its + * segments. If a mapping error occurs -ENOMEM is returned. + */ +static int map_skb(const struct sk_buff *skb, struct device *dev, + dma_addr_t *addr, unsigned int *len) +{ + const struct skb_shared_info *si; + const skb_frag_t *fp, *end; + + *len = skb_headlen(skb); + *addr = dma_map_single(dev, skb->data, *len, DMA_TO_DEVICE); + if (dma_mapping_error(dev, *addr)) + return -ENOMEM; + + si = skb_shinfo(skb); + end = &si->frags[si->nr_frags]; + + for (fp = si->frags; fp < end; fp++) { + *++len = skb_frag_size(fp); + *++addr = skb_frag_dma_map(dev, fp, 0, *len, DMA_TO_DEVICE); + if (dma_mapping_error(dev, *addr)) + goto unwind; + } + return 0; + +unwind: + while (fp-- > si->frags) + dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE); + + dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE); + return -ENOMEM; +} + +/* Return the address just past the end of a Tx queue's descriptor ring. + * It exploits the fact that the HW writeback area is just after the end + * of the descriptor ring. + */ +static void *txq_end(const struct funeth_txq *q) +{ + return (void *)q->hw_wb; +} + +/* Return the amount of space within a Tx ring from the given address to the + * end. + */ +static unsigned int txq_to_end(const struct funeth_txq *q, void *p) +{ + return txq_end(q) - p; +} + +/* Return the number of Tx descriptors occupied by a Tx request. */ +static unsigned int tx_req_ndesc(const struct fun_eth_tx_req *req) +{ + return DIV_ROUND_UP(req->len8, FUNETH_SQE_SIZE / 8); +} + +static __be16 tcp_hdr_doff_flags(const struct tcphdr *th) +{ + return *(__be16 *)&tcp_flag_word(th); +} + +static struct sk_buff *fun_tls_tx(struct sk_buff *skb, struct funeth_txq *q, + unsigned int *tls_len) +{ +#if IS_ENABLED(CONFIG_TLS_DEVICE) + const struct fun_ktls_tx_ctx *tls_ctx; + u32 datalen, seq; + + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + if (!datalen) + return skb; + + if (likely(!tls_offload_tx_resync_pending(skb->sk))) { + seq = ntohl(tcp_hdr(skb)->seq); + tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); + + if (likely(tls_ctx->next_seq == seq)) { + *tls_len = datalen; + return skb; + } + if (seq - tls_ctx->next_seq < U32_MAX / 4) { + tls_offload_tx_resync_request(skb->sk, seq, + tls_ctx->next_seq); + } + } + + FUN_QSTAT_INC(q, tx_tls_fallback); + skb = tls_encrypt_skb(skb); + if (!skb) + FUN_QSTAT_INC(q, tx_tls_drops); + + return skb; +#else + return NULL; +#endif +} + +/* Write as many descriptors as needed for the supplied skb starting at the + * current producer location. The caller has made certain enough descriptors + * are available. + * + * Returns the number of descriptors written, 0 on error. + */ +static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q, + unsigned int tls_len) +{ + unsigned int extra_bytes = 0, extra_pkts = 0; + unsigned int idx = q->prod_cnt & q->mask; + const struct skb_shared_info *shinfo; + unsigned int lens[MAX_SKB_FRAGS + 1]; + dma_addr_t addrs[MAX_SKB_FRAGS + 1]; + struct fun_eth_tx_req *req; + struct fun_dataop_gl *gle; + const struct tcphdr *th; + unsigned int ngle, i; + u16 flags; + + if (unlikely(map_skb(skb, q->dma_dev, addrs, lens))) { + FUN_QSTAT_INC(q, tx_map_err); + return 0; + } + + req = fun_tx_desc_addr(q, idx); + req->op = FUN_ETH_OP_TX; + req->len8 = 0; + req->flags = 0; + req->suboff8 = offsetof(struct fun_eth_tx_req, dataop); + req->repr_idn = 0; + req->encap_proto = 0; + + shinfo = skb_shinfo(skb); + if (likely(shinfo->gso_size)) { + if (skb->encapsulation) { + u16 ol4_ofst; + + flags = FUN_ETH_OUTER_EN | FUN_ETH_INNER_LSO | + FUN_ETH_UPDATE_INNER_L4_CKSUM | + FUN_ETH_UPDATE_OUTER_L3_LEN; + if (shinfo->gso_type & (SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM)) { + flags |= FUN_ETH_UPDATE_OUTER_L4_LEN | + FUN_ETH_OUTER_UDP; + if (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) + flags |= FUN_ETH_UPDATE_OUTER_L4_CKSUM; + ol4_ofst = skb_transport_offset(skb); + } else { + ol4_ofst = skb_inner_network_offset(skb); + } + + if (ip_hdr(skb)->version == 4) + flags |= FUN_ETH_UPDATE_OUTER_L3_CKSUM; + else + flags |= FUN_ETH_OUTER_IPV6; + + if (skb->inner_network_header) { + if (inner_ip_hdr(skb)->version == 4) + flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM | + FUN_ETH_UPDATE_INNER_L3_LEN; + else + flags |= FUN_ETH_INNER_IPV6 | + FUN_ETH_UPDATE_INNER_L3_LEN; + } + th = inner_tcp_hdr(skb); + fun_eth_offload_init(&req->offload, flags, + shinfo->gso_size, + tcp_hdr_doff_flags(th), 0, + skb_inner_network_offset(skb), + skb_inner_transport_offset(skb), + skb_network_offset(skb), ol4_ofst); + FUN_QSTAT_INC(q, tx_encap_tso); + } else { + /* HW considers one set of headers as inner */ + flags = FUN_ETH_INNER_LSO | + FUN_ETH_UPDATE_INNER_L4_CKSUM | + FUN_ETH_UPDATE_INNER_L3_LEN; + if (shinfo->gso_type & SKB_GSO_TCPV6) + flags |= FUN_ETH_INNER_IPV6; + else + flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM; + th = tcp_hdr(skb); + fun_eth_offload_init(&req->offload, flags, + shinfo->gso_size, + tcp_hdr_doff_flags(th), 0, + skb_network_offset(skb), + skb_transport_offset(skb), 0, 0); + FUN_QSTAT_INC(q, tx_tso); + } + + u64_stats_update_begin(&q->syncp); + q->stats.tx_cso += shinfo->gso_segs; + u64_stats_update_end(&q->syncp); + + extra_pkts = shinfo->gso_segs - 1; + extra_bytes = (be16_to_cpu(req->offload.inner_l4_off) + + __tcp_hdrlen(th)) * extra_pkts; + } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + flags = FUN_ETH_UPDATE_INNER_L4_CKSUM; + if (skb->csum_offset == offsetof(struct udphdr, check)) + flags |= FUN_ETH_INNER_UDP; + fun_eth_offload_init(&req->offload, flags, 0, 0, 0, 0, + skb_checksum_start_offset(skb), 0, 0); + FUN_QSTAT_INC(q, tx_cso); + } else { + fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0); + } + + ngle = shinfo->nr_frags + 1; + req->len8 = (sizeof(*req) + ngle * sizeof(*gle)) / 8; + req->dataop = FUN_DATAOP_HDR_INIT(ngle, 0, ngle, 0, skb->len); + + for (i = 0, gle = (struct fun_dataop_gl *)req->dataop.imm; + i < ngle && txq_to_end(q, gle); i++, gle++) + fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]); + + if (txq_to_end(q, gle) == 0) { + gle = (struct fun_dataop_gl *)q->desc; + for ( ; i < ngle; i++, gle++) + fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]); + } + + if (IS_ENABLED(CONFIG_TLS_DEVICE) && unlikely(tls_len)) { + struct fun_eth_tls *tls = (struct fun_eth_tls *)gle; + struct fun_ktls_tx_ctx *tls_ctx; + + req->len8 += FUNETH_TLS_SZ / 8; + req->flags = cpu_to_be16(FUN_ETH_TX_TLS); + + tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); + tls->tlsid = tls_ctx->tlsid; + tls_ctx->next_seq += tls_len; + + u64_stats_update_begin(&q->syncp); + q->stats.tx_tls_bytes += tls_len; + q->stats.tx_tls_pkts += 1 + extra_pkts; + u64_stats_update_end(&q->syncp); + } + + u64_stats_update_begin(&q->syncp); + q->stats.tx_bytes += skb->len + extra_bytes; + q->stats.tx_pkts += 1 + extra_pkts; + u64_stats_update_end(&q->syncp); + + q->info[idx].skb = skb; + + trace_funeth_tx(q, skb->len, idx, req->dataop.ngather); + return tx_req_ndesc(req); +} + +/* Return the number of available descriptors of a Tx queue. + * HW assumes head==tail means the ring is empty so we need to keep one + * descriptor unused. + */ +static unsigned int fun_txq_avail(const struct funeth_txq *q) +{ + return q->mask - q->prod_cnt + q->cons_cnt; +} + +/* Stop a queue if it can't handle another worst-case packet. */ +static void fun_tx_check_stop(struct funeth_txq *q) +{ + if (likely(fun_txq_avail(q) >= FUNETH_MAX_PKT_DESC)) + return; + + netif_tx_stop_queue(q->ndq); + + /* NAPI reclaim is freeing packets in parallel with us and we may race. + * We have stopped the queue but check again after synchronizing with + * reclaim. + */ + smp_mb(); + if (likely(fun_txq_avail(q) < FUNETH_MAX_PKT_DESC)) + FUN_QSTAT_INC(q, tx_nstops); + else + netif_tx_start_queue(q->ndq); +} + +/* Return true if a queue has enough space to restart. Current condition is + * that the queue must be >= 1/4 empty. + */ +static bool fun_txq_may_restart(struct funeth_txq *q) +{ + return fun_txq_avail(q) >= q->mask / 4; +} + +netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + unsigned int qid = skb_get_queue_mapping(skb); + struct funeth_txq *q = fp->txqs[qid]; + unsigned int tls_len = 0; + unsigned int ndesc; + + if (IS_ENABLED(CONFIG_TLS_DEVICE) && skb->sk && + tls_is_sk_tx_device_offloaded(skb->sk)) { + skb = fun_tls_tx(skb, q, &tls_len); + if (unlikely(!skb)) + goto dropped; + } + + ndesc = write_pkt_desc(skb, q, tls_len); + if (unlikely(!ndesc)) { + dev_kfree_skb_any(skb); + goto dropped; + } + + q->prod_cnt += ndesc; + fun_tx_check_stop(q); + + skb_tx_timestamp(skb); + + if (__netdev_tx_sent_queue(q->ndq, skb->len, netdev_xmit_more())) + fun_txq_wr_db(q); + else + FUN_QSTAT_INC(q, tx_more); + + return NETDEV_TX_OK; + +dropped: + /* A dropped packet may be the last one in a xmit_more train, + * ring the doorbell just in case. + */ + if (!netdev_xmit_more()) + fun_txq_wr_db(q); + return NETDEV_TX_OK; +} + +/* Return a Tx queue's HW head index written back to host memory. */ +static u16 txq_hw_head(const struct funeth_txq *q) +{ + return (u16)be64_to_cpu(*q->hw_wb); +} + +/* Unmap the Tx packet starting at the given descriptor index and + * return the number of Tx descriptors it occupied. + */ +static unsigned int unmap_skb(const struct funeth_txq *q, unsigned int idx) +{ + const struct fun_eth_tx_req *req = fun_tx_desc_addr(q, idx); + unsigned int ngle = req->dataop.ngather; + struct fun_dataop_gl *gle; + + if (ngle) { + gle = (struct fun_dataop_gl *)req->dataop.imm; + dma_unmap_single(q->dma_dev, be64_to_cpu(gle->sgl_data), + be32_to_cpu(gle->sgl_len), DMA_TO_DEVICE); + + for (gle++; --ngle && txq_to_end(q, gle); gle++) + dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data), + be32_to_cpu(gle->sgl_len), + DMA_TO_DEVICE); + + for (gle = (struct fun_dataop_gl *)q->desc; ngle; ngle--, gle++) + dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data), + be32_to_cpu(gle->sgl_len), + DMA_TO_DEVICE); + } + + return tx_req_ndesc(req); +} + +/* Reclaim completed Tx descriptors and free their packets. Restart a stopped + * queue if we freed enough descriptors. + * + * Return true if we exhausted the budget while there is more work to be done. + */ +static bool fun_txq_reclaim(struct funeth_txq *q, int budget) +{ + unsigned int npkts = 0, nbytes = 0, ndesc = 0; + unsigned int head, limit, reclaim_idx; + + /* budget may be 0, e.g., netpoll */ + limit = budget ? budget : UINT_MAX; + + for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask; + head != reclaim_idx && npkts < limit; head = txq_hw_head(q)) { + /* The HW head is continually updated, ensure we don't read + * descriptor state before the head tells us to reclaim it. + * On the enqueue side the doorbell is an implicit write + * barrier. + */ + rmb(); + + do { + unsigned int pkt_desc = unmap_skb(q, reclaim_idx); + struct sk_buff *skb = q->info[reclaim_idx].skb; + + trace_funeth_tx_free(q, reclaim_idx, pkt_desc, head); + + nbytes += skb->len; + napi_consume_skb(skb, budget); + ndesc += pkt_desc; + reclaim_idx = (reclaim_idx + pkt_desc) & q->mask; + npkts++; + } while (reclaim_idx != head && npkts < limit); + } + + q->cons_cnt += ndesc; + netdev_tx_completed_queue(q->ndq, npkts, nbytes); + smp_mb(); /* pairs with the one in fun_tx_check_stop() */ + + if (unlikely(netif_tx_queue_stopped(q->ndq) && + fun_txq_may_restart(q))) { + netif_tx_wake_queue(q->ndq); + FUN_QSTAT_INC(q, tx_nrestarts); + } + + return reclaim_idx != head; +} + +/* The NAPI handler for Tx queues. */ +int fun_txq_napi_poll(struct napi_struct *napi, int budget) +{ + struct fun_irq *irq = container_of(napi, struct fun_irq, napi); + struct funeth_txq *q = irq->txq; + unsigned int db_val; + + if (fun_txq_reclaim(q, budget)) + return budget; /* exhausted budget */ + + napi_complete(napi); /* exhausted pending work */ + db_val = READ_ONCE(q->irq_db_val) | (q->cons_cnt & q->mask); + writel(db_val, q->db); + return 0; +} + +static void fun_xdp_unmap(const struct funeth_txq *q, unsigned int idx) +{ + const struct fun_eth_tx_req *req = fun_tx_desc_addr(q, idx); + const struct fun_dataop_gl *gle; + + gle = (const struct fun_dataop_gl *)req->dataop.imm; + dma_unmap_single(q->dma_dev, be64_to_cpu(gle->sgl_data), + be32_to_cpu(gle->sgl_len), DMA_TO_DEVICE); +} + +/* Reclaim up to @budget completed Tx descriptors from a TX XDP queue. */ +static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget) +{ + unsigned int npkts = 0, head, reclaim_idx; + + for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask; + head != reclaim_idx && npkts < budget; head = txq_hw_head(q)) { + /* The HW head is continually updated, ensure we don't read + * descriptor state before the head tells us to reclaim it. + * On the enqueue side the doorbell is an implicit write + * barrier. + */ + rmb(); + + do { + fun_xdp_unmap(q, reclaim_idx); + page_frag_free(q->info[reclaim_idx].vaddr); + + trace_funeth_tx_free(q, reclaim_idx, 1, head); + + reclaim_idx = (reclaim_idx + 1) & q->mask; + npkts++; + } while (reclaim_idx != head && npkts < budget); + } + + q->cons_cnt += npkts; + return npkts; +} + +bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len) +{ + struct fun_eth_tx_req *req; + struct fun_dataop_gl *gle; + unsigned int idx; + dma_addr_t dma; + + if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES) + fun_xdpq_clean(q, FUN_XDP_CLEAN_BATCH); + + if (!unlikely(fun_txq_avail(q))) { + FUN_QSTAT_INC(q, tx_xdp_full); + return false; + } + + dma = dma_map_single(q->dma_dev, data, len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(q->dma_dev, dma))) { + FUN_QSTAT_INC(q, tx_map_err); + return false; + } + + idx = q->prod_cnt & q->mask; + req = fun_tx_desc_addr(q, idx); + req->op = FUN_ETH_OP_TX; + req->len8 = (sizeof(*req) + sizeof(*gle)) / 8; + req->flags = 0; + req->suboff8 = offsetof(struct fun_eth_tx_req, dataop); + req->repr_idn = 0; + req->encap_proto = 0; + fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0); + req->dataop = FUN_DATAOP_HDR_INIT(1, 0, 1, 0, len); + + gle = (struct fun_dataop_gl *)req->dataop.imm; + fun_dataop_gl_init(gle, 0, 0, len, dma); + + q->info[idx].vaddr = data; + + u64_stats_update_begin(&q->syncp); + q->stats.tx_bytes += len; + q->stats.tx_pkts++; + u64_stats_update_end(&q->syncp); + + trace_funeth_tx(q, len, idx, 1); + q->prod_cnt++; + + return true; +} + +int fun_xdp_xmit_frames(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct funeth_txq *q, **xdpqs; + int i, q_idx; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + xdpqs = rcu_dereference_bh(fp->xdpqs); + if (unlikely(!xdpqs)) + return -ENETDOWN; + + q_idx = smp_processor_id(); + if (unlikely(q_idx >= fp->num_xdpqs)) + return -ENXIO; + + for (q = xdpqs[q_idx], i = 0; i < n; i++) { + const struct xdp_frame *xdpf = frames[i]; + + if (!fun_xdp_tx(q, xdpf->data, xdpf->len)) + break; + } + + if (unlikely(flags & XDP_XMIT_FLUSH)) + fun_txq_wr_db(q); + return i; +} + +/* Purge a Tx queue of any queued packets. Should be called once HW access + * to the packets has been revoked, e.g., after the queue has been disabled. + */ +static void fun_txq_purge(struct funeth_txq *q) +{ + while (q->cons_cnt != q->prod_cnt) { + unsigned int idx = q->cons_cnt & q->mask; + + q->cons_cnt += unmap_skb(q, idx); + dev_kfree_skb_any(q->info[idx].skb); + } + netdev_tx_reset_queue(q->ndq); +} + +static void fun_xdpq_purge(struct funeth_txq *q) +{ + while (q->cons_cnt != q->prod_cnt) { + unsigned int idx = q->cons_cnt & q->mask; + + fun_xdp_unmap(q, idx); + page_frag_free(q->info[idx].vaddr); + q->cons_cnt++; + } +} + +/* Create a Tx queue, allocating all the host resources needed. */ +static struct funeth_txq *fun_txq_create_sw(struct net_device *dev, + unsigned int qidx, + unsigned int ndesc, + struct fun_irq *irq) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct funeth_txq *q; + int numa_node; + + if (irq) + numa_node = fun_irq_node(irq); /* skb Tx queue */ + else + numa_node = cpu_to_node(qidx); /* XDP Tx queue */ + + q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node); + if (!q) + goto err; + + q->dma_dev = &fp->pdev->dev; + q->desc = fun_alloc_ring_mem(q->dma_dev, ndesc, FUNETH_SQE_SIZE, + sizeof(*q->info), true, numa_node, + &q->dma_addr, (void **)&q->info, + &q->hw_wb); + if (!q->desc) + goto free_q; + + q->netdev = dev; + q->mask = ndesc - 1; + q->qidx = qidx; + q->numa_node = numa_node; + u64_stats_init(&q->syncp); + q->init_state = FUN_QSTATE_INIT_SW; + return q; + +free_q: + kfree(q); +err: + netdev_err(dev, "Can't allocate memory for %s queue %u\n", + irq ? "Tx" : "XDP", qidx); + return NULL; +} + +static void fun_txq_free_sw(struct funeth_txq *q) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + + fun_free_ring_mem(q->dma_dev, q->mask + 1, FUNETH_SQE_SIZE, true, + q->desc, q->dma_addr, q->info); + + fp->tx_packets += q->stats.tx_pkts; + fp->tx_bytes += q->stats.tx_bytes; + fp->tx_dropped += q->stats.tx_map_err; + + kfree(q); +} + +/* Allocate the device portion of a Tx queue. */ +int fun_txq_create_dev(struct funeth_txq *q, struct fun_irq *irq) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + unsigned int irq_idx, ndesc = q->mask + 1; + int err; + + q->irq = irq; + *q->hw_wb = 0; + q->prod_cnt = 0; + q->cons_cnt = 0; + irq_idx = irq ? irq->irq_idx : 0; + + err = fun_sq_create(fp->fdev, + FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS | + FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR, 0, + FUN_HCI_ID_INVALID, ilog2(FUNETH_SQE_SIZE), ndesc, + q->dma_addr, fp->tx_coal_count, fp->tx_coal_usec, + irq_idx, 0, fp->fdev->kern_end_qid, 0, + &q->hw_qid, &q->db); + if (err) + goto out; + + err = fun_create_and_bind_tx(fp, q->hw_qid); + if (err < 0) + goto free_devq; + q->ethid = err; + + if (irq) { + irq->txq = q; + q->ndq = netdev_get_tx_queue(q->netdev, q->qidx); + q->irq_db_val = FUN_IRQ_SQ_DB(fp->tx_coal_usec, + fp->tx_coal_count); + writel(q->irq_db_val, q->db); + } + + q->init_state = FUN_QSTATE_INIT_FULL; + netif_info(fp, ifup, q->netdev, + "%s queue %u, depth %u, HW qid %u, IRQ idx %u, eth id %u, node %d\n", + irq ? "Tx" : "XDP", q->qidx, ndesc, q->hw_qid, irq_idx, + q->ethid, q->numa_node); + return 0; + +free_devq: + fun_destroy_sq(fp->fdev, q->hw_qid); +out: + netdev_err(q->netdev, + "Failed to create %s queue %u on device, error %d\n", + irq ? "Tx" : "XDP", q->qidx, err); + return err; +} + +static void fun_txq_free_dev(struct funeth_txq *q) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + + if (q->init_state < FUN_QSTATE_INIT_FULL) + return; + + netif_info(fp, ifdown, q->netdev, + "Freeing %s queue %u (id %u), IRQ %u, ethid %u\n", + q->irq ? "Tx" : "XDP", q->qidx, q->hw_qid, + q->irq ? q->irq->irq_idx : 0, q->ethid); + + fun_destroy_sq(fp->fdev, q->hw_qid); + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, q->ethid); + + if (q->irq) { + q->irq->txq = NULL; + fun_txq_purge(q); + } else { + fun_xdpq_purge(q); + } + + q->init_state = FUN_QSTATE_INIT_SW; +} + +/* Create or advance a Tx queue, allocating all the host and device resources + * needed to reach the target state. + */ +int funeth_txq_create(struct net_device *dev, unsigned int qidx, + unsigned int ndesc, struct fun_irq *irq, int state, + struct funeth_txq **qp) +{ + struct funeth_txq *q = *qp; + int err; + + if (!q) + q = fun_txq_create_sw(dev, qidx, ndesc, irq); + if (!q) + return -ENOMEM; + + if (q->init_state >= state) + goto out; + + err = fun_txq_create_dev(q, irq); + if (err) { + if (!*qp) + fun_txq_free_sw(q); + return err; + } + +out: + *qp = q; + return 0; +} + +/* Free Tx queue resources until it reaches the target state. + * The queue must be already disconnected from the stack. + */ +struct funeth_txq *funeth_txq_free(struct funeth_txq *q, int state) +{ + if (state < FUN_QSTATE_INIT_FULL) + fun_txq_free_dev(q); + + if (state == FUN_QSTATE_DESTROYED) { + fun_txq_free_sw(q); + q = NULL; + } + + return q; +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h new file mode 100644 index 000000000000..04c9f91b7489 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h @@ -0,0 +1,264 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUNETH_TXRX_H +#define _FUNETH_TXRX_H + +#include <linux/netdevice.h> +#include <linux/u64_stats_sync.h> + +/* Tx descriptor size */ +#define FUNETH_SQE_SIZE 64U + +/* Size of device headers per Tx packet */ +#define FUNETH_FUNOS_HDR_SZ (sizeof(struct fun_eth_tx_req)) + +/* Number of gather list entries per Tx descriptor */ +#define FUNETH_GLE_PER_DESC (FUNETH_SQE_SIZE / sizeof(struct fun_dataop_gl)) + +/* Max gather list size in bytes for an sk_buff. */ +#define FUNETH_MAX_GL_SZ ((MAX_SKB_FRAGS + 1) * sizeof(struct fun_dataop_gl)) + +#if IS_ENABLED(CONFIG_TLS_DEVICE) +# define FUNETH_TLS_SZ sizeof(struct fun_eth_tls) +#else +# define FUNETH_TLS_SZ 0 +#endif + +/* Max number of Tx descriptors for an sk_buff using a gather list. */ +#define FUNETH_MAX_GL_DESC \ + DIV_ROUND_UP((FUNETH_FUNOS_HDR_SZ + FUNETH_MAX_GL_SZ + FUNETH_TLS_SZ), \ + FUNETH_SQE_SIZE) + +/* Max number of Tx descriptors for any packet. */ +#define FUNETH_MAX_PKT_DESC FUNETH_MAX_GL_DESC + +/* Rx CQ descriptor size. */ +#define FUNETH_CQE_SIZE 64U + +/* Offset of cqe_info within a CQE. */ +#define FUNETH_CQE_INFO_OFFSET (FUNETH_CQE_SIZE - sizeof(struct fun_cqe_info)) + +/* Construct the IRQ portion of a CQ doorbell. The resulting value arms the + * interrupt with the supplied time delay and packet count moderation settings. + */ +#define FUN_IRQ_CQ_DB(usec, pkts) \ + (FUN_DB_IRQ_ARM_F | ((usec) << FUN_DB_INTCOAL_USEC_S) | \ + ((pkts) << FUN_DB_INTCOAL_ENTRIES_S)) + +/* As above for SQ doorbells. */ +#define FUN_IRQ_SQ_DB(usec, pkts) \ + (FUN_DB_IRQ_ARM_F | \ + ((usec) << FUN_DB_INTCOAL_USEC_S) | \ + ((pkts) << FUN_DB_INTCOAL_ENTRIES_S)) + +/* Per packet tailroom. Present only for 1-frag packets. */ +#define FUN_RX_TAILROOM SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + +/* Per packet headroom for XDP. Preferred over XDP_PACKET_HEADROOM to + * accommodate two packets per buffer for 4K pages and 1500B MTUs. + */ +#define FUN_XDP_HEADROOM 192 + +/* Initialization state of a queue. */ +enum { + FUN_QSTATE_DESTROYED, /* what queue? */ + FUN_QSTATE_INIT_SW, /* exists in SW, not on the device */ + FUN_QSTATE_INIT_FULL, /* exists both in SW and on device */ +}; + +/* Initialization state of an interrupt. */ +enum { + FUN_IRQ_INIT, /* initialized and in the XArray but inactive */ + FUN_IRQ_REQUESTED, /* request_irq() done */ + FUN_IRQ_ENABLED, /* processing enabled */ + FUN_IRQ_DISABLED, /* processing disabled */ +}; + +struct bpf_prog; + +struct funeth_txq_stats { /* per Tx queue SW counters */ + u64 tx_pkts; /* # of Tx packets */ + u64 tx_bytes; /* total bytes of Tx packets */ + u64 tx_cso; /* # of packets with checksum offload */ + u64 tx_tso; /* # of non-encapsulated TSO super-packets */ + u64 tx_encap_tso; /* # of encapsulated TSO super-packets */ + u64 tx_more; /* # of DBs elided due to xmit_more */ + u64 tx_nstops; /* # of times the queue has stopped */ + u64 tx_nrestarts; /* # of times the queue has restarted */ + u64 tx_map_err; /* # of packets dropped due to DMA mapping errors */ + u64 tx_xdp_full; /* # of XDP packets that could not be enqueued */ + u64 tx_tls_pkts; /* # of Tx TLS packets offloaded to HW */ + u64 tx_tls_bytes; /* Tx bytes of HW-handled TLS payload */ + u64 tx_tls_fallback; /* attempted Tx TLS offloads punted to SW */ + u64 tx_tls_drops; /* attempted Tx TLS offloads dropped */ +}; + +struct funeth_tx_info { /* per Tx descriptor state */ + union { + struct sk_buff *skb; /* associated packet */ + void *vaddr; /* start address for XDP */ + }; +}; + +struct funeth_txq { + /* RO cacheline of frequently accessed data */ + u32 mask; /* queue depth - 1 */ + u32 hw_qid; /* device ID of the queue */ + void *desc; /* base address of descriptor ring */ + struct funeth_tx_info *info; + struct device *dma_dev; /* device for DMA mappings */ + volatile __be64 *hw_wb; /* HW write-back location */ + u32 __iomem *db; /* SQ doorbell register address */ + struct netdev_queue *ndq; + dma_addr_t dma_addr; /* DMA address of descriptor ring */ + /* producer R/W cacheline */ + u16 qidx; /* queue index within net_device */ + u16 ethid; + u32 prod_cnt; /* producer counter */ + struct funeth_txq_stats stats; + /* shared R/W cacheline, primarily accessed by consumer */ + u32 irq_db_val; /* value written to IRQ doorbell */ + u32 cons_cnt; /* consumer (cleanup) counter */ + struct net_device *netdev; + struct fun_irq *irq; + int numa_node; + u8 init_state; /* queue initialization state */ + struct u64_stats_sync syncp; +}; + +struct funeth_rxq_stats { /* per Rx queue SW counters */ + u64 rx_pkts; /* # of received packets, including SW drops */ + u64 rx_bytes; /* total size of received packets */ + u64 rx_cso; /* # of packets with checksum offload */ + u64 rx_bufs; /* total # of Rx buffers provided to device */ + u64 gro_pkts; /* # of GRO superpackets */ + u64 gro_merged; /* # of pkts merged into existing GRO superpackets */ + u64 rx_page_alloc; /* # of page allocations for Rx buffers */ + u64 rx_budget; /* NAPI iterations that exhausted their budget */ + u64 rx_mem_drops; /* # of packets dropped due to memory shortage */ + u64 rx_map_err; /* # of page DMA mapping errors */ + u64 xdp_drops; /* XDP_DROPped packets */ + u64 xdp_tx; /* successful XDP transmits */ + u64 xdp_redir; /* successful XDP redirects */ + u64 xdp_err; /* packets dropped due to XDP errors */ +}; + +struct funeth_rxbuf { /* per Rx buffer state */ + struct page *page; /* associated page */ + dma_addr_t dma_addr; /* DMA address of page start */ + int pg_refs; /* page refs held by driver */ + int node; /* page node, or -1 if it is PF_MEMALLOC */ +}; + +struct funeth_rx_cache { /* cache of DMA-mapped previously used buffers */ + struct funeth_rxbuf *bufs; /* base of Rx buffer state ring */ + unsigned int prod_cnt; /* producer counter */ + unsigned int cons_cnt; /* consumer counter */ + unsigned int mask; /* depth - 1 */ +}; + +/* An Rx queue consists of a CQ and an SQ used to provide Rx buffers. */ +struct funeth_rxq { + struct net_device *netdev; + struct napi_struct *napi; + struct device *dma_dev; /* device for DMA mappings */ + void *cqes; /* base of CQ descriptor ring */ + const void *next_cqe_info; /* fun_cqe_info of next CQE */ + u32 __iomem *cq_db; /* CQ doorbell register address */ + unsigned int cq_head; /* CQ head index */ + unsigned int cq_mask; /* CQ depth - 1 */ + u16 phase; /* CQ phase tag */ + u16 qidx; /* queue index within net_device */ + unsigned int irq_db_val; /* IRQ info for CQ doorbell */ + struct fun_eprq_rqbuf *rqes; /* base of RQ descriptor ring */ + struct funeth_rxbuf *bufs; /* base of Rx buffer state ring */ + struct funeth_rxbuf *cur_buf; /* currently active buffer */ + u32 __iomem *rq_db; /* RQ doorbell register address */ + unsigned int rq_cons; /* RQ consumer counter */ + unsigned int rq_mask; /* RQ depth - 1 */ + unsigned int buf_offset; /* offset of next pkt in head buffer */ + u8 xdp_flush; /* XDP flush types needed at NAPI end */ + u8 init_state; /* queue initialization state */ + u16 headroom; /* per packet headroom */ + unsigned int rq_cons_db; /* value of rq_cons at last RQ db */ + unsigned int rq_db_thres; /* # of new buffers needed to write RQ db */ + struct funeth_rxbuf spare_buf; /* spare for next buffer replacement */ + struct funeth_rx_cache cache; /* used buffer cache */ + struct bpf_prog *xdp_prog; /* optional XDP BPF program */ + struct funeth_rxq_stats stats; + dma_addr_t cq_dma_addr; /* DMA address of CQE ring */ + dma_addr_t rq_dma_addr; /* DMA address of RQE ring */ + u16 irq_cnt; + u32 hw_cqid; /* device ID of the queue's CQ */ + u32 hw_sqid; /* device ID of the queue's SQ */ + int numa_node; + struct u64_stats_sync syncp; + struct xdp_rxq_info xdp_rxq; +}; + +#define FUN_QSTAT_INC(q, counter) \ + do { \ + u64_stats_update_begin(&(q)->syncp); \ + (q)->stats.counter++; \ + u64_stats_update_end(&(q)->syncp); \ + } while (0) + +#define FUN_QSTAT_READ(q, seq, stats_copy) \ + do { \ + seq = u64_stats_fetch_begin(&(q)->syncp); \ + stats_copy = (q)->stats; \ + } while (u64_stats_fetch_retry(&(q)->syncp, (seq))) + +#define FUN_INT_NAME_LEN (IFNAMSIZ + 16) + +struct fun_irq { + struct napi_struct napi; + struct funeth_txq *txq; + struct funeth_rxq *rxq; + u8 state; + u16 irq_idx; /* index of MSI-X interrupt */ + int irq; /* Linux IRQ vector */ + cpumask_t affinity_mask; /* IRQ affinity */ + struct irq_affinity_notify aff_notify; + char name[FUN_INT_NAME_LEN]; +} ____cacheline_internodealigned_in_smp; + +/* Return the start address of the idx-th Tx descriptor. */ +static inline void *fun_tx_desc_addr(const struct funeth_txq *q, + unsigned int idx) +{ + return q->desc + idx * FUNETH_SQE_SIZE; +} + +static inline void fun_txq_wr_db(const struct funeth_txq *q) +{ + unsigned int tail = q->prod_cnt & q->mask; + + writel(tail, q->db); +} + +static inline int fun_irq_node(const struct fun_irq *p) +{ + return cpu_to_mem(cpumask_first(&p->affinity_mask)); +} + +int fun_rxq_napi_poll(struct napi_struct *napi, int budget); +int fun_txq_napi_poll(struct napi_struct *napi, int budget); +netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev); +bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len); +int fun_xdp_xmit_frames(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags); + +int funeth_txq_create(struct net_device *dev, unsigned int qidx, + unsigned int ndesc, struct fun_irq *irq, int state, + struct funeth_txq **qp); +int fun_txq_create_dev(struct funeth_txq *q, struct fun_irq *irq); +struct funeth_txq *funeth_txq_free(struct funeth_txq *q, int state); +int funeth_rxq_create(struct net_device *dev, unsigned int qidx, + unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq, + int state, struct funeth_rxq **qp); +int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq); +struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state); +int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog); + +#endif /* _FUNETH_TXRX_H */ diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 54e51c8221b8..6cafee55efc3 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -857,8 +857,7 @@ static int gve_alloc_qpls(struct gve_priv *priv) int i, j; int err; - /* Raw addressing means no QPLs */ - if (priv->queue_format == GVE_GQI_RDA_FORMAT) + if (num_qpls == 0) return 0; priv->qpls = kvcalloc(num_qpls, sizeof(*priv->qpls), GFP_KERNEL); @@ -901,8 +900,7 @@ static void gve_free_qpls(struct gve_priv *priv) int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); int i; - /* Raw addressing means no QPLs */ - if (priv->queue_format == GVE_GQI_RDA_FORMAT) + if (num_qpls == 0) return; kvfree(priv->qpl_cfg.qpl_id_map); diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 9298fbecb31a..6f18c9a03231 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -167,6 +167,7 @@ struct hnae3_handle; struct hnae3_queue { void __iomem *io_base; + void __iomem *mem_base; struct hnae3_ae_algo *ae_algo; struct hnae3_handle *handle; int tqp_index; /* index in a handle */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index babc5d7a3b52..0b8a73c40b12 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2028,9 +2028,73 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring, return bd_num; } +static void hns3_tx_push_bd(struct hns3_enet_ring *ring, int num) +{ +#define HNS3_BYTES_PER_64BIT 8 + + struct hns3_desc desc[HNS3_MAX_PUSH_BD_NUM] = {}; + int offset = 0; + + /* make sure everything is visible to device before + * excuting tx push or updating doorbell + */ + dma_wmb(); + + do { + int idx = (ring->next_to_use - num + ring->desc_num) % + ring->desc_num; + + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_push++; + u64_stats_update_end(&ring->syncp); + memcpy(&desc[offset], &ring->desc[idx], + sizeof(struct hns3_desc)); + offset++; + } while (--num); + + __iowrite64_copy(ring->tqp->mem_base, desc, + (sizeof(struct hns3_desc) * HNS3_MAX_PUSH_BD_NUM) / + HNS3_BYTES_PER_64BIT); + + io_stop_wc(); +} + +static void hns3_tx_mem_doorbell(struct hns3_enet_ring *ring) +{ +#define HNS3_MEM_DOORBELL_OFFSET 64 + + __le64 bd_num = cpu_to_le64((u64)ring->pending_buf); + + /* make sure everything is visible to device before + * excuting tx push or updating doorbell + */ + dma_wmb(); + + __iowrite64_copy(ring->tqp->mem_base + HNS3_MEM_DOORBELL_OFFSET, + &bd_num, 1); + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_mem_doorbell += ring->pending_buf; + u64_stats_update_end(&ring->syncp); + + io_stop_wc(); +} + static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, bool doorbell) { + struct net_device *netdev = ring_to_netdev(ring); + struct hns3_nic_priv *priv = netdev_priv(netdev); + + /* when tx push is enabled, the packet whose number of BD below + * HNS3_MAX_PUSH_BD_NUM can be pushed directly. + */ + if (test_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state) && num && + !ring->pending_buf && num <= HNS3_MAX_PUSH_BD_NUM && doorbell) { + hns3_tx_push_bd(ring, num); + WRITE_ONCE(ring->last_to_use, ring->next_to_use); + return; + } + ring->pending_buf += num; if (!doorbell) { @@ -2038,11 +2102,12 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, return; } - if (!ring->pending_buf) - return; + if (ring->tqp->mem_base) + hns3_tx_mem_doorbell(ring); + else + writel(ring->pending_buf, + ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG); - writel(ring->pending_buf, - ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG); ring->pending_buf = 0; WRITE_ONCE(ring->last_to_use, ring->next_to_use); } @@ -2732,6 +2797,9 @@ static void hns3_dump_queue_stats(struct net_device *ndev, "seg_pkt_cnt: %llu, tx_more: %llu, restart_queue: %llu, tx_busy: %llu\n", tx_ring->stats.seg_pkt_cnt, tx_ring->stats.tx_more, tx_ring->stats.restart_queue, tx_ring->stats.tx_busy); + + netdev_info(ndev, "tx_push: %llu, tx_mem_doorbell: %llu\n", + tx_ring->stats.tx_push, tx_ring->stats.tx_mem_doorbell); } static void hns3_dump_queue_reg(struct net_device *ndev, @@ -5094,6 +5162,9 @@ static void hns3_state_init(struct hnae3_handle *handle) set_bit(HNS3_NIC_STATE_INITED, &priv->state); + if (test_bit(HNAE3_DEV_SUPPORT_TX_PUSH_B, ae_dev->caps)) + set_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state); + if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) set_bit(HNAE3_PFLAG_LIMIT_PROMISC, &handle->supported_pflags); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index a05a0c7423ce..4a3253692dcc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -7,6 +7,7 @@ #include <linux/dim.h> #include <linux/if_vlan.h> #include <net/page_pool.h> +#include <asm/barrier.h> #include "hnae3.h" @@ -25,9 +26,12 @@ enum hns3_nic_state { HNS3_NIC_STATE2_RESET_REQUESTED, HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, + HNS3_NIC_STATE_TX_PUSH_ENABLE, HNS3_NIC_STATE_MAX }; +#define HNS3_MAX_PUSH_BD_NUM 2 + #define HNS3_RING_RX_RING_BASEADDR_L_REG 0x00000 #define HNS3_RING_RX_RING_BASEADDR_H_REG 0x00004 #define HNS3_RING_RX_RING_BD_NUM_REG 0x00008 @@ -410,6 +414,8 @@ struct ring_stats { u64 tx_pkts; u64 tx_bytes; u64 tx_more; + u64 tx_push; + u64 tx_mem_doorbell; u64 restart_queue; u64 tx_busy; u64 tx_copy; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index c06c39ece80d..6469238ae090 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -23,6 +23,8 @@ static const struct hns3_stats hns3_txq_stats[] = { HNS3_TQP_STAT("packets", tx_pkts), HNS3_TQP_STAT("bytes", tx_bytes), HNS3_TQP_STAT("more", tx_more), + HNS3_TQP_STAT("push", tx_push), + HNS3_TQP_STAT("mem_doorbell", tx_mem_doorbell), HNS3_TQP_STAT("wake", restart_queue), HNS3_TQP_STAT("busy", tx_busy), HNS3_TQP_STAT("copy", tx_copy), diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 24f7afacae02..78d0498bdabc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1643,6 +1643,7 @@ static int hclge_config_gro(struct hclge_dev *hdev) static int hclge_alloc_tqps(struct hclge_dev *hdev) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); struct hclge_comm_tqp *tqp; int i; @@ -1676,6 +1677,14 @@ static int hclge_alloc_tqps(struct hclge_dev *hdev) (i - HCLGE_TQP_MAX_SIZE_DEV_V2) * HCLGE_TQP_REG_SIZE; + /* when device supports tx push and has device memory, + * the queue can execute push mode or doorbell mode on + * device memory. + */ + if (test_bit(HNAE3_DEV_SUPPORT_TX_PUSH_B, ae_dev->caps)) + tqp->q.mem_base = hdev->hw.hw.mem_base + + HCLGE_TQP_MEM_OFFSET(hdev, i); + tqp++; } @@ -11008,8 +11017,6 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, static int hclge_dev_mem_map(struct hclge_dev *hdev) { -#define HCLGE_MEM_BAR 4 - struct pci_dev *pdev = hdev->pdev; struct hclge_hw *hw = &hdev->hw; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index adfb26e79262..fc92ae385e30 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -169,6 +169,14 @@ enum HLCGE_PORT_TYPE { #define HCLGE_VECTOR0_ALL_MSIX_ERR_B 6U #define HCLGE_TRIGGER_IMP_RESET_B 7U +#define HCLGE_TQP_MEM_SIZE 0x10000 +#define HCLGE_MEM_BAR 4 +/* in the bar4, the first half is for roce, and the second half is for nic */ +#define HCLGE_NIC_MEM_OFFSET(hdev) \ + (pci_resource_len((hdev)->pdev, HCLGE_MEM_BAR) >> 1) +#define HCLGE_TQP_MEM_OFFSET(hdev, i) \ + (HCLGE_NIC_MEM_OFFSET(hdev) + HCLGE_TQP_MEM_SIZE * (i)) + #define HCLGE_MAC_DEFAULT_FRAME \ (ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN + ETH_DATA_LEN) #define HCLGE_MAC_MIN_FRAME 64 @@ -1060,11 +1068,6 @@ static inline int hclge_get_queue_id(struct hnae3_queue *queue) return tqp->index; } -static inline bool hclge_is_reset_pending(struct hclge_dev *hdev) -{ - return !!hdev->reset_pending; -} - int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport); int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex); int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 21442a9bb996..93389bec8d89 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -321,6 +321,7 @@ static int hclgevf_get_pf_media_type(struct hclgevf_dev *hdev) static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); struct hclge_comm_tqp *tqp; int i; @@ -354,6 +355,14 @@ static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev) (i - HCLGEVF_TQP_MAX_SIZE_DEV_V2) * HCLGEVF_TQP_REG_SIZE; + /* when device supports tx push and has device memory, + * the queue can execute push mode or doorbell mode on + * device memory. + */ + if (test_bit(HNAE3_DEV_SUPPORT_TX_PUSH_B, ae_dev->caps)) + tqp->q.mem_base = hdev->hw.hw.mem_base + + HCLGEVF_TQP_MEM_OFFSET(hdev, i); + tqp++; } @@ -2546,8 +2555,6 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, static int hclgevf_dev_mem_map(struct hclgevf_dev *hdev) { -#define HCLGEVF_MEM_BAR 4 - struct pci_dev *pdev = hdev->pdev; struct hclgevf_hw *hw = &hdev->hw; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 502ca1ce1a90..4b00fd44f118 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -96,6 +96,14 @@ #define HCLGEVF_RSS_IND_TBL_SIZE 512 +#define HCLGEVF_TQP_MEM_SIZE 0x10000 +#define HCLGEVF_MEM_BAR 4 +/* in the bar4, the first half is for roce, and the second half is for nic */ +#define HCLGEVF_NIC_MEM_OFFSET(hdev) \ + (pci_resource_len((hdev)->pdev, HCLGEVF_MEM_BAR) >> 1) +#define HCLGEVF_TQP_MEM_OFFSET(hdev, i) \ + (HCLGEVF_NIC_MEM_OFFSET(hdev) + HCLGEVF_TQP_MEM_SIZE * (i)) + #define HCLGEVF_MAC_MAX_FRAME 9728 #define HCLGEVF_STATS_TIMER_INTERVAL 36U diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b423e94956f1..869a4fe17c7c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -60,6 +60,7 @@ #include <asm/hvcall.h> #include <linux/atomic.h> #include <asm/vio.h> +#include <asm/xive.h> #include <asm/iommu.h> #include <linux/uaccess.h> #include <asm/firmware.h> @@ -3640,6 +3641,30 @@ static int disable_scrq_irq(struct ibmvnic_adapter *adapter, return rc; } +/* We can not use the IRQ chip EOI handler because that has the + * unintended effect of changing the interrupt priority. + */ +static void ibmvnic_xics_eoi(struct device *dev, struct ibmvnic_sub_crq_queue *scrq) +{ + u64 val = 0xff000000 | scrq->hw_irq; + unsigned long rc; + + rc = plpar_hcall_norets(H_EOI, val); + if (rc) + dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", val, rc); +} + +/* Due to a firmware bug, the hypervisor can send an interrupt to a + * transmit or receive queue just prior to a partition migration. + * Force an EOI after migration. + */ +static void ibmvnic_clear_pending_interrupt(struct device *dev, + struct ibmvnic_sub_crq_queue *scrq) +{ + if (!xive_enabled()) + ibmvnic_xics_eoi(dev, scrq); +} + static int enable_scrq_irq(struct ibmvnic_adapter *adapter, struct ibmvnic_sub_crq_queue *scrq) { @@ -3653,15 +3678,7 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter, if (test_bit(0, &adapter->resetting) && adapter->reset_reason == VNIC_RESET_MOBILITY) { - u64 val = (0xff000000) | scrq->hw_irq; - - rc = plpar_hcall_norets(H_EOI, val); - /* H_EOI would fail with rc = H_FUNCTION when running - * in XIVE mode which is expected, but not an error. - */ - if (rc && (rc != H_FUNCTION)) - dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", - val, rc); + ibmvnic_clear_pending_interrupt(dev, scrq); } rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index c5bdef3ffe26..fa06f68c8c80 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -7414,9 +7414,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) resource_size_t flash_start, flash_len; static int cards_found; u16 aspm_disable_flag = 0; - int bars, i, err, pci_using_dac; u16 eeprom_data = 0; u16 eeprom_apme_mask = E1000_EEPROM_APME; + int bars, i, err; s32 ret_val = 0; if (ei->flags2 & FLAG2_DISABLE_ASPM_L0S) @@ -7430,17 +7430,11 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - pci_using_dac = 0; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } bars = pci_select_bars(pdev, IORESOURCE_MEM); @@ -7576,10 +7570,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_UNICAST_FLT; - if (pci_using_dac) { - netdev->features |= NETIF_F_HIGHDMA; - netdev->vlan_features |= NETIF_F_HIGHDMA; - } + netdev->features |= NETIF_F_HIGHDMA; + netdev->vlan_features |= NETIF_F_HIGHDMA; /* MTU range: 68 - max_hw_frame_size */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index 0f0efee5fc8e..fd07c3679bb1 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -146,11 +146,11 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) break; } if (!(mdic & E1000_MDIC_READY)) { - e_dbg("MDI Read did not complete\n"); + e_dbg("MDI Read PHY Reg Address %d did not complete\n", offset); return -E1000_ERR_PHY; } if (mdic & E1000_MDIC_ERROR) { - e_dbg("MDI Error\n"); + e_dbg("MDI Read PHY Reg Address %d Error\n", offset); return -E1000_ERR_PHY; } if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { @@ -210,11 +210,11 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) break; } if (!(mdic & E1000_MDIC_READY)) { - e_dbg("MDI Write did not complete\n"); + e_dbg("MDI Write PHY Reg Address %d did not complete\n", offset); return -E1000_ERR_PHY; } if (mdic & E1000_MDIC_ERROR) { - e_dbg("MDI Error\n"); + e_dbg("MDI Write PHY Red Address %d Error\n", offset); return -E1000_ERR_PHY; } if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 80c5cecaf2b5..55c6bce5da61 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -854,6 +854,10 @@ struct i40e_vsi { u64 tx_force_wb; u64 rx_buf_failed; u64 rx_page_failed; + u64 rx_page_reuse; + u64 rx_page_alloc; + u64 rx_page_waive; + u64 rx_page_busy; /* These are containers of ring pointers, allocated at run-time */ struct i40e_ring **rx_rings; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 7abef88801fb..42439f725aa4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -769,7 +769,7 @@ static bool i40e_asq_done(struct i40e_hw *hw) } /** - * i40e_asq_send_command_atomic - send command to Admin Queue + * i40e_asq_send_command_atomic_exec - send command to Admin Queue * @hw: pointer to the hw struct * @desc: prefilled descriptor describing the command (non DMA mem) * @buff: buffer to use for indirect commands @@ -780,11 +780,13 @@ static bool i40e_asq_done(struct i40e_hw *hw) * This is the main send command driver routine for the Admin Queue send * queue. It runs the queue, cleans the queue, etc **/ -i40e_status -i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, - void *buff, /* can be NULL */ u16 buff_size, - struct i40e_asq_cmd_details *cmd_details, - bool is_atomic_context) +static i40e_status +i40e_asq_send_command_atomic_exec(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context) { i40e_status status = 0; struct i40e_dma_mem *dma_buff = NULL; @@ -794,8 +796,6 @@ i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, u16 retval = 0; u32 val = 0; - mutex_lock(&hw->aq.asq_mutex); - if (hw->aq.asq.count == 0) { i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: Admin queue not initialized.\n"); @@ -969,6 +969,36 @@ i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, } asq_send_command_error: + return status; +} + +/** + * i40e_asq_send_command_atomic - send command to Admin Queue + * @hw: pointer to the hw struct + * @desc: prefilled descriptor describing the command (non DMA mem) + * @buff: buffer to use for indirect commands + * @buff_size: size of buffer for indirect commands + * @cmd_details: pointer to command details structure + * @is_atomic_context: is the function called in an atomic context? + * + * Acquires the lock and calls the main send command execution + * routine. + **/ +i40e_status +i40e_asq_send_command_atomic(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context) +{ + i40e_status status; + + mutex_lock(&hw->aq.asq_mutex); + status = i40e_asq_send_command_atomic_exec(hw, desc, buff, buff_size, + cmd_details, + is_atomic_context); + mutex_unlock(&hw->aq.asq_mutex); return status; } @@ -983,6 +1013,52 @@ i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc, } /** + * i40e_asq_send_command_atomic_v2 - send command to Admin Queue + * @hw: pointer to the hw struct + * @desc: prefilled descriptor describing the command (non DMA mem) + * @buff: buffer to use for indirect commands + * @buff_size: size of buffer for indirect commands + * @cmd_details: pointer to command details structure + * @is_atomic_context: is the function called in an atomic context? + * @aq_status: pointer to Admin Queue status return value + * + * Acquires the lock and calls the main send command execution + * routine. Returns the last Admin Queue status in aq_status + * to avoid race conditions in access to hw->aq.asq_last_status. + **/ +i40e_status +i40e_asq_send_command_atomic_v2(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context, + enum i40e_admin_queue_err *aq_status) +{ + i40e_status status; + + mutex_lock(&hw->aq.asq_mutex); + status = i40e_asq_send_command_atomic_exec(hw, desc, buff, + buff_size, + cmd_details, + is_atomic_context); + if (aq_status) + *aq_status = hw->aq.asq_last_status; + mutex_unlock(&hw->aq.asq_mutex); + return status; +} + +i40e_status +i40e_asq_send_command_v2(struct i40e_hw *hw, struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status) +{ + return i40e_asq_send_command_atomic_v2(hw, desc, buff, buff_size, + cmd_details, true, aq_status); +} + +/** * i40e_fill_default_direct_cmd_desc - AQ descriptor helper function * @desc: pointer to the temp descriptor (non DMA mem) * @opcode: the opcode can be used to decide which flags to turn off or on diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 9ddeb015eb7e..6aefffd83615 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1899,8 +1899,9 @@ i40e_status i40e_aq_add_vsi(struct i40e_hw *hw, desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); - status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info, - sizeof(vsi_ctx->info), cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), + cmd_details, true); if (status) goto aq_add_vsi_exit; @@ -2287,8 +2288,9 @@ i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw, desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); - status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info, - sizeof(vsi_ctx->info), cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), + cmd_details, true); vsi_ctx->vsis_allocated = le16_to_cpu(resp->vsi_used); vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free); @@ -2632,33 +2634,28 @@ get_veb_exit: } /** - * i40e_aq_add_macvlan - * @hw: pointer to the hw struct - * @seid: VSI for the mac address + * i40e_prepare_add_macvlan * @mv_list: list of macvlans to be added + * @desc: pointer to AQ descriptor structure * @count: length of the list - * @cmd_details: pointer to command details structure or NULL + * @seid: VSI for the mac address * - * Add MAC/VLAN addresses to the HW filtering + * Internal helper function that prepares the add macvlan request + * and returns the buffer size. **/ -i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, - struct i40e_aqc_add_macvlan_element_data *mv_list, - u16 count, struct i40e_asq_cmd_details *cmd_details) +static u16 +i40e_prepare_add_macvlan(struct i40e_aqc_add_macvlan_element_data *mv_list, + struct i40e_aq_desc *desc, u16 count, u16 seid) { - struct i40e_aq_desc desc; struct i40e_aqc_macvlan *cmd = - (struct i40e_aqc_macvlan *)&desc.params.raw; - i40e_status status; + (struct i40e_aqc_macvlan *)&desc->params.raw; u16 buf_size; int i; - if (count == 0 || !mv_list || !hw) - return I40E_ERR_PARAM; - buf_size = count * sizeof(*mv_list); /* prep the rest of the request */ - i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_macvlan); + i40e_fill_default_direct_cmd_desc(desc, i40e_aqc_opc_add_macvlan); cmd->num_addresses = cpu_to_le16(count); cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid); cmd->seid[1] = 0; @@ -2669,14 +2666,71 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, mv_list[i].flags |= cpu_to_le16(I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC); - desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + desc->flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); if (buf_size > I40E_AQ_LARGE_BUF) - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + desc->flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); - status = i40e_asq_send_command(hw, &desc, mv_list, buf_size, - cmd_details); + return buf_size; +} - return status; +/** + * i40e_aq_add_macvlan + * @hw: pointer to the hw struct + * @seid: VSI for the mac address + * @mv_list: list of macvlans to be added + * @count: length of the list + * @cmd_details: pointer to command details structure or NULL + * + * Add MAC/VLAN addresses to the HW filtering + **/ +i40e_status +i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_add_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + u16 buf_size; + + if (count == 0 || !mv_list || !hw) + return I40E_ERR_PARAM; + + buf_size = i40e_prepare_add_macvlan(mv_list, &desc, count, seid); + + return i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size, + cmd_details, true); +} + +/** + * i40e_aq_add_macvlan_v2 + * @hw: pointer to the hw struct + * @seid: VSI for the mac address + * @mv_list: list of macvlans to be added + * @count: length of the list + * @cmd_details: pointer to command details structure or NULL + * @aq_status: pointer to Admin Queue status return value + * + * Add MAC/VLAN addresses to the HW filtering. + * The _v2 version returns the last Admin Queue status in aq_status + * to avoid race conditions in access to hw->aq.asq_last_status. + * It also calls _v2 versions of asq_send_command functions to + * get the aq_status on the stack. + **/ +i40e_status +i40e_aq_add_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_add_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status) +{ + struct i40e_aq_desc desc; + u16 buf_size; + + if (count == 0 || !mv_list || !hw) + return I40E_ERR_PARAM; + + buf_size = i40e_prepare_add_macvlan(mv_list, &desc, count, seid); + + return i40e_asq_send_command_atomic_v2(hw, &desc, mv_list, buf_size, + cmd_details, true, aq_status); } /** @@ -2715,13 +2769,59 @@ i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid, if (buf_size > I40E_AQ_LARGE_BUF) desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); - status = i40e_asq_send_command(hw, &desc, mv_list, buf_size, - cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size, + cmd_details, true); return status; } /** + * i40e_aq_remove_macvlan_v2 + * @hw: pointer to the hw struct + * @seid: VSI for the mac address + * @mv_list: list of macvlans to be removed + * @count: length of the list + * @cmd_details: pointer to command details structure or NULL + * @aq_status: pointer to Admin Queue status return value + * + * Remove MAC/VLAN addresses from the HW filtering. + * The _v2 version returns the last Admin Queue status in aq_status + * to avoid race conditions in access to hw->aq.asq_last_status. + * It also calls _v2 versions of asq_send_command functions to + * get the aq_status on the stack. + **/ +i40e_status +i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_remove_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status) +{ + struct i40e_aqc_macvlan *cmd; + struct i40e_aq_desc desc; + u16 buf_size; + + if (count == 0 || !mv_list || !hw) + return I40E_ERR_PARAM; + + buf_size = count * sizeof(*mv_list); + + /* prep the rest of the request */ + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_remove_macvlan); + cmd = (struct i40e_aqc_macvlan *)&desc.params.raw; + cmd->num_addresses = cpu_to_le16(count); + cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid); + cmd->seid[1] = 0; + cmd->seid[2] = 0; + + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + if (buf_size > I40E_AQ_LARGE_BUF) + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + + return i40e_asq_send_command_atomic_v2(hw, &desc, mv_list, buf_size, + cmd_details, true, aq_status); +} + +/** * i40e_mirrorrule_op - Internal helper function to add/delete mirror rule * @hw: pointer to the hw struct * @opcode: AQ opcode for add or delete mirror rule @@ -3868,7 +3968,8 @@ i40e_status i40e_aq_delete_element(struct i40e_hw *hw, u16 seid, cmd->seid = cpu_to_le16(seid); - status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, NULL, 0, + cmd_details, true); return status; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 9db5001297c7..be7c6f34d45c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -275,9 +275,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->rx_stats.alloc_page_failed, rx_ring->rx_stats.alloc_buff_failed); dev_info(&pf->pdev->dev, - " rx_rings[%i]: rx_stats: realloc_count = %lld, page_reuse_count = %lld\n", + " rx_rings[%i]: rx_stats: realloc_count = 0, page_reuse_count = %lld\n", i, - rx_ring->rx_stats.realloc_count, rx_ring->rx_stats.page_reuse_count); dev_info(&pf->pdev->dev, " rx_rings[%i]: size = %i\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 091f36adbbe1..e48499624d22 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -295,6 +295,10 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = { I40E_VSI_STAT("tx_busy", tx_busy), I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed), I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), + I40E_VSI_STAT("rx_cache_reuse", rx_page_reuse), + I40E_VSI_STAT("rx_cache_alloc", rx_page_alloc), + I40E_VSI_STAT("rx_cache_waive", rx_page_waive), + I40E_VSI_STAT("rx_cache_busy", rx_page_busy), }; /* These PF_STATs might look like duplicates of some NETDEV_STATs, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 31b03fe78d3b..6778df2177a1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -773,6 +773,7 @@ void i40e_update_veb_stats(struct i40e_veb *veb) **/ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) { + u64 rx_page, rx_buf, rx_reuse, rx_alloc, rx_waive, rx_busy; struct i40e_pf *pf = vsi->back; struct rtnl_link_stats64 *ons; struct rtnl_link_stats64 *ns; /* netdev stats */ @@ -780,7 +781,6 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) struct i40e_eth_stats *es; /* device's eth stats */ u64 tx_restart, tx_busy; struct i40e_ring *p; - u64 rx_page, rx_buf; u64 bytes, packets; unsigned int start; u64 tx_linearize; @@ -806,6 +806,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; rx_page = 0; rx_buf = 0; + rx_reuse = 0; + rx_alloc = 0; + rx_waive = 0; + rx_busy = 0; rcu_read_lock(); for (q = 0; q < vsi->num_queue_pairs; q++) { /* locate Tx ring */ @@ -839,6 +843,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) rx_p += packets; rx_buf += p->rx_stats.alloc_buff_failed; rx_page += p->rx_stats.alloc_page_failed; + rx_reuse += p->rx_stats.page_reuse_count; + rx_alloc += p->rx_stats.page_alloc_count; + rx_waive += p->rx_stats.page_waive_count; + rx_busy += p->rx_stats.page_busy_count; if (i40e_enabled_xdp_vsi(vsi)) { /* locate XDP ring */ @@ -866,6 +874,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) vsi->tx_force_wb = tx_force_wb; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; + vsi->rx_page_reuse = rx_reuse; + vsi->rx_page_alloc = rx_alloc; + vsi->rx_page_waive = rx_waive; + vsi->rx_page_busy = rx_busy; ns->rx_packets = rx_p; ns->rx_bytes = rx_b; @@ -2143,19 +2155,19 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, int num_del, int *retval) { struct i40e_hw *hw = &vsi->back->hw; + enum i40e_admin_queue_err aq_status; i40e_status aq_ret; - int aq_err; - aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, list, num_del, NULL); - aq_err = hw->aq.asq_last_status; + aq_ret = i40e_aq_remove_macvlan_v2(hw, vsi->seid, list, num_del, NULL, + &aq_status); /* Explicitly ignore and do not report when firmware returns ENOENT */ - if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) { + if (aq_ret && !(aq_status == I40E_AQ_RC_ENOENT)) { *retval = -EIO; dev_info(&vsi->back->pdev->dev, "ignoring delete macvlan error on %s, err %s, aq_err %s\n", vsi_name, i40e_stat_str(hw, aq_ret), - i40e_aq_str(hw, aq_err)); + i40e_aq_str(hw, aq_status)); } } @@ -2178,10 +2190,10 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, int num_add) { struct i40e_hw *hw = &vsi->back->hw; - int aq_err, fcnt; + enum i40e_admin_queue_err aq_status; + int fcnt; - i40e_aq_add_macvlan(hw, vsi->seid, list, num_add, NULL); - aq_err = hw->aq.asq_last_status; + i40e_aq_add_macvlan_v2(hw, vsi->seid, list, num_add, NULL, &aq_status); fcnt = i40e_update_filter_state(num_add, list, add_head); if (fcnt != num_add) { @@ -2189,17 +2201,19 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, promiscuous mode forced on\n", - i40e_aq_str(hw, aq_err), vsi_name); + i40e_aq_str(hw, aq_status), vsi_name); } else if (vsi->type == I40E_VSI_SRIOV || vsi->type == I40E_VSI_VMDQ1 || vsi->type == I40E_VSI_VMDQ2) { dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, please set promiscuous on manually for %s\n", - i40e_aq_str(hw, aq_err), vsi_name, vsi_name); + i40e_aq_str(hw, aq_status), vsi_name, + vsi_name); } else { dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, incorrect VSI type: %i.\n", - i40e_aq_str(hw, aq_err), vsi_name, vsi->type); + i40e_aq_str(hw, aq_status), vsi_name, + vsi->type); } } } @@ -12712,7 +12726,8 @@ static int i40e_set_features(struct net_device *netdev, else i40e_vlan_stripping_disable(vsi); - if (!(features & NETIF_F_HW_TC) && pf->num_cloud_filters) { + if (!(features & NETIF_F_HW_TC) && + (netdev->features & NETIF_F_HW_TC) && pf->num_cloud_filters) { dev_err(&pf->pdev->dev, "Offloaded tc filters active, can't turn hw_tc_offload off"); return -EINVAL; @@ -13468,6 +13483,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; + netdev->features &= ~NETIF_F_HW_TC; + if (vsi->type == I40E_VSI_MAIN) { SET_NETDEV_DEV(netdev, &pf->pdev->dev); ether_addr_copy(mac_addr, hw->mac.perm_addr); @@ -15331,12 +15348,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* set up for high or low dma */ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "DMA configuration failed: 0x%x\n", err); - goto err_dma; - } + dev_err(&pdev->dev, + "DMA configuration failed: 0x%x\n", err); + goto err_dma; } /* set up pci connections */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 9241b6005ad3..ebdcde6f1aeb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -27,10 +27,25 @@ i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc, void *buff, /* can be NULL */ u16 buff_size, struct i40e_asq_cmd_details *cmd_details); i40e_status +i40e_asq_send_command_v2(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status); +i40e_status i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, void *buff, /* can be NULL */ u16 buff_size, struct i40e_asq_cmd_details *cmd_details, bool is_atomic_context); +i40e_status +i40e_asq_send_command_atomic_v2(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context, + enum i40e_admin_queue_err *aq_status); /* debug function for adminq */ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, @@ -150,9 +165,19 @@ i40e_status i40e_aq_get_veb_parameters(struct i40e_hw *hw, i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 vsi_id, struct i40e_aqc_add_macvlan_element_data *mv_list, u16 count, struct i40e_asq_cmd_details *cmd_details); +i40e_status +i40e_aq_add_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_add_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status); i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 vsi_id, struct i40e_aqc_remove_macvlan_element_data *mv_list, u16 count, struct i40e_asq_cmd_details *cmd_details); +i40e_status +i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_remove_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status); i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid, u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list, struct i40e_asq_cmd_details *cmd_details, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 66cc79500c10..0eae5858f2fe 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -830,8 +830,6 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) i40e_clean_tx_ring(tx_ring); kfree(tx_ring->tx_bi); tx_ring->tx_bi = NULL; - kfree(tx_ring->xsk_descs); - tx_ring->xsk_descs = NULL; if (tx_ring->desc) { dma_free_coherent(tx_ring->dev, tx_ring->size, @@ -1382,8 +1380,6 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, new_buff->page_offset = old_buff->page_offset; new_buff->pagecnt_bias = old_buff->pagecnt_bias; - rx_ring->rx_stats.page_reuse_count++; - /* clear contents of buffer_info */ old_buff->page = NULL; } @@ -1433,13 +1429,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) if (!tx_ring->tx_bi) goto err; - if (ring_is_xdp(tx_ring)) { - tx_ring->xsk_descs = kcalloc(I40E_MAX_NUM_DESCRIPTORS, sizeof(*tx_ring->xsk_descs), - GFP_KERNEL); - if (!tx_ring->xsk_descs) - goto err; - } - u64_stats_init(&tx_ring->syncp); /* round up to nearest 4K */ @@ -1463,8 +1452,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) return 0; err: - kfree(tx_ring->xsk_descs); - tx_ring->xsk_descs = NULL; kfree(tx_ring->tx_bi); tx_ring->tx_bi = NULL; return -ENOMEM; @@ -1675,6 +1662,8 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, return false; } + rx_ring->rx_stats.page_alloc_count++; + /* map page for use */ dma = dma_map_page_attrs(rx_ring->dev, page, 0, i40e_rx_pg_size(rx_ring), @@ -1982,32 +1971,43 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, /** * i40e_can_reuse_rx_page - Determine if page can be reused for another Rx * @rx_buffer: buffer containing the page + * @rx_stats: rx stats structure for the rx ring * @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call * * If page is reusable, we have a green light for calling i40e_reuse_rx_page, * which will assign the current buffer to the buffer that next_to_alloc is * pointing to; otherwise, the DMA mapping needs to be destroyed and - * page freed + * page freed. + * + * rx_stats will be updated to indicate whether the page was waived + * or busy if it could not be reused. */ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, + struct i40e_rx_queue_stats *rx_stats, int rx_buffer_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; /* Is any reuse possible? */ - if (!dev_page_is_reusable(page)) + if (!dev_page_is_reusable(page)) { + rx_stats->page_waive_count++; return false; + } #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) + if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) { + rx_stats->page_busy_count++; return false; + } #else #define I40E_LAST_OFFSET \ (SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048) - if (rx_buffer->page_offset > I40E_LAST_OFFSET) + if (rx_buffer->page_offset > I40E_LAST_OFFSET) { + rx_stats->page_busy_count++; return false; + } #endif /* If we have drained the page fragment pool we need to update @@ -2237,7 +2237,7 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, int rx_buffer_pgcnt) { - if (i40e_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { + if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats, rx_buffer_pgcnt)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); } else { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index bfc2845c99d1..c471c2da313c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -298,7 +298,9 @@ struct i40e_rx_queue_stats { u64 alloc_page_failed; u64 alloc_buff_failed; u64 page_reuse_count; - u64 realloc_count; + u64 page_alloc_count; + u64 page_waive_count; + u64 page_busy_count; }; enum i40e_ring_state_t { @@ -390,7 +392,6 @@ struct i40e_ring { u16 rx_offset; struct xdp_rxq_info xdp_rxq; struct xsk_buff_pool *xsk_pool; - struct xdp_desc *xsk_descs; /* For storing descriptors in the AF_XDP ZC path */ } ____cacheline_internodealigned_in_smp; static inline bool ring_uses_build_skb(struct i40e_ring *ring) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 945b1bb9c6f4..c1d25b0b0ca2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -218,7 +218,6 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) ntu += nb_buffs; if (ntu == rx_ring->count) { rx_desc = I40E_RX_DESC(rx_ring, 0); - xdp = i40e_rx_bi(rx_ring, 0); ntu = 0; } @@ -241,21 +240,25 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) { + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; + net_prefetch(xdp->data_meta); + /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - xdp->data_end - xdp->data_hard_start, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) goto out; - skb_reserve(skb, xdp->data - xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } out: xsk_buff_free(xdp); @@ -324,11 +327,11 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring, int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; - u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); u16 next_to_clean = rx_ring->next_to_clean; u16 count_mask = rx_ring->count - 1; unsigned int xdp_res, xdp_xmit = 0; bool failure = false; + u16 cleaned_count; while (likely(total_rx_packets < (unsigned int)budget)) { union i40e_rx_desc *rx_desc; @@ -467,11 +470,11 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) **/ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) { - struct xdp_desc *descs = xdp_ring->xsk_descs; + struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; u32 nb_pkts, nb_processed = 0; unsigned int total_bytes = 0; - nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, descs, budget); + nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); if (!nb_pkts) return true; diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 4babe4705a55..49aed3e506a6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -44,6 +44,9 @@ #define DEFAULT_DEBUG_LEVEL_SHIFT 3 #define PFX "iavf: " +int iavf_status_to_errno(enum iavf_status status); +int virtchnl_status_to_errno(enum virtchnl_status_code v_status); + /* VSI state flags shared with common code */ enum iavf_vsi_state_t { __IAVF_VSI_DOWN, @@ -188,7 +191,7 @@ enum iavf_state_t { __IAVF_REMOVE, /* driver is being unloaded */ __IAVF_INIT_VERSION_CHECK, /* aq msg sent, awaiting reply */ __IAVF_INIT_GET_RESOURCES, /* aq msg sent, awaiting reply */ - __IAVF_INIT_GET_OFFLOAD_VLAN_V2_CAPS, + __IAVF_INIT_EXTENDED_CAPS, /* process extended caps which require aq msg exchange */ __IAVF_INIT_CONFIG_ADAPTER, __IAVF_INIT_SW, /* got resources, setting up structs */ __IAVF_INIT_FAILED, /* init failed, restarting procedure */ @@ -334,6 +337,21 @@ struct iavf_adapter { #define IAVF_FLAG_AQ_ENABLE_STAG_VLAN_INSERTION BIT_ULL(37) #define IAVF_FLAG_AQ_DISABLE_STAG_VLAN_INSERTION BIT_ULL(38) + /* flags for processing extended capability messages during + * __IAVF_INIT_EXTENDED_CAPS. Each capability exchange requires + * both a SEND and a RECV step, which must be processed in sequence. + * + * During the __IAVF_INIT_EXTENDED_CAPS state, the driver will + * process one flag at a time during each state loop. + */ + u64 extended_caps; +#define IAVF_EXTENDED_CAP_SEND_VLAN_V2 BIT_ULL(0) +#define IAVF_EXTENDED_CAP_RECV_VLAN_V2 BIT_ULL(1) + +#define IAVF_EXTENDED_CAPS \ + (IAVF_EXTENDED_CAP_SEND_VLAN_V2 | \ + IAVF_EXTENDED_CAP_RECV_VLAN_V2) + /* OS defined structs */ struct net_device *netdev; struct pci_dev *pdev; @@ -515,7 +533,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter); void iavf_del_vlans(struct iavf_adapter *adapter); void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags); void iavf_request_stats(struct iavf_adapter *adapter); -void iavf_request_reset(struct iavf_adapter *adapter); +int iavf_request_reset(struct iavf_adapter *adapter); void iavf_get_hena(struct iavf_adapter *adapter); void iavf_set_hena(struct iavf_adapter *adapter); void iavf_set_rss_key(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index e9cc7f6ddc46..34e46a23894f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -131,8 +131,8 @@ const char *iavf_stat_str(struct iavf_hw *hw, enum iavf_status stat_err) return "IAVF_ERR_INVALID_MAC_ADDR"; case IAVF_ERR_DEVICE_NOT_SUPPORTED: return "IAVF_ERR_DEVICE_NOT_SUPPORTED"; - case IAVF_ERR_MASTER_REQUESTS_PENDING: - return "IAVF_ERR_MASTER_REQUESTS_PENDING"; + case IAVF_ERR_PRIMARY_REQUESTS_PENDING: + return "IAVF_ERR_PRIMARY_REQUESTS_PENDING"; case IAVF_ERR_INVALID_LINK_SETTINGS: return "IAVF_ERR_INVALID_LINK_SETTINGS"; case IAVF_ERR_AUTONEG_NOT_COMPLETE: diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 8e644e9ed8da..7c4b75a5e1b5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -51,6 +51,113 @@ MODULE_LICENSE("GPL v2"); static const struct net_device_ops iavf_netdev_ops; struct workqueue_struct *iavf_wq; +int iavf_status_to_errno(enum iavf_status status) +{ + switch (status) { + case IAVF_SUCCESS: + return 0; + case IAVF_ERR_PARAM: + case IAVF_ERR_MAC_TYPE: + case IAVF_ERR_INVALID_MAC_ADDR: + case IAVF_ERR_INVALID_LINK_SETTINGS: + case IAVF_ERR_INVALID_PD_ID: + case IAVF_ERR_INVALID_QP_ID: + case IAVF_ERR_INVALID_CQ_ID: + case IAVF_ERR_INVALID_CEQ_ID: + case IAVF_ERR_INVALID_AEQ_ID: + case IAVF_ERR_INVALID_SIZE: + case IAVF_ERR_INVALID_ARP_INDEX: + case IAVF_ERR_INVALID_FPM_FUNC_ID: + case IAVF_ERR_QP_INVALID_MSG_SIZE: + case IAVF_ERR_INVALID_FRAG_COUNT: + case IAVF_ERR_INVALID_ALIGNMENT: + case IAVF_ERR_INVALID_PUSH_PAGE_INDEX: + case IAVF_ERR_INVALID_IMM_DATA_SIZE: + case IAVF_ERR_INVALID_VF_ID: + case IAVF_ERR_INVALID_HMCFN_ID: + case IAVF_ERR_INVALID_PBLE_INDEX: + case IAVF_ERR_INVALID_SD_INDEX: + case IAVF_ERR_INVALID_PAGE_DESC_INDEX: + case IAVF_ERR_INVALID_SD_TYPE: + case IAVF_ERR_INVALID_HMC_OBJ_INDEX: + case IAVF_ERR_INVALID_HMC_OBJ_COUNT: + case IAVF_ERR_INVALID_SRQ_ARM_LIMIT: + return -EINVAL; + case IAVF_ERR_NVM: + case IAVF_ERR_NVM_CHECKSUM: + case IAVF_ERR_PHY: + case IAVF_ERR_CONFIG: + case IAVF_ERR_UNKNOWN_PHY: + case IAVF_ERR_LINK_SETUP: + case IAVF_ERR_ADAPTER_STOPPED: + case IAVF_ERR_PRIMARY_REQUESTS_PENDING: + case IAVF_ERR_AUTONEG_NOT_COMPLETE: + case IAVF_ERR_RESET_FAILED: + case IAVF_ERR_BAD_PTR: + case IAVF_ERR_SWFW_SYNC: + case IAVF_ERR_QP_TOOMANY_WRS_POSTED: + case IAVF_ERR_QUEUE_EMPTY: + case IAVF_ERR_FLUSHED_QUEUE: + case IAVF_ERR_OPCODE_MISMATCH: + case IAVF_ERR_CQP_COMPL_ERROR: + case IAVF_ERR_BACKING_PAGE_ERROR: + case IAVF_ERR_NO_PBLCHUNKS_AVAILABLE: + case IAVF_ERR_MEMCPY_FAILED: + case IAVF_ERR_SRQ_ENABLED: + case IAVF_ERR_ADMIN_QUEUE_ERROR: + case IAVF_ERR_ADMIN_QUEUE_FULL: + case IAVF_ERR_BAD_IWARP_CQE: + case IAVF_ERR_NVM_BLANK_MODE: + case IAVF_ERR_PE_DOORBELL_NOT_ENABLED: + case IAVF_ERR_DIAG_TEST_FAILED: + case IAVF_ERR_FIRMWARE_API_VERSION: + case IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR: + return -EIO; + case IAVF_ERR_DEVICE_NOT_SUPPORTED: + return -ENODEV; + case IAVF_ERR_NO_AVAILABLE_VSI: + case IAVF_ERR_RING_FULL: + return -ENOSPC; + case IAVF_ERR_NO_MEMORY: + return -ENOMEM; + case IAVF_ERR_TIMEOUT: + case IAVF_ERR_ADMIN_QUEUE_TIMEOUT: + return -ETIMEDOUT; + case IAVF_ERR_NOT_IMPLEMENTED: + case IAVF_NOT_SUPPORTED: + return -EOPNOTSUPP; + case IAVF_ERR_ADMIN_QUEUE_NO_WORK: + return -EALREADY; + case IAVF_ERR_NOT_READY: + return -EBUSY; + case IAVF_ERR_BUF_TOO_SHORT: + return -EMSGSIZE; + } + + return -EIO; +} + +int virtchnl_status_to_errno(enum virtchnl_status_code v_status) +{ + switch (v_status) { + case VIRTCHNL_STATUS_SUCCESS: + return 0; + case VIRTCHNL_STATUS_ERR_PARAM: + case VIRTCHNL_STATUS_ERR_INVALID_VF_ID: + return -EINVAL; + case VIRTCHNL_STATUS_ERR_NO_MEMORY: + return -ENOMEM; + case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH: + case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR: + case VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR: + return -EIO; + case VIRTCHNL_STATUS_ERR_NOT_SUPPORTED: + return -EOPNOTSUPP; + } + + return -EIO; +} + /** * iavf_pdev_to_adapter - go from pci_dev to adapter * @pdev: pci_dev pointer @@ -877,6 +984,7 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, list_add_tail(&f->list, &adapter->mac_filter_list); f->add = true; f->is_new_mac = true; + f->is_primary = false; adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; } else { f->remove = false; @@ -910,17 +1018,22 @@ static int iavf_set_mac(struct net_device *netdev, void *p) f = iavf_find_filter(adapter, hw->mac.addr); if (f) { f->remove = true; + f->is_primary = true; adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER; } f = iavf_add_filter(adapter, addr->sa_data); - - spin_unlock_bh(&adapter->mac_vlan_list_lock); - if (f) { + f->is_primary = true; ether_addr_copy(hw->mac.addr, addr->sa_data); } + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + /* schedule the watchdog task to immediately process the request */ + if (f) + queue_work(iavf_wq, &adapter->watchdog_task.work); + return (f == NULL) ? -ENOMEM : 0; } @@ -1421,7 +1534,7 @@ static int iavf_config_rss_aq(struct iavf_adapter *adapter) struct iavf_aqc_get_set_rss_key_data *rss_key = (struct iavf_aqc_get_set_rss_key_data *)adapter->rss_key; struct iavf_hw *hw = &adapter->hw; - int ret = 0; + enum iavf_status status; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -1430,24 +1543,25 @@ static int iavf_config_rss_aq(struct iavf_adapter *adapter) return -EBUSY; } - ret = iavf_aq_set_rss_key(hw, adapter->vsi.id, rss_key); - if (ret) { + status = iavf_aq_set_rss_key(hw, adapter->vsi.id, rss_key); + if (status) { dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n", - iavf_stat_str(hw, ret), + iavf_stat_str(hw, status), iavf_aq_str(hw, hw->aq.asq_last_status)); - return ret; + return iavf_status_to_errno(status); } - ret = iavf_aq_set_rss_lut(hw, adapter->vsi.id, false, - adapter->rss_lut, adapter->rss_lut_size); - if (ret) { + status = iavf_aq_set_rss_lut(hw, adapter->vsi.id, false, + adapter->rss_lut, adapter->rss_lut_size); + if (status) { dev_err(&adapter->pdev->dev, "Cannot set RSS lut, err %s aq_err %s\n", - iavf_stat_str(hw, ret), + iavf_stat_str(hw, status), iavf_aq_str(hw, hw->aq.asq_last_status)); + return iavf_status_to_errno(status); } - return ret; + return 0; } @@ -1517,7 +1631,6 @@ static void iavf_fill_rss_lut(struct iavf_adapter *adapter) static int iavf_init_rss(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; - int ret; if (!RSS_PF(adapter)) { /* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */ @@ -1533,9 +1646,8 @@ static int iavf_init_rss(struct iavf_adapter *adapter) iavf_fill_rss_lut(adapter); netdev_rss_key_fill((void *)adapter->rss_key, adapter->rss_key_size); - ret = iavf_config_rss(adapter); - return ret; + return iavf_config_rss(adapter); } /** @@ -2003,23 +2115,24 @@ static void iavf_startup(struct iavf_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct iavf_hw *hw = &adapter->hw; - int err; + enum iavf_status status; + int ret; WARN_ON(adapter->state != __IAVF_STARTUP); /* driver loaded, probe complete */ adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; adapter->flags &= ~IAVF_FLAG_RESET_PENDING; - err = iavf_set_mac_type(hw); - if (err) { - dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", err); + status = iavf_set_mac_type(hw); + if (status) { + dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", status); goto err; } - err = iavf_check_reset_complete(hw); - if (err) { + ret = iavf_check_reset_complete(hw); + if (ret) { dev_info(&pdev->dev, "Device is still in reset (%d), retrying\n", - err); + ret); goto err; } hw->aq.num_arq_entries = IAVF_AQ_LEN; @@ -2027,14 +2140,15 @@ static void iavf_startup(struct iavf_adapter *adapter) hw->aq.arq_buf_size = IAVF_MAX_AQ_BUF_SIZE; hw->aq.asq_buf_size = IAVF_MAX_AQ_BUF_SIZE; - err = iavf_init_adminq(hw); - if (err) { - dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n", err); + status = iavf_init_adminq(hw); + if (status) { + dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n", + status); goto err; } - err = iavf_send_api_ver(adapter); - if (err) { - dev_err(&pdev->dev, "Unable to send to PF (%d)\n", err); + ret = iavf_send_api_ver(adapter); + if (ret) { + dev_err(&pdev->dev, "Unable to send to PF (%d)\n", ret); iavf_shutdown_adminq(hw); goto err; } @@ -2070,7 +2184,7 @@ static void iavf_init_version_check(struct iavf_adapter *adapter) /* aq msg sent, awaiting reply */ err = iavf_verify_api_ver(adapter); if (err) { - if (err == IAVF_ERR_ADMIN_QUEUE_NO_WORK) + if (err == -EALREADY) err = iavf_send_api_ver(adapter); else dev_err(&pdev->dev, "Unsupported PF API version %d.%d, expected %d.%d\n", @@ -2171,11 +2285,11 @@ static void iavf_init_get_resources(struct iavf_adapter *adapter) } } err = iavf_get_vf_config(adapter); - if (err == IAVF_ERR_ADMIN_QUEUE_NO_WORK) { + if (err == -EALREADY) { err = iavf_send_vf_config_msg(adapter); goto err_alloc; - } else if (err == IAVF_ERR_PARAM) { - /* We only get ERR_PARAM if the device is in a very bad + } else if (err == -EINVAL) { + /* We only get -EINVAL if the device is in a very bad * state or if we've been disabled for previous bad * behavior. Either way, we're done now. */ @@ -2189,26 +2303,18 @@ static void iavf_init_get_resources(struct iavf_adapter *adapter) } err = iavf_parse_vf_resource_msg(adapter); - if (err) - goto err_alloc; - - err = iavf_send_vf_offload_vlan_v2_msg(adapter); - if (err == -EOPNOTSUPP) { - /* underlying PF doesn't support VIRTCHNL_VF_OFFLOAD_VLAN_V2, so - * go directly to finishing initialization - */ - iavf_change_state(adapter, __IAVF_INIT_CONFIG_ADAPTER); - return; - } else if (err) { - dev_err(&pdev->dev, "Unable to send offload vlan v2 request (%d)\n", + if (err) { + dev_err(&pdev->dev, "Failed to parse VF resource message from PF (%d)\n", err); goto err_alloc; } - - /* underlying PF supports VIRTCHNL_VF_OFFLOAD_VLAN_V2, so update the - * state accordingly + /* Some features require additional messages to negotiate extended + * capabilities. These are processed in sequence by the + * __IAVF_INIT_EXTENDED_CAPS driver state. */ - iavf_change_state(adapter, __IAVF_INIT_GET_OFFLOAD_VLAN_V2_CAPS); + adapter->extended_caps = IAVF_EXTENDED_CAPS; + + iavf_change_state(adapter, __IAVF_INIT_EXTENDED_CAPS); return; err_alloc: @@ -2219,35 +2325,93 @@ err: } /** - * iavf_init_get_offload_vlan_v2_caps - part of driver startup + * iavf_init_send_offload_vlan_v2_caps - part of initializing VLAN V2 caps * @adapter: board private structure * - * Function processes __IAVF_INIT_GET_OFFLOAD_VLAN_V2_CAPS driver state if the - * VF negotiates VIRTCHNL_VF_OFFLOAD_VLAN_V2. If VIRTCHNL_VF_OFFLOAD_VLAN_V2 is - * not negotiated, then this state will never be entered. + * Function processes send of the extended VLAN V2 capability message to the + * PF. Must clear IAVF_EXTENDED_CAP_RECV_VLAN_V2 if the message is not sent, + * e.g. due to PF not negotiating VIRTCHNL_VF_OFFLOAD_VLAN_V2. + */ +static void iavf_init_send_offload_vlan_v2_caps(struct iavf_adapter *adapter) +{ + int ret; + + WARN_ON(!(adapter->extended_caps & IAVF_EXTENDED_CAP_SEND_VLAN_V2)); + + ret = iavf_send_vf_offload_vlan_v2_msg(adapter); + if (ret && ret == -EOPNOTSUPP) { + /* PF does not support VIRTCHNL_VF_OFFLOAD_V2. In this case, + * we did not send the capability exchange message and do not + * expect a response. + */ + adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_VLAN_V2; + } + + /* We sent the message, so move on to the next step */ + adapter->extended_caps &= ~IAVF_EXTENDED_CAP_SEND_VLAN_V2; +} + +/** + * iavf_init_recv_offload_vlan_v2_caps - part of initializing VLAN V2 caps + * @adapter: board private structure + * + * Function processes receipt of the extended VLAN V2 capability message from + * the PF. **/ -static void iavf_init_get_offload_vlan_v2_caps(struct iavf_adapter *adapter) +static void iavf_init_recv_offload_vlan_v2_caps(struct iavf_adapter *adapter) { int ret; - WARN_ON(adapter->state != __IAVF_INIT_GET_OFFLOAD_VLAN_V2_CAPS); + WARN_ON(!(adapter->extended_caps & IAVF_EXTENDED_CAP_RECV_VLAN_V2)); memset(&adapter->vlan_v2_caps, 0, sizeof(adapter->vlan_v2_caps)); ret = iavf_get_vf_vlan_v2_caps(adapter); - if (ret) { - if (ret == IAVF_ERR_ADMIN_QUEUE_NO_WORK) - iavf_send_vf_offload_vlan_v2_msg(adapter); + if (ret) goto err; - } - iavf_change_state(adapter, __IAVF_INIT_CONFIG_ADAPTER); + /* We've processed receipt of the VLAN V2 caps message */ + adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_VLAN_V2; return; err: + /* We didn't receive a reply. Make sure we try sending again when + * __IAVF_INIT_FAILED attempts to recover. + */ + adapter->extended_caps |= IAVF_EXTENDED_CAP_SEND_VLAN_V2; iavf_change_state(adapter, __IAVF_INIT_FAILED); } /** + * iavf_init_process_extended_caps - Part of driver startup + * @adapter: board private structure + * + * Function processes __IAVF_INIT_EXTENDED_CAPS driver state. This state + * handles negotiating capabilities for features which require an additional + * message. + * + * Once all extended capabilities exchanges are finished, the driver will + * transition into __IAVF_INIT_CONFIG_ADAPTER. + */ +static void iavf_init_process_extended_caps(struct iavf_adapter *adapter) +{ + WARN_ON(adapter->state != __IAVF_INIT_EXTENDED_CAPS); + + /* Process capability exchange for VLAN V2 */ + if (adapter->extended_caps & IAVF_EXTENDED_CAP_SEND_VLAN_V2) { + iavf_init_send_offload_vlan_v2_caps(adapter); + return; + } else if (adapter->extended_caps & IAVF_EXTENDED_CAP_RECV_VLAN_V2) { + iavf_init_recv_offload_vlan_v2_caps(adapter); + return; + } + + /* When we reach here, no further extended capabilities exchanges are + * necessary, so we finally transition into __IAVF_INIT_CONFIG_ADAPTER + */ + iavf_change_state(adapter, __IAVF_INIT_CONFIG_ADAPTER); +} + +/** * iavf_init_config_adapter - last part of driver startup * @adapter: board private structure * @@ -2411,8 +2575,8 @@ static void iavf_watchdog_task(struct work_struct *work) queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(1)); return; - case __IAVF_INIT_GET_OFFLOAD_VLAN_V2_CAPS: - iavf_init_get_offload_vlan_v2_caps(adapter); + case __IAVF_INIT_EXTENDED_CAPS: + iavf_init_process_extended_caps(adapter); mutex_unlock(&adapter->crit_lock); queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(1)); @@ -2620,6 +2784,7 @@ static void iavf_reset_task(struct work_struct *work) struct iavf_hw *hw = &adapter->hw; struct iavf_mac_filter *f, *ftmp; struct iavf_cloud_filter *cf; + enum iavf_status status; u32 reg_val; int i = 0, err; bool running; @@ -2721,10 +2886,12 @@ continue_reset: /* kill and reinit the admin queue */ iavf_shutdown_adminq(hw); adapter->current_op = VIRTCHNL_OP_UNKNOWN; - err = iavf_init_adminq(hw); - if (err) + status = iavf_init_adminq(hw); + if (status) { dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n", - err); + status); + goto reset_err; + } adapter->aq_required = 0; if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) || @@ -4427,12 +4594,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "DMA configuration failed: 0x%x\n", err); - goto err_dma; - } + dev_err(&pdev->dev, + "DMA configuration failed: 0x%x\n", err); + goto err_dma; } err = pci_request_regions(pdev, iavf_driver_name); @@ -4754,8 +4918,6 @@ static struct pci_driver iavf_driver = { **/ static int __init iavf_init_module(void) { - int ret; - pr_info("iavf: %s\n", iavf_driver_string); pr_info("%s\n", iavf_copyright); @@ -4766,8 +4928,7 @@ static int __init iavf_init_module(void) pr_err("%s: Failed to create workqueue\n", iavf_driver_name); return -ENOMEM; } - ret = pci_register_driver(&iavf_driver); - return ret; + return pci_register_driver(&iavf_driver); } module_init(iavf_init_module); diff --git a/drivers/net/ethernet/intel/iavf/iavf_status.h b/drivers/net/ethernet/intel/iavf/iavf_status.h index 46e3d1f6b604..2ea5c7c339bc 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_status.h +++ b/drivers/net/ethernet/intel/iavf/iavf_status.h @@ -18,7 +18,7 @@ enum iavf_status { IAVF_ERR_ADAPTER_STOPPED = -9, IAVF_ERR_INVALID_MAC_ADDR = -10, IAVF_ERR_DEVICE_NOT_SUPPORTED = -11, - IAVF_ERR_MASTER_REQUESTS_PENDING = -12, + IAVF_ERR_PRIMARY_REQUESTS_PENDING = -12, IAVF_ERR_INVALID_LINK_SETTINGS = -13, IAVF_ERR_AUTONEG_NOT_COMPLETE = -14, IAVF_ERR_RESET_FAILED = -15, diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 8cbe7ad1347c..978f651c6b09 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -374,29 +374,60 @@ static inline bool iavf_container_is_rx(struct iavf_q_vector *q_vector, return &q_vector->rx == rc; } -static inline unsigned int iavf_itr_divisor(struct iavf_q_vector *q_vector) +#define IAVF_AIM_MULTIPLIER_100G 2560 +#define IAVF_AIM_MULTIPLIER_50G 1280 +#define IAVF_AIM_MULTIPLIER_40G 1024 +#define IAVF_AIM_MULTIPLIER_20G 512 +#define IAVF_AIM_MULTIPLIER_10G 256 +#define IAVF_AIM_MULTIPLIER_1G 32 + +static unsigned int iavf_mbps_itr_multiplier(u32 speed_mbps) { - unsigned int divisor; + switch (speed_mbps) { + case SPEED_100000: + return IAVF_AIM_MULTIPLIER_100G; + case SPEED_50000: + return IAVF_AIM_MULTIPLIER_50G; + case SPEED_40000: + return IAVF_AIM_MULTIPLIER_40G; + case SPEED_25000: + case SPEED_20000: + return IAVF_AIM_MULTIPLIER_20G; + case SPEED_10000: + default: + return IAVF_AIM_MULTIPLIER_10G; + case SPEED_1000: + case SPEED_100: + return IAVF_AIM_MULTIPLIER_1G; + } +} - switch (q_vector->adapter->link_speed) { +static unsigned int +iavf_virtchnl_itr_multiplier(enum virtchnl_link_speed speed_virtchnl) +{ + switch (speed_virtchnl) { case VIRTCHNL_LINK_SPEED_40GB: - divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 1024; - break; + return IAVF_AIM_MULTIPLIER_40G; case VIRTCHNL_LINK_SPEED_25GB: case VIRTCHNL_LINK_SPEED_20GB: - divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 512; - break; - default: + return IAVF_AIM_MULTIPLIER_20G; case VIRTCHNL_LINK_SPEED_10GB: - divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 256; - break; + default: + return IAVF_AIM_MULTIPLIER_10G; case VIRTCHNL_LINK_SPEED_1GB: case VIRTCHNL_LINK_SPEED_100MB: - divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 32; - break; + return IAVF_AIM_MULTIPLIER_1G; } +} - return divisor; +static unsigned int iavf_itr_divisor(struct iavf_adapter *adapter) +{ + if (ADV_LINK_SUPPORT(adapter)) + return IAVF_ITR_ADAPTIVE_MIN_INC * + iavf_mbps_itr_multiplier(adapter->link_speed_mbps); + else + return IAVF_ITR_ADAPTIVE_MIN_INC * + iavf_virtchnl_itr_multiplier(adapter->link_speed); } /** @@ -586,8 +617,9 @@ adjust_by_size: * Use addition as we have already recorded the new latency flag * for the ITR value. */ - itr += DIV_ROUND_UP(avg_wire_size, iavf_itr_divisor(q_vector)) * - IAVF_ITR_ADAPTIVE_MIN_INC; + itr += DIV_ROUND_UP(avg_wire_size, + iavf_itr_divisor(q_vector->adapter)) * + IAVF_ITR_ADAPTIVE_MIN_INC; if ((itr & IAVF_ITR_MASK) > IAVF_ITR_ADAPTIVE_MAX_USECS) { itr &= IAVF_ITR_ADAPTIVE_LATENCY; diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 5263cefe46f5..782450d5c12f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -22,17 +22,17 @@ static int iavf_send_pf_msg(struct iavf_adapter *adapter, enum virtchnl_ops op, u8 *msg, u16 len) { struct iavf_hw *hw = &adapter->hw; - enum iavf_status err; + enum iavf_status status; if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) return 0; /* nothing to see here, move along */ - err = iavf_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL); - if (err) - dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n", - op, iavf_stat_str(hw, err), + status = iavf_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL); + if (status) + dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, status %s, aq_err %s\n", + op, iavf_stat_str(hw, status), iavf_aq_str(hw, hw->aq.asq_last_status)); - return err; + return iavf_status_to_errno(status); } /** @@ -55,6 +55,41 @@ int iavf_send_api_ver(struct iavf_adapter *adapter) } /** + * iavf_poll_virtchnl_msg + * @hw: HW configuration structure + * @event: event to populate on success + * @op_to_poll: requested virtchnl op to poll for + * + * Initialize poll for virtchnl msg matching the requested_op. Returns 0 + * if a message of the correct opcode is in the queue or an error code + * if no message matching the op code is waiting and other failures. + */ +static int +iavf_poll_virtchnl_msg(struct iavf_hw *hw, struct iavf_arq_event_info *event, + enum virtchnl_ops op_to_poll) +{ + enum virtchnl_ops received_op; + enum iavf_status status; + u32 v_retval; + + while (1) { + /* When the AQ is empty, iavf_clean_arq_element will return + * nonzero and this loop will terminate. + */ + status = iavf_clean_arq_element(hw, event, NULL); + if (status != IAVF_SUCCESS) + return iavf_status_to_errno(status); + received_op = + (enum virtchnl_ops)le32_to_cpu(event->desc.cookie_high); + if (op_to_poll == received_op) + break; + } + + v_retval = le32_to_cpu(event->desc.cookie_low); + return virtchnl_status_to_errno((enum virtchnl_status_code)v_retval); +} + +/** * iavf_verify_api_ver * @adapter: adapter structure * @@ -65,55 +100,28 @@ int iavf_send_api_ver(struct iavf_adapter *adapter) **/ int iavf_verify_api_ver(struct iavf_adapter *adapter) { - struct virtchnl_version_info *pf_vvi; - struct iavf_hw *hw = &adapter->hw; struct iavf_arq_event_info event; - enum virtchnl_ops op; - enum iavf_status err; + int err; event.buf_len = IAVF_MAX_AQ_BUF_SIZE; - event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); - if (!event.msg_buf) { - err = -ENOMEM; - goto out; - } - - while (1) { - err = iavf_clean_arq_element(hw, &event, NULL); - /* When the AQ is empty, iavf_clean_arq_element will return - * nonzero and this loop will terminate. - */ - if (err) - goto out_alloc; - op = - (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); - if (op == VIRTCHNL_OP_VERSION) - break; - } - + event.msg_buf = kzalloc(IAVF_MAX_AQ_BUF_SIZE, GFP_KERNEL); + if (!event.msg_buf) + return -ENOMEM; - err = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); - if (err) - goto out_alloc; + err = iavf_poll_virtchnl_msg(&adapter->hw, &event, VIRTCHNL_OP_VERSION); + if (!err) { + struct virtchnl_version_info *pf_vvi = + (struct virtchnl_version_info *)event.msg_buf; + adapter->pf_version = *pf_vvi; - if (op != VIRTCHNL_OP_VERSION) { - dev_info(&adapter->pdev->dev, "Invalid reply type %d from PF\n", - op); - err = -EIO; - goto out_alloc; + if (pf_vvi->major > VIRTCHNL_VERSION_MAJOR || + (pf_vvi->major == VIRTCHNL_VERSION_MAJOR && + pf_vvi->minor > VIRTCHNL_VERSION_MINOR)) + err = -EIO; } - pf_vvi = (struct virtchnl_version_info *)event.msg_buf; - adapter->pf_version = *pf_vvi; - - if ((pf_vvi->major > VIRTCHNL_VERSION_MAJOR) || - ((pf_vvi->major == VIRTCHNL_VERSION_MAJOR) && - (pf_vvi->minor > VIRTCHNL_VERSION_MINOR))) - err = -EIO; - -out_alloc: kfree(event.msg_buf); -out: + return err; } @@ -208,33 +216,17 @@ int iavf_get_vf_config(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; struct iavf_arq_event_info event; - enum virtchnl_ops op; - enum iavf_status err; u16 len; + int err; - len = sizeof(struct virtchnl_vf_resource) + + len = sizeof(struct virtchnl_vf_resource) + IAVF_MAX_VF_VSI * sizeof(struct virtchnl_vsi_resource); event.buf_len = len; - event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); - if (!event.msg_buf) { - err = -ENOMEM; - goto out; - } - - while (1) { - /* When the AQ is empty, iavf_clean_arq_element will return - * nonzero and this loop will terminate. - */ - err = iavf_clean_arq_element(hw, &event, NULL); - if (err) - goto out_alloc; - op = - (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); - if (op == VIRTCHNL_OP_GET_VF_RESOURCES) - break; - } + event.msg_buf = kzalloc(len, GFP_KERNEL); + if (!event.msg_buf) + return -ENOMEM; - err = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); + err = iavf_poll_virtchnl_msg(hw, &event, VIRTCHNL_OP_GET_VF_RESOURCES); memcpy(adapter->vf_res, event.msg_buf, min(event.msg_len, len)); /* some PFs send more queues than we should have so validate that @@ -243,48 +235,32 @@ int iavf_get_vf_config(struct iavf_adapter *adapter) if (!err) iavf_validate_num_queues(adapter); iavf_vf_parse_hw_config(hw, adapter->vf_res); -out_alloc: + kfree(event.msg_buf); -out: + return err; } int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter) { - struct iavf_hw *hw = &adapter->hw; struct iavf_arq_event_info event; - enum virtchnl_ops op; - enum iavf_status err; + int err; u16 len; - len = sizeof(struct virtchnl_vlan_caps); + len = sizeof(struct virtchnl_vlan_caps); event.buf_len = len; - event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); - if (!event.msg_buf) { - err = -ENOMEM; - goto out; - } - - while (1) { - /* When the AQ is empty, iavf_clean_arq_element will return - * nonzero and this loop will terminate. - */ - err = iavf_clean_arq_element(hw, &event, NULL); - if (err) - goto out_alloc; - op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); - if (op == VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS) - break; - } + event.msg_buf = kzalloc(len, GFP_KERNEL); + if (!event.msg_buf) + return -ENOMEM; - err = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); - if (err) - goto out_alloc; + err = iavf_poll_virtchnl_msg(&adapter->hw, &event, + VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS); + if (!err) + memcpy(&adapter->vlan_v2_caps, event.msg_buf, + min(event.msg_len, len)); - memcpy(&adapter->vlan_v2_caps, event.msg_buf, min(event.msg_len, len)); -out_alloc: kfree(event.msg_buf); -out: + return err; } @@ -454,6 +430,20 @@ void iavf_map_queues(struct iavf_adapter *adapter) } /** + * iavf_set_mac_addr_type - Set the correct request type from the filter type + * @virtchnl_ether_addr: pointer to requested list element + * @filter: pointer to requested filter + **/ +static void +iavf_set_mac_addr_type(struct virtchnl_ether_addr *virtchnl_ether_addr, + const struct iavf_mac_filter *filter) +{ + virtchnl_ether_addr->type = filter->is_primary ? + VIRTCHNL_ETHER_ADDR_PRIMARY : + VIRTCHNL_ETHER_ADDR_EXTRA; +} + +/** * iavf_add_ether_addrs * @adapter: adapter structure * @@ -508,6 +498,7 @@ void iavf_add_ether_addrs(struct iavf_adapter *adapter) list_for_each_entry(f, &adapter->mac_filter_list, list) { if (f->add) { ether_addr_copy(veal->list[i].addr, f->macaddr); + iavf_set_mac_addr_type(&veal->list[i], f); i++; f->add = false; if (i == count) @@ -577,6 +568,7 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter) list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { if (f->remove) { ether_addr_copy(veal->list[i].addr, f->macaddr); + iavf_set_mac_addr_type(&veal->list[i], f); i++; list_del(&f->list); kfree(f); @@ -1827,11 +1819,13 @@ void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter) * * Request that the PF reset this VF. No response is expected. **/ -void iavf_request_reset(struct iavf_adapter *adapter) +int iavf_request_reset(struct iavf_adapter *adapter) { + int err; /* Don't check CURRENT_OP - this is always higher priority */ - iavf_send_pf_msg(adapter, VIRTCHNL_OP_RESET_VF, NULL, 0); + err = iavf_send_pf_msg(adapter, VIRTCHNL_OP_RESET_VF, NULL, 0); adapter->current_op = VIRTCHNL_OP_UNKNOWN; + return err; } /** diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index c36faa7d1471..44b8464b7663 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -18,8 +18,12 @@ ice-y := ice_main.o \ ice_txrx_lib.o \ ice_txrx.o \ ice_fltr.o \ + ice_pf_vsi_vlan_ops.o \ + ice_vsi_vlan_ops.o \ + ice_vsi_vlan_lib.o \ ice_fdir.o \ ice_ethtool_fdir.o \ + ice_vlan_mode.o \ ice_flex_pipe.o \ ice_flow.o \ ice_idc.o \ @@ -29,9 +33,14 @@ ice-y := ice_main.o \ ice_ethtool.o \ ice_repr.o \ ice_tc_lib.o -ice-$(CONFIG_PCI_IOV) += ice_virtchnl_allowlist.o -ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o ice_virtchnl_fdir.o +ice-$(CONFIG_PCI_IOV) += \ + ice_virtchnl_allowlist.o \ + ice_virtchnl_fdir.o \ + ice_sriov.o \ + ice_vf_vsi_vlan_ops.o \ + ice_virtchnl_pf.o ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o +ice-$(CONFIG_TTY) += ice_gnss.o ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index bea1d1e39fa2..c7a987c89423 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -72,6 +72,8 @@ #include "ice_repr.h" #include "ice_eswitch.h" #include "ice_lag.h" +#include "ice_vsi_vlan_ops.h" +#include "ice_gnss.h" #define ICE_BAR0 0 #define ICE_REQ_DESC_MULTIPLE 32 @@ -107,7 +109,6 @@ /* All VF control VSIs share the same IRQ, so assign a unique ID for them */ #define ICE_RES_VF_CTRL_VEC_ID (ICE_RES_RDMA_VEC_ID - 1) #define ICE_INVAL_Q_INDEX 0xffff -#define ICE_INVAL_VFID 256 #define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */ @@ -183,6 +184,7 @@ enum ice_feature { ICE_F_DSCP, ICE_F_SMA_CTRL, + ICE_F_GNSS, ICE_F_MAX }; @@ -330,7 +332,7 @@ struct ice_vsi { u16 vsi_num; /* HW (absolute) index of this VSI */ u16 idx; /* software index in pf->vsi[] */ - s16 vf_id; /* VF ID for SR-IOV VSIs */ + struct ice_vf *vf; /* VF associated with this VSI */ u16 ethtype; /* Ethernet protocol for pause frame */ u16 num_gfltr; @@ -367,6 +369,8 @@ struct ice_vsi { u8 irqs_ready:1; u8 current_isup:1; /* Sync 'link up' logging */ u8 stat_offsets_loaded:1; + struct ice_vsi_vlan_ops inner_vlan_ops; + struct ice_vsi_vlan_ops outer_vlan_ops; u16 num_vlan; /* queue information */ @@ -467,7 +471,6 @@ enum ice_pf_flags { ICE_FLAG_FD_ENA, ICE_FLAG_PTP_SUPPORTED, /* PTP is supported by NVM */ ICE_FLAG_PTP, /* PTP is enabled by software */ - ICE_FLAG_AUX_ENA, ICE_FLAG_ADV_FEATURES, ICE_FLAG_TC_MQPRIO, /* support for Multi queue TC */ ICE_FLAG_CLS_FLOWER, @@ -481,9 +484,11 @@ enum ice_pf_flags { ICE_FLAG_LEGACY_RX, ICE_FLAG_VF_TRUE_PROMISC_ENA, ICE_FLAG_MDD_AUTO_RESET_VF, + ICE_FLAG_VF_VLAN_PRUNING, ICE_FLAG_LINK_LENIENT_MODE_ENA, ICE_FLAG_PLUG_AUX_DEV, ICE_FLAG_MTU_CHANGED, + ICE_FLAG_GNSS, /* GNSS successfully initialized */ ICE_PF_FLAGS_NBITS /* must be last */ }; @@ -524,15 +529,7 @@ struct ice_pf { struct ice_vsi **vsi; /* VSIs created by the driver */ struct ice_sw *first_sw; /* first switch created by firmware */ u16 eswitch_mode; /* current mode of eswitch */ - /* Virtchnl/SR-IOV config info */ - struct ice_vf *vf; - u16 num_alloc_vfs; /* actual number of VFs allocated */ - u16 num_vfs_supported; /* num VFs supported for this PF */ - u16 num_qps_per_vf; - u16 num_msix_per_vf; - /* used to ratelimit the MDD event logging */ - unsigned long last_printed_mdd_jiffies; - DECLARE_BITMAP(malvfs, ICE_MAX_VF_COUNT); + struct ice_vfs vfs; DECLARE_BITMAP(features, ICE_F_MAX); DECLARE_BITMAP(state, ICE_STATE_NBITS); DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS); @@ -547,6 +544,9 @@ struct ice_pf { struct mutex tc_mutex; /* lock to protect TC changes */ u32 msg_enable; struct ice_ptp ptp; + struct tty_driver *ice_gnss_tty_driver; + struct tty_port gnss_tty_port; + struct gnss_serial *gnss_serial; u16 num_rdma_msix; /* Total MSIX vectors for RDMA driver */ u16 rdma_base_vector; @@ -887,7 +887,6 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf) { if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix) { set_bit(ICE_FLAG_RDMA_ENA, pf->flags); - set_bit(ICE_FLAG_AUX_ENA, pf->flags); set_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags); } } @@ -909,6 +908,5 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf) ice_unplug_aux_dev(pf); clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); - clear_bit(ICE_FLAG_AUX_ENA, pf->flags); } #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index ad1dcfa5ff65..b25e27c4d887 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -226,6 +226,15 @@ struct ice_aqc_get_sw_cfg_resp_elem { #define ICE_AQC_GET_SW_CONF_RESP_IS_VF BIT(15) }; +/* Set Port parameters, (direct, 0x0203) */ +struct ice_aqc_set_port_params { + __le16 cmd_flags; +#define ICE_AQC_SET_P_PARAMS_DOUBLE_VLAN_ENA BIT(2) + __le16 bad_frame_vsi; + __le16 swid; + u8 reserved[10]; +}; + /* These resource type defines are used for all switch resource * commands where a resource type is required, such as: * Get Resource Allocation command (indirect 0x0204) @@ -283,6 +292,40 @@ struct ice_aqc_alloc_free_res_elem { struct ice_aqc_res_elem elem[]; }; +/* Request buffer for Set VLAN Mode AQ command (indirect 0x020C) */ +struct ice_aqc_set_vlan_mode { + u8 reserved; + u8 l2tag_prio_tagging; +#define ICE_AQ_VLAN_PRIO_TAG_S 0 +#define ICE_AQ_VLAN_PRIO_TAG_M (0x7 << ICE_AQ_VLAN_PRIO_TAG_S) +#define ICE_AQ_VLAN_PRIO_TAG_NOT_SUPPORTED 0x0 +#define ICE_AQ_VLAN_PRIO_TAG_STAG 0x1 +#define ICE_AQ_VLAN_PRIO_TAG_OUTER_CTAG 0x2 +#define ICE_AQ_VLAN_PRIO_TAG_OUTER_VLAN 0x3 +#define ICE_AQ_VLAN_PRIO_TAG_INNER_CTAG 0x4 +#define ICE_AQ_VLAN_PRIO_TAG_MAX 0x4 +#define ICE_AQ_VLAN_PRIO_TAG_ERROR 0x7 + u8 l2tag_reserved[64]; + u8 rdma_packet; +#define ICE_AQ_VLAN_RDMA_TAG_S 0 +#define ICE_AQ_VLAN_RDMA_TAG_M (0x3F << ICE_AQ_VLAN_RDMA_TAG_S) +#define ICE_AQ_SVM_VLAN_RDMA_PKT_FLAG_SETTING 0x10 +#define ICE_AQ_DVM_VLAN_RDMA_PKT_FLAG_SETTING 0x1A + u8 rdma_reserved[2]; + u8 mng_vlan_prot_id; +#define ICE_AQ_VLAN_MNG_PROTOCOL_ID_OUTER 0x10 +#define ICE_AQ_VLAN_MNG_PROTOCOL_ID_INNER 0x11 + u8 prot_id_reserved[30]; +}; + +/* Response buffer for Get VLAN Mode AQ command (indirect 0x020D) */ +struct ice_aqc_get_vlan_mode { + u8 vlan_mode; +#define ICE_AQ_VLAN_MODE_DVM_ENA BIT(0) + u8 l2tag_prio_tagging; + u8 reserved[98]; +}; + /* Add VSI (indirect 0x0210) * Update VSI (indirect 0x0211) * Get VSI (indirect 0x0212) @@ -343,108 +386,113 @@ struct ice_aqc_vsi_props { #define ICE_AQ_VSI_SW_FLAG_SRC_PRUNE BIT(7) u8 sw_flags2; #define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S 0 -#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M \ - (0xF << ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S) +#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M (0xF << ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S) #define ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA BIT(0) #define ICE_AQ_VSI_SW_FLAG_LAN_ENA BIT(4) u8 veb_stat_id; #define ICE_AQ_VSI_SW_VEB_STAT_ID_S 0 -#define ICE_AQ_VSI_SW_VEB_STAT_ID_M (0x1F << ICE_AQ_VSI_SW_VEB_STAT_ID_S) +#define ICE_AQ_VSI_SW_VEB_STAT_ID_M (0x1F << ICE_AQ_VSI_SW_VEB_STAT_ID_S) #define ICE_AQ_VSI_SW_VEB_STAT_ID_VALID BIT(5) /* security section */ u8 sec_flags; #define ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD BIT(0) #define ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF BIT(2) -#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S 4 -#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_M (0xF << ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S) +#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S 4 +#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_M (0xF << ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S) #define ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA BIT(0) u8 sec_reserved; /* VLAN section */ - __le16 pvid; /* VLANS include priority bits */ - u8 pvlan_reserved[2]; - u8 vlan_flags; -#define ICE_AQ_VSI_VLAN_MODE_S 0 -#define ICE_AQ_VSI_VLAN_MODE_M (0x3 << ICE_AQ_VSI_VLAN_MODE_S) -#define ICE_AQ_VSI_VLAN_MODE_UNTAGGED 0x1 -#define ICE_AQ_VSI_VLAN_MODE_TAGGED 0x2 -#define ICE_AQ_VSI_VLAN_MODE_ALL 0x3 -#define ICE_AQ_VSI_PVLAN_INSERT_PVID BIT(2) -#define ICE_AQ_VSI_VLAN_EMOD_S 3 -#define ICE_AQ_VSI_VLAN_EMOD_M (0x3 << ICE_AQ_VSI_VLAN_EMOD_S) -#define ICE_AQ_VSI_VLAN_EMOD_STR_BOTH (0x0 << ICE_AQ_VSI_VLAN_EMOD_S) -#define ICE_AQ_VSI_VLAN_EMOD_STR_UP (0x1 << ICE_AQ_VSI_VLAN_EMOD_S) -#define ICE_AQ_VSI_VLAN_EMOD_STR (0x2 << ICE_AQ_VSI_VLAN_EMOD_S) -#define ICE_AQ_VSI_VLAN_EMOD_NOTHING (0x3 << ICE_AQ_VSI_VLAN_EMOD_S) - u8 pvlan_reserved2[3]; + __le16 port_based_inner_vlan; /* VLANS include priority bits */ + u8 inner_vlan_reserved[2]; + u8 inner_vlan_flags; +#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_S 0 +#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_M (0x3 << ICE_AQ_VSI_INNER_VLAN_TX_MODE_S) +#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_ACCEPTUNTAGGED 0x1 +#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_ACCEPTTAGGED 0x2 +#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL 0x3 +#define ICE_AQ_VSI_INNER_VLAN_INSERT_PVID BIT(2) +#define ICE_AQ_VSI_INNER_VLAN_EMODE_S 3 +#define ICE_AQ_VSI_INNER_VLAN_EMODE_M (0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) +#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH (0x0 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) +#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_UP (0x1 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) +#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR (0x2 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) +#define ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING (0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) + u8 inner_vlan_reserved2[3]; /* ingress egress up sections */ __le32 ingress_table; /* bitmap, 3 bits per up */ -#define ICE_AQ_VSI_UP_TABLE_UP0_S 0 -#define ICE_AQ_VSI_UP_TABLE_UP0_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP0_S) -#define ICE_AQ_VSI_UP_TABLE_UP1_S 3 -#define ICE_AQ_VSI_UP_TABLE_UP1_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP1_S) -#define ICE_AQ_VSI_UP_TABLE_UP2_S 6 -#define ICE_AQ_VSI_UP_TABLE_UP2_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP2_S) -#define ICE_AQ_VSI_UP_TABLE_UP3_S 9 -#define ICE_AQ_VSI_UP_TABLE_UP3_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP3_S) -#define ICE_AQ_VSI_UP_TABLE_UP4_S 12 -#define ICE_AQ_VSI_UP_TABLE_UP4_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP4_S) -#define ICE_AQ_VSI_UP_TABLE_UP5_S 15 -#define ICE_AQ_VSI_UP_TABLE_UP5_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP5_S) -#define ICE_AQ_VSI_UP_TABLE_UP6_S 18 -#define ICE_AQ_VSI_UP_TABLE_UP6_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP6_S) -#define ICE_AQ_VSI_UP_TABLE_UP7_S 21 -#define ICE_AQ_VSI_UP_TABLE_UP7_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP7_S) +#define ICE_AQ_VSI_UP_TABLE_UP0_S 0 +#define ICE_AQ_VSI_UP_TABLE_UP0_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP0_S) +#define ICE_AQ_VSI_UP_TABLE_UP1_S 3 +#define ICE_AQ_VSI_UP_TABLE_UP1_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP1_S) +#define ICE_AQ_VSI_UP_TABLE_UP2_S 6 +#define ICE_AQ_VSI_UP_TABLE_UP2_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP2_S) +#define ICE_AQ_VSI_UP_TABLE_UP3_S 9 +#define ICE_AQ_VSI_UP_TABLE_UP3_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP3_S) +#define ICE_AQ_VSI_UP_TABLE_UP4_S 12 +#define ICE_AQ_VSI_UP_TABLE_UP4_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP4_S) +#define ICE_AQ_VSI_UP_TABLE_UP5_S 15 +#define ICE_AQ_VSI_UP_TABLE_UP5_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP5_S) +#define ICE_AQ_VSI_UP_TABLE_UP6_S 18 +#define ICE_AQ_VSI_UP_TABLE_UP6_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP6_S) +#define ICE_AQ_VSI_UP_TABLE_UP7_S 21 +#define ICE_AQ_VSI_UP_TABLE_UP7_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP7_S) __le32 egress_table; /* same defines as for ingress table */ /* outer tags section */ - __le16 outer_tag; - u8 outer_tag_flags; -#define ICE_AQ_VSI_OUTER_TAG_MODE_S 0 -#define ICE_AQ_VSI_OUTER_TAG_MODE_M (0x3 << ICE_AQ_VSI_OUTER_TAG_MODE_S) -#define ICE_AQ_VSI_OUTER_TAG_NOTHING 0x0 -#define ICE_AQ_VSI_OUTER_TAG_REMOVE 0x1 -#define ICE_AQ_VSI_OUTER_TAG_COPY 0x2 -#define ICE_AQ_VSI_OUTER_TAG_TYPE_S 2 -#define ICE_AQ_VSI_OUTER_TAG_TYPE_M (0x3 << ICE_AQ_VSI_OUTER_TAG_TYPE_S) -#define ICE_AQ_VSI_OUTER_TAG_NONE 0x0 -#define ICE_AQ_VSI_OUTER_TAG_STAG 0x1 -#define ICE_AQ_VSI_OUTER_TAG_VLAN_8100 0x2 -#define ICE_AQ_VSI_OUTER_TAG_VLAN_9100 0x3 -#define ICE_AQ_VSI_OUTER_TAG_INSERT BIT(4) -#define ICE_AQ_VSI_OUTER_TAG_ACCEPT_HOST BIT(6) - u8 outer_tag_reserved; + __le16 port_based_outer_vlan; + u8 outer_vlan_flags; +#define ICE_AQ_VSI_OUTER_VLAN_EMODE_S 0 +#define ICE_AQ_VSI_OUTER_VLAN_EMODE_M (0x3 << ICE_AQ_VSI_OUTER_VLAN_EMODE_S) +#define ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW_BOTH 0x0 +#define ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW_UP 0x1 +#define ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW 0x2 +#define ICE_AQ_VSI_OUTER_VLAN_EMODE_NOTHING 0x3 +#define ICE_AQ_VSI_OUTER_TAG_TYPE_S 2 +#define ICE_AQ_VSI_OUTER_TAG_TYPE_M (0x3 << ICE_AQ_VSI_OUTER_TAG_TYPE_S) +#define ICE_AQ_VSI_OUTER_TAG_NONE 0x0 +#define ICE_AQ_VSI_OUTER_TAG_STAG 0x1 +#define ICE_AQ_VSI_OUTER_TAG_VLAN_8100 0x2 +#define ICE_AQ_VSI_OUTER_TAG_VLAN_9100 0x3 +#define ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT BIT(4) +#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S 5 +#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M (0x3 << ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) +#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ACCEPTUNTAGGED 0x1 +#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ACCEPTTAGGED 0x2 +#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL 0x3 +#define ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC BIT(7) + u8 outer_vlan_reserved; /* queue mapping section */ __le16 mapping_flags; -#define ICE_AQ_VSI_Q_MAP_CONTIG 0x0 -#define ICE_AQ_VSI_Q_MAP_NONCONTIG BIT(0) +#define ICE_AQ_VSI_Q_MAP_CONTIG 0x0 +#define ICE_AQ_VSI_Q_MAP_NONCONTIG BIT(0) __le16 q_mapping[16]; -#define ICE_AQ_VSI_Q_S 0 -#define ICE_AQ_VSI_Q_M (0x7FF << ICE_AQ_VSI_Q_S) +#define ICE_AQ_VSI_Q_S 0 +#define ICE_AQ_VSI_Q_M (0x7FF << ICE_AQ_VSI_Q_S) __le16 tc_mapping[8]; -#define ICE_AQ_VSI_TC_Q_OFFSET_S 0 -#define ICE_AQ_VSI_TC_Q_OFFSET_M (0x7FF << ICE_AQ_VSI_TC_Q_OFFSET_S) -#define ICE_AQ_VSI_TC_Q_NUM_S 11 -#define ICE_AQ_VSI_TC_Q_NUM_M (0xF << ICE_AQ_VSI_TC_Q_NUM_S) +#define ICE_AQ_VSI_TC_Q_OFFSET_S 0 +#define ICE_AQ_VSI_TC_Q_OFFSET_M (0x7FF << ICE_AQ_VSI_TC_Q_OFFSET_S) +#define ICE_AQ_VSI_TC_Q_NUM_S 11 +#define ICE_AQ_VSI_TC_Q_NUM_M (0xF << ICE_AQ_VSI_TC_Q_NUM_S) /* queueing option section */ u8 q_opt_rss; -#define ICE_AQ_VSI_Q_OPT_RSS_LUT_S 0 -#define ICE_AQ_VSI_Q_OPT_RSS_LUT_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) -#define ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI 0x0 -#define ICE_AQ_VSI_Q_OPT_RSS_LUT_PF 0x2 -#define ICE_AQ_VSI_Q_OPT_RSS_LUT_GBL 0x3 -#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S 2 -#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M (0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S) -#define ICE_AQ_VSI_Q_OPT_RSS_HASH_S 6 -#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) -#define ICE_AQ_VSI_Q_OPT_RSS_TPLZ (0x0 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) -#define ICE_AQ_VSI_Q_OPT_RSS_SYM_TPLZ (0x1 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) -#define ICE_AQ_VSI_Q_OPT_RSS_XOR (0x2 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) -#define ICE_AQ_VSI_Q_OPT_RSS_JHASH (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_S 0 +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI 0x0 +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_PF 0x2 +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_GBL 0x3 +#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S 2 +#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M (0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S) +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_S 6 +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_TPLZ (0x0 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_SYM_TPLZ (0x1 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_XOR (0x2 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_JHASH (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) u8 q_opt_tc; -#define ICE_AQ_VSI_Q_OPT_TC_OVR_S 0 -#define ICE_AQ_VSI_Q_OPT_TC_OVR_M (0x1F << ICE_AQ_VSI_Q_OPT_TC_OVR_S) -#define ICE_AQ_VSI_Q_OPT_PROF_TC_OVR BIT(7) +#define ICE_AQ_VSI_Q_OPT_TC_OVR_S 0 +#define ICE_AQ_VSI_Q_OPT_TC_OVR_M (0x1F << ICE_AQ_VSI_Q_OPT_TC_OVR_S) +#define ICE_AQ_VSI_Q_OPT_PROF_TC_OVR BIT(7) u8 q_opt_flags; -#define ICE_AQ_VSI_Q_OPT_PE_FLTR_EN BIT(0) +#define ICE_AQ_VSI_Q_OPT_PE_FLTR_EN BIT(0) u8 q_opt_reserved[3]; /* outer up section */ __le32 outer_up_table; /* same structure and defines as ingress tbl */ @@ -452,27 +500,27 @@ struct ice_aqc_vsi_props { __le16 sect_10_reserved; /* flow director section */ __le16 fd_options; -#define ICE_AQ_VSI_FD_ENABLE BIT(0) -#define ICE_AQ_VSI_FD_TX_AUTO_ENABLE BIT(1) -#define ICE_AQ_VSI_FD_PROG_ENABLE BIT(3) +#define ICE_AQ_VSI_FD_ENABLE BIT(0) +#define ICE_AQ_VSI_FD_TX_AUTO_ENABLE BIT(1) +#define ICE_AQ_VSI_FD_PROG_ENABLE BIT(3) __le16 max_fd_fltr_dedicated; __le16 max_fd_fltr_shared; __le16 fd_def_q; -#define ICE_AQ_VSI_FD_DEF_Q_S 0 -#define ICE_AQ_VSI_FD_DEF_Q_M (0x7FF << ICE_AQ_VSI_FD_DEF_Q_S) -#define ICE_AQ_VSI_FD_DEF_GRP_S 12 -#define ICE_AQ_VSI_FD_DEF_GRP_M (0x7 << ICE_AQ_VSI_FD_DEF_GRP_S) +#define ICE_AQ_VSI_FD_DEF_Q_S 0 +#define ICE_AQ_VSI_FD_DEF_Q_M (0x7FF << ICE_AQ_VSI_FD_DEF_Q_S) +#define ICE_AQ_VSI_FD_DEF_GRP_S 12 +#define ICE_AQ_VSI_FD_DEF_GRP_M (0x7 << ICE_AQ_VSI_FD_DEF_GRP_S) __le16 fd_report_opt; -#define ICE_AQ_VSI_FD_REPORT_Q_S 0 -#define ICE_AQ_VSI_FD_REPORT_Q_M (0x7FF << ICE_AQ_VSI_FD_REPORT_Q_S) -#define ICE_AQ_VSI_FD_DEF_PRIORITY_S 12 -#define ICE_AQ_VSI_FD_DEF_PRIORITY_M (0x7 << ICE_AQ_VSI_FD_DEF_PRIORITY_S) -#define ICE_AQ_VSI_FD_DEF_DROP BIT(15) +#define ICE_AQ_VSI_FD_REPORT_Q_S 0 +#define ICE_AQ_VSI_FD_REPORT_Q_M (0x7FF << ICE_AQ_VSI_FD_REPORT_Q_S) +#define ICE_AQ_VSI_FD_DEF_PRIORITY_S 12 +#define ICE_AQ_VSI_FD_DEF_PRIORITY_M (0x7 << ICE_AQ_VSI_FD_DEF_PRIORITY_S) +#define ICE_AQ_VSI_FD_DEF_DROP BIT(15) /* PASID section */ __le32 pasid_id; -#define ICE_AQ_VSI_PASID_ID_S 0 -#define ICE_AQ_VSI_PASID_ID_M (0xFFFFF << ICE_AQ_VSI_PASID_ID_S) -#define ICE_AQ_VSI_PASID_ID_VALID BIT(31) +#define ICE_AQ_VSI_PASID_ID_S 0 +#define ICE_AQ_VSI_PASID_ID_M (0xFFFFF << ICE_AQ_VSI_PASID_ID_S) +#define ICE_AQ_VSI_PASID_ID_VALID BIT(31) u8 reserved[24]; }; @@ -489,9 +537,13 @@ struct ice_aqc_add_get_recipe { struct ice_aqc_recipe_content { u8 rid; +#define ICE_AQ_RECIPE_ID_S 0 +#define ICE_AQ_RECIPE_ID_M (0x3F << ICE_AQ_RECIPE_ID_S) #define ICE_AQ_RECIPE_ID_IS_ROOT BIT(7) #define ICE_AQ_SW_ID_LKUP_IDX 0 u8 lkup_indx[5]; +#define ICE_AQ_RECIPE_LKUP_DATA_S 0 +#define ICE_AQ_RECIPE_LKUP_DATA_M (0x3F << ICE_AQ_RECIPE_LKUP_DATA_S) #define ICE_AQ_RECIPE_LKUP_IGNORE BIT(7) #define ICE_AQ_SW_ID_LKUP_MASK 0x00FF __le16 mask[5]; @@ -502,15 +554,25 @@ struct ice_aqc_recipe_content { u8 rsvd0[3]; u8 act_ctrl_join_priority; u8 act_ctrl_fwd_priority; +#define ICE_AQ_RECIPE_FWD_PRIORITY_S 0 +#define ICE_AQ_RECIPE_FWD_PRIORITY_M (0xF << ICE_AQ_RECIPE_FWD_PRIORITY_S) u8 act_ctrl; +#define ICE_AQ_RECIPE_ACT_NEED_PASS_L2 BIT(0) +#define ICE_AQ_RECIPE_ACT_ALLOW_PASS_L2 BIT(1) #define ICE_AQ_RECIPE_ACT_INV_ACT BIT(2) +#define ICE_AQ_RECIPE_ACT_PRUNE_INDX_S 4 +#define ICE_AQ_RECIPE_ACT_PRUNE_INDX_M (0x3 << ICE_AQ_RECIPE_ACT_PRUNE_INDX_S) u8 rsvd1; __le32 dflt_act; +#define ICE_AQ_RECIPE_DFLT_ACT_S 0 +#define ICE_AQ_RECIPE_DFLT_ACT_M (0x7FFFF << ICE_AQ_RECIPE_DFLT_ACT_S) +#define ICE_AQ_RECIPE_DFLT_ACT_VALID BIT(31) }; struct ice_aqc_recipe_data_elem { u8 recipe_indx; u8 resp_bits; +#define ICE_AQ_RECIPE_WAS_UPDATED BIT(0) u8 rsvd0[2]; u8 recipe_bitmap[8]; u8 rsvd1[4]; @@ -1339,6 +1401,24 @@ struct ice_aqc_get_link_topo { u8 rsvd[9]; }; +/* Read I2C (direct, 0x06E2) */ +struct ice_aqc_i2c { + struct ice_aqc_link_topo_addr topo_addr; + __le16 i2c_addr; + u8 i2c_params; +#define ICE_AQC_I2C_DATA_SIZE_M GENMASK(3, 0) +#define ICE_AQC_I2C_USE_REPEATED_START BIT(7) + + u8 rsvd; + __le16 i2c_bus_addr; + u8 rsvd2[4]; +}; + +/* Read I2C Response (direct, 0x06E2) */ +struct ice_aqc_read_i2c_resp { + u8 i2c_data[16]; +}; + /* Set Port Identification LED (direct, 0x06E9) */ struct ice_aqc_set_port_id_led { u8 lport_num; @@ -1883,7 +1963,7 @@ struct ice_aqc_get_clear_fw_log { }; /* Download Package (indirect 0x0C40) */ -/* Also used for Update Package (indirect 0x0C42) */ +/* Also used for Update Package (indirect 0x0C41 and 0x0C42) */ struct ice_aqc_download_pkg { u8 flags; #define ICE_AQC_DOWNLOAD_PKG_LAST_BUF 0x01 @@ -2009,6 +2089,7 @@ struct ice_aq_desc { struct ice_aqc_sff_eeprom read_write_sff_param; struct ice_aqc_set_port_id_led set_port_id_led; struct ice_aqc_get_sw_cfg get_sw_conf; + struct ice_aqc_set_port_params set_port_params; struct ice_aqc_sw_rules sw_rules; struct ice_aqc_add_get_recipe add_get_recipe; struct ice_aqc_recipe_to_profile recipe_to_profile; @@ -2049,6 +2130,8 @@ struct ice_aq_desc { struct ice_aqc_get_link_status get_link_status; struct ice_aqc_event_lan_overflow lan_overflow; struct ice_aqc_get_link_topo get_link_topo; + struct ice_aqc_i2c read_i2c; + struct ice_aqc_read_i2c_resp read_i2c_resp; } params; }; @@ -2110,10 +2193,13 @@ enum ice_adminq_opc { /* internal switch commands */ ice_aqc_opc_get_sw_cfg = 0x0200, + ice_aqc_opc_set_port_params = 0x0203, /* Alloc/Free/Get Resources */ ice_aqc_opc_alloc_res = 0x0208, ice_aqc_opc_free_res = 0x0209, + ice_aqc_opc_set_vlan_mode_parameters = 0x020C, + ice_aqc_opc_get_vlan_mode_parameters = 0x020D, /* VSI commands */ ice_aqc_opc_add_vsi = 0x0210, @@ -2160,6 +2246,7 @@ enum ice_adminq_opc { ice_aqc_opc_set_event_mask = 0x0613, ice_aqc_opc_set_mac_lb = 0x0620, ice_aqc_opc_get_link_topo = 0x06E0, + ice_aqc_opc_read_i2c = 0x06E2, ice_aqc_opc_set_port_id_led = 0x06E9, ice_aqc_opc_set_gpio = 0x06EC, ice_aqc_opc_get_gpio = 0x06ED, @@ -2204,6 +2291,7 @@ enum ice_adminq_opc { /* package commands */ ice_aqc_opc_download_pkg = 0x0C40, + ice_aqc_opc_upload_section = 0x0C41, ice_aqc_opc_update_pkg = 0x0C42, ice_aqc_opc_get_pkg_info_list = 0x0C43, diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 1a5ece3bce79..a3094470d31d 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -5,6 +5,7 @@ #include "ice_base.h" #include "ice_lib.h" #include "ice_dcb_lib.h" +#include "ice_virtchnl_pf.h" static bool ice_alloc_rx_buf_zc(struct ice_rx_ring *rx_ring) { @@ -322,7 +323,7 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf break; case ICE_VSI_VF: /* Firmware expects vmvf_num to be absolute VF ID */ - tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id; + tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf->vf_id; tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF; break; case ICE_VSI_SWITCHDEV_CTRL: @@ -418,8 +419,22 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) */ rlan_ctx.crcstrip = 1; - /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */ - rlan_ctx.l2tsel = 1; + /* L2TSEL flag defines the reported L2 Tags in the receive descriptor + * and it needs to remain 1 for non-DVM capable configurations to not + * break backward compatibility for VF drivers. Setting this field to 0 + * will cause the single/outer VLAN tag to be stripped to the L2TAG2_2ND + * field in the Rx descriptor. Setting it to 1 allows the VLAN tag to + * be stripped in L2TAG1 of the Rx descriptor, which is where VFs will + * check for the tag + */ + if (ice_is_dvm_ena(hw)) + if (vsi->type == ICE_VSI_VF && + ice_vf_is_port_vlan_ena(vsi->vf)) + rlan_ctx.l2tsel = 1; + else + rlan_ctx.l2tsel = 0; + else + rlan_ctx.l2tsel = 1; rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index e2af99a763ed..9619bdb9e49a 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1518,16 +1518,27 @@ ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf, /* When a package download is in process (i.e. when the firmware's * Global Configuration Lock resource is held), only the Download - * Package, Get Version, Get Package Info List and Release Resource - * (with resource ID set to Global Config Lock) AdminQ commands are - * allowed; all others must block until the package download completes - * and the Global Config Lock is released. See also - * ice_acquire_global_cfg_lock(). + * Package, Get Version, Get Package Info List, Upload Section, + * Update Package, Set Port Parameters, Get/Set VLAN Mode Parameters, + * Add Recipe, Set Recipes to Profile Association, Get Recipe, and Get + * Recipes to Profile Association, and Release Resource (with resource + * ID set to Global Config Lock) AdminQ commands are allowed; all others + * must block until the package download completes and the Global Config + * Lock is released. See also ice_acquire_global_cfg_lock(). */ switch (le16_to_cpu(desc->opcode)) { case ice_aqc_opc_download_pkg: case ice_aqc_opc_get_pkg_info_list: case ice_aqc_opc_get_ver: + case ice_aqc_opc_upload_section: + case ice_aqc_opc_update_pkg: + case ice_aqc_opc_set_port_params: + case ice_aqc_opc_get_vlan_mode_parameters: + case ice_aqc_opc_set_vlan_mode_parameters: + case ice_aqc_opc_add_recipe: + case ice_aqc_opc_recipe_to_profile: + case ice_aqc_opc_get_recipe: + case ice_aqc_opc_get_recipe_to_profile: break; case ice_aqc_opc_release_res: if (le16_to_cpu(cmd->res_id) == ICE_AQC_RES_ID_GLBL_LOCK) @@ -2737,6 +2748,34 @@ void ice_clear_pxe_mode(struct ice_hw *hw) } /** + * ice_aq_set_port_params - set physical port parameters. + * @pi: pointer to the port info struct + * @double_vlan: if set double VLAN is enabled + * @cd: pointer to command details structure or NULL + * + * Set Physical port parameters (0x0203) + */ +int +ice_aq_set_port_params(struct ice_port_info *pi, bool double_vlan, + struct ice_sq_cd *cd) + +{ + struct ice_aqc_set_port_params *cmd; + struct ice_hw *hw = pi->hw; + struct ice_aq_desc desc; + u16 cmd_flags = 0; + + cmd = &desc.params.set_port_params; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_params); + if (double_vlan) + cmd_flags |= ICE_AQC_SET_P_PARAMS_DOUBLE_VLAN_ENA; + cmd->cmd_flags = cpu_to_le16(cmd_flags); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); +} + +/** * ice_get_link_speed_based_on_phy_type - returns link speed * @phy_type_low: lower part of phy_type * @phy_type_high: higher part of phy_type @@ -4759,6 +4798,59 @@ ice_sched_query_elem(struct ice_hw *hw, u32 node_teid, } /** + * ice_aq_read_i2c + * @hw: pointer to the hw struct + * @topo_addr: topology address for a device to communicate with + * @bus_addr: 7-bit I2C bus address + * @addr: I2C memory address (I2C offset) with up to 16 bits + * @params: I2C parameters: bit [7] - Repeated start, + * bits [6:5] data offset size, + * bit [4] - I2C address type, + * bits [3:0] - data size to read (0-16 bytes) + * @data: pointer to data (0 to 16 bytes) to be read from the I2C device + * @cd: pointer to command details structure or NULL + * + * Read I2C (0x06E2) + */ +int +ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, + u16 bus_addr, __le16 addr, u8 params, u8 *data, + struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc = { 0 }; + struct ice_aqc_i2c *cmd; + u8 data_size; + int status; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_read_i2c); + cmd = &desc.params.read_i2c; + + if (!data) + return -EINVAL; + + data_size = FIELD_GET(ICE_AQC_I2C_DATA_SIZE_M, params); + + cmd->i2c_bus_addr = cpu_to_le16(bus_addr); + cmd->topo_addr = topo_addr; + cmd->i2c_params = params; + cmd->i2c_addr = addr; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + if (!status) { + struct ice_aqc_read_i2c_resp *resp; + u8 i; + + resp = &desc.params.read_i2c_resp; + for (i = 0; i < data_size; i++) { + *data = resp->i2c_data[i]; + data++; + } + } + + return status; +} + +/** * ice_aq_set_driver_param - Set driver parameter to share via firmware * @hw: pointer to the HW struct * @idx: parameter index to set diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 1c57097ddf0b..1efe6b2c32f0 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -4,6 +4,8 @@ #ifndef _ICE_COMMON_H_ #define _ICE_COMMON_H_ +#include <linux/bitfield.h> + #include "ice.h" #include "ice_type.h" #include "ice_nvm.h" @@ -85,6 +87,9 @@ int ice_aq_send_driver_ver(struct ice_hw *hw, struct ice_driver_ver *dv, struct ice_sq_cd *cd); int +ice_aq_set_port_params(struct ice_port_info *pi, bool double_vlan, + struct ice_sq_cd *cd); +int ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, struct ice_aqc_get_phy_caps_data *caps, struct ice_sq_cd *cd); @@ -205,5 +210,9 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw); int ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add); +int +ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, + u16 bus_addr, __le16 addr, u8 params, u8 *data, + struct ice_sq_cd *cd); bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index b94d8daeaa58..add90e75f05c 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -916,7 +916,8 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring, return; /* Insert 802.1p priority into VLAN header */ - if ((first->tx_flags & ICE_TX_FLAGS_HW_VLAN) || + if ((first->tx_flags & ICE_TX_FLAGS_HW_VLAN || + first->tx_flags & ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN) || skb->priority != TC_PRIO_CONTROL) { first->tx_flags &= ~ICE_TX_FLAGS_VLAN_PR_M; /* Mask the lower 3 bits to set the 802.1p priority */ @@ -925,7 +926,10 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring, /* if this is not already set it means a VLAN 0 + priority needs * to be offloaded */ - first->tx_flags |= ICE_TX_FLAGS_HW_VLAN; + if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2) + first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN; + else + first->tx_flags |= ICE_TX_FLAGS_HW_VLAN; } } diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 73edc24d81d5..9a84d746a6c4 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -116,9 +116,12 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi; struct net_device *uplink_netdev = uplink_vsi->netdev; struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + struct ice_vsi_vlan_ops *vlan_ops; bool rule_added = false; - ice_vsi_manage_vlan_stripping(ctrl_vsi, false); + vlan_ops = ice_get_compat_vsi_vlan_ops(ctrl_vsi); + if (vlan_ops->dis_stripping(ctrl_vsi)) + return -ENODEV; ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx); @@ -127,7 +130,7 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) __dev_mc_unsync(uplink_netdev, NULL); netif_addr_unlock_bh(uplink_netdev); - if (ice_vsi_add_vlan(uplink_vsi, 0, ICE_FWD_TO_VSI)) + if (ice_vsi_add_vlan_zero(uplink_vsi)) goto err_def_rx; if (!ice_is_dflt_vsi_in_use(uplink_vsi->vsw)) { @@ -173,10 +176,20 @@ static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf) int q_id; ice_for_each_txq(vsi, q_id) { - struct ice_repr *repr = pf->vf[q_id].repr; - struct ice_q_vector *q_vector = repr->q_vector; - struct ice_tx_ring *tx_ring = vsi->tx_rings[q_id]; - struct ice_rx_ring *rx_ring = vsi->rx_rings[q_id]; + struct ice_q_vector *q_vector; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; + struct ice_repr *repr; + struct ice_vf *vf; + + vf = ice_get_vf_by_id(pf, q_id); + if (WARN_ON(!vf)) + continue; + + repr = vf->repr; + q_vector = repr->q_vector; + tx_ring = vsi->tx_rings[q_id]; + rx_ring = vsi->rx_rings[q_id]; q_vector->vsi = vsi; q_vector->reg_idx = vsi->q_vectors[0]->reg_idx; @@ -196,6 +209,38 @@ static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf) rx_ring->q_vector = q_vector; rx_ring->next = NULL; rx_ring->netdev = repr->netdev; + + ice_put_vf(vf); + } +} + +/** + * ice_eswitch_release_reprs - clear PR VSIs configuration + * @pf: poiner to PF struct + * @ctrl_vsi: pointer to switchdev control VSI + */ +static void +ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi) +{ + struct ice_vf *vf; + unsigned int bkt; + + lockdep_assert_held(&pf->vfs.table_lock); + + ice_for_each_vf(pf, bkt, vf) { + struct ice_vsi *vsi = vf->repr->src_vsi; + + /* Skip VFs that aren't configured */ + if (!vf->repr->dst) + continue; + + ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); + metadata_dst_free(vf->repr->dst); + vf->repr->dst = NULL; + ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, + ICE_FWD_TO_VSI); + + netif_napi_del(&vf->repr->q_vector->napi); } } @@ -207,11 +252,13 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) { struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; int max_vsi_num = 0; - int i; + struct ice_vf *vf; + unsigned int bkt; + + lockdep_assert_held(&pf->vfs.table_lock); - ice_for_each_vf(pf, i) { - struct ice_vsi *vsi = pf->vf[i].repr->src_vsi; - struct ice_vf *vf = &pf->vf[i]; + ice_for_each_vf(pf, bkt, vf) { + struct ice_vsi *vsi = vf->repr->src_vsi; ice_remove_vsi_fltr(&pf->hw, vsi->idx); vf->repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, @@ -228,14 +275,16 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) vf->hw_lan_addr.addr, ICE_FWD_TO_VSI); metadata_dst_free(vf->repr->dst); + vf->repr->dst = NULL; goto err; } - if (ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI)) { + if (ice_vsi_add_vlan_zero(vsi)) { ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, ICE_FWD_TO_VSI); metadata_dst_free(vf->repr->dst); + vf->repr->dst = NULL; ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); goto err; } @@ -249,8 +298,8 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) netif_keep_dst(vf->repr->netdev); } - ice_for_each_vf(pf, i) { - struct ice_repr *repr = pf->vf[i].repr; + ice_for_each_vf(pf, bkt, vf) { + struct ice_repr *repr = vf->repr; struct ice_vsi *vsi = repr->src_vsi; struct metadata_dst *dst; @@ -263,43 +312,12 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) return 0; err: - for (i = i - 1; i >= 0; i--) { - struct ice_vsi *vsi = pf->vf[i].repr->src_vsi; - struct ice_vf *vf = &pf->vf[i]; - - ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); - metadata_dst_free(vf->repr->dst); - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, - ICE_FWD_TO_VSI); - } + ice_eswitch_release_reprs(pf, ctrl_vsi); return -ENODEV; } /** - * ice_eswitch_release_reprs - clear PR VSIs configuration - * @pf: poiner to PF struct - * @ctrl_vsi: pointer to switchdev control VSI - */ -static void -ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi) -{ - int i; - - ice_for_each_vf(pf, i) { - struct ice_vsi *vsi = pf->vf[i].repr->src_vsi; - struct ice_vf *vf = &pf->vf[i]; - - ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); - metadata_dst_free(vf->repr->dst); - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, - ICE_FWD_TO_VSI); - - netif_napi_del(&vf->repr->q_vector->napi); - } -} - -/** * ice_eswitch_update_repr - reconfigure VF port representor * @vsi: VF VSI for which port representor is configured */ @@ -313,7 +331,7 @@ void ice_eswitch_update_repr(struct ice_vsi *vsi) if (!ice_is_switchdev_running(pf)) return; - vf = &pf->vf[vsi->vf_id]; + vf = vsi->vf; repr = vf->repr; repr->src_vsi = vsi; repr->dst->u.port_info.port_id = vsi->vsi_num; @@ -321,7 +339,8 @@ void ice_eswitch_update_repr(struct ice_vsi *vsi) ret = ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof); if (ret) { ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, ICE_FWD_TO_VSI); - dev_err(ice_pf_to_dev(pf), "Failed to update VF %d port representor", vsi->vf_id); + dev_err(ice_pf_to_dev(pf), "Failed to update VF %d port representor", + vsi->vf->vf_id); } } @@ -405,7 +424,7 @@ static void ice_eswitch_release_env(struct ice_pf *pf) static struct ice_vsi * ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, ICE_INVAL_VFID, NULL); + return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, NULL, NULL); } /** @@ -414,10 +433,13 @@ ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) */ static void ice_eswitch_napi_del(struct ice_pf *pf) { - int i; + struct ice_vf *vf; + unsigned int bkt; - ice_for_each_vf(pf, i) - netif_napi_del(&pf->vf[i].repr->q_vector->napi); + lockdep_assert_held(&pf->vfs.table_lock); + + ice_for_each_vf(pf, bkt, vf) + netif_napi_del(&vf->repr->q_vector->napi); } /** @@ -426,10 +448,13 @@ static void ice_eswitch_napi_del(struct ice_pf *pf) */ static void ice_eswitch_napi_enable(struct ice_pf *pf) { - int i; + struct ice_vf *vf; + unsigned int bkt; + + lockdep_assert_held(&pf->vfs.table_lock); - ice_for_each_vf(pf, i) - napi_enable(&pf->vf[i].repr->q_vector->napi); + ice_for_each_vf(pf, bkt, vf) + napi_enable(&vf->repr->q_vector->napi); } /** @@ -438,10 +463,13 @@ static void ice_eswitch_napi_enable(struct ice_pf *pf) */ static void ice_eswitch_napi_disable(struct ice_pf *pf) { - int i; + struct ice_vf *vf; + unsigned int bkt; + + lockdep_assert_held(&pf->vfs.table_lock); - ice_for_each_vf(pf, i) - napi_disable(&pf->vf[i].repr->q_vector->napi); + ice_for_each_vf(pf, bkt, vf) + napi_disable(&vf->repr->q_vector->napi); } /** @@ -519,7 +547,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, if (pf->eswitch_mode == mode) return 0; - if (pf->num_alloc_vfs) { + if (ice_has_vfs(pf)) { dev_info(ice_pf_to_dev(pf), "Changing eswitch mode is allowed only if there is no VFs created"); NL_SET_ERR_MSG_MOD(extack, "Changing eswitch mode is allowed only if there is no VFs created"); return -EOPNOTSUPP; @@ -610,16 +638,17 @@ int ice_eswitch_configure(struct ice_pf *pf) */ static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf) { - struct ice_repr *repr; - int i; + struct ice_vf *vf; + unsigned int bkt; + + lockdep_assert_held(&pf->vfs.table_lock); if (test_bit(ICE_DOWN, pf->state)) return; - ice_for_each_vf(pf, i) { - repr = pf->vf[i].repr; - if (repr) - ice_repr_start_tx_queues(repr); + ice_for_each_vf(pf, bkt, vf) { + if (vf->repr) + ice_repr_start_tx_queues(vf->repr); } } @@ -629,16 +658,17 @@ static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf) */ void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { - struct ice_repr *repr; - int i; + struct ice_vf *vf; + unsigned int bkt; + + lockdep_assert_held(&pf->vfs.table_lock); if (test_bit(ICE_DOWN, pf->state)) return; - ice_for_each_vf(pf, i) { - repr = pf->vf[i].repr; - if (repr) - ice_repr_stop_tx_queues(repr); + ice_for_each_vf(pf, bkt, vf) { + if (vf->repr) + ice_repr_stop_tx_queues(vf->repr); } } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index a5dc9824e255..24cda7e1f916 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -164,6 +164,7 @@ static const struct ice_priv_flag ice_gstrings_priv_flags[] = { ICE_PRIV_FLAG("vf-true-promisc-support", ICE_FLAG_VF_TRUE_PROMISC_ENA), ICE_PRIV_FLAG("mdd-auto-reset-vf", ICE_FLAG_MDD_AUTO_RESET_VF), + ICE_PRIV_FLAG("vf-vlan-pruning", ICE_FLAG_VF_VLAN_PRUNING), ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX), }; @@ -315,16 +316,20 @@ out: */ static bool ice_active_vfs(struct ice_pf *pf) { - unsigned int i; - - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; + bool active = false; + struct ice_vf *vf; + unsigned int bkt; - if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) - return true; + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { + if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + active = true; + break; + } } + rcu_read_unlock(); - return false; + return active; } /** @@ -1295,6 +1300,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) change_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags); ret = -EAGAIN; } + + if (test_bit(ICE_FLAG_VF_VLAN_PRUNING, change_flags) && + ice_has_vfs(pf)) { + dev_err(dev, "vf-vlan-pruning: VLAN pruning cannot be changed while VFs are active.\n"); + /* toggle bit back to previous state */ + change_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags); + ret = -EOPNOTSUPP; + } ethtool_exit: clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags); return ret; @@ -2803,6 +2816,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, /* clone ring and setup updated count */ xdp_rings[i] = *vsi->xdp_rings[i]; xdp_rings[i].count = new_tx_cnt; + xdp_rings[i].next_dd = ICE_RING_QUARTER(&xdp_rings[i]) - 1; + xdp_rings[i].next_rs = ICE_RING_QUARTER(&xdp_rings[i]) - 1; xdp_rings[i].desc = NULL; xdp_rings[i].tx_buf = NULL; err = ice_setup_tx_ring(&xdp_rings[i]); diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 4deb2c9446ec..38fe0a7e6975 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -5,9 +5,17 @@ #include "ice_flex_pipe.h" #include "ice_flow.h" +/* For supporting double VLAN mode, it is necessary to enable or disable certain + * boost tcam entries. The metadata labels names that match the following + * prefixes will be saved to allow enabling double VLAN mode. + */ +#define ICE_DVM_PRE "BOOST_MAC_VLAN_DVM" /* enable these entries */ +#define ICE_SVM_PRE "BOOST_MAC_VLAN_SVM" /* disable these entries */ + /* To support tunneling entries by PF, the package will append the PF number to * the label; for example TNL_VXLAN_PF0, TNL_VXLAN_PF1, TNL_VXLAN_PF2, etc. */ +#define ICE_TNL_PRE "TNL_" static const struct ice_tunnel_type_scan tnls[] = { { TNL_VXLAN, "TNL_VXLAN_PF" }, { TNL_GENEVE, "TNL_GENEVE_PF" }, @@ -523,6 +531,55 @@ ice_enum_labels(struct ice_seg *ice_seg, u32 type, struct ice_pkg_enum *state, } /** + * ice_add_tunnel_hint + * @hw: pointer to the HW structure + * @label_name: label text + * @val: value of the tunnel port boost entry + */ +static void ice_add_tunnel_hint(struct ice_hw *hw, char *label_name, u16 val) +{ + if (hw->tnl.count < ICE_TUNNEL_MAX_ENTRIES) { + u16 i; + + for (i = 0; tnls[i].type != TNL_LAST; i++) { + size_t len = strlen(tnls[i].label_prefix); + + /* Look for matching label start, before continuing */ + if (strncmp(label_name, tnls[i].label_prefix, len)) + continue; + + /* Make sure this label matches our PF. Note that the PF + * character ('0' - '7') will be located where our + * prefix string's null terminator is located. + */ + if ((label_name[len] - '0') == hw->pf_id) { + hw->tnl.tbl[hw->tnl.count].type = tnls[i].type; + hw->tnl.tbl[hw->tnl.count].valid = false; + hw->tnl.tbl[hw->tnl.count].boost_addr = val; + hw->tnl.tbl[hw->tnl.count].port = 0; + hw->tnl.count++; + break; + } + } + } +} + +/** + * ice_add_dvm_hint + * @hw: pointer to the HW structure + * @val: value of the boost entry + * @enable: true if entry needs to be enabled, or false if needs to be disabled + */ +static void ice_add_dvm_hint(struct ice_hw *hw, u16 val, bool enable) +{ + if (hw->dvm_upd.count < ICE_DVM_MAX_ENTRIES) { + hw->dvm_upd.tbl[hw->dvm_upd.count].boost_addr = val; + hw->dvm_upd.tbl[hw->dvm_upd.count].enable = enable; + hw->dvm_upd.count++; + } +} + +/** * ice_init_pkg_hints * @hw: pointer to the HW structure * @ice_seg: pointer to the segment of the package scan (non-NULL) @@ -548,32 +605,23 @@ static void ice_init_pkg_hints(struct ice_hw *hw, struct ice_seg *ice_seg) label_name = ice_enum_labels(ice_seg, ICE_SID_LBL_RXPARSER_TMEM, &state, &val); - while (label_name && hw->tnl.count < ICE_TUNNEL_MAX_ENTRIES) { - for (i = 0; tnls[i].type != TNL_LAST; i++) { - size_t len = strlen(tnls[i].label_prefix); + while (label_name) { + if (!strncmp(label_name, ICE_TNL_PRE, strlen(ICE_TNL_PRE))) + /* check for a tunnel entry */ + ice_add_tunnel_hint(hw, label_name, val); - /* Look for matching label start, before continuing */ - if (strncmp(label_name, tnls[i].label_prefix, len)) - continue; + /* check for a dvm mode entry */ + else if (!strncmp(label_name, ICE_DVM_PRE, strlen(ICE_DVM_PRE))) + ice_add_dvm_hint(hw, val, true); - /* Make sure this label matches our PF. Note that the PF - * character ('0' - '7') will be located where our - * prefix string's null terminator is located. - */ - if ((label_name[len] - '0') == hw->pf_id) { - hw->tnl.tbl[hw->tnl.count].type = tnls[i].type; - hw->tnl.tbl[hw->tnl.count].valid = false; - hw->tnl.tbl[hw->tnl.count].boost_addr = val; - hw->tnl.tbl[hw->tnl.count].port = 0; - hw->tnl.count++; - break; - } - } + /* check for a svm mode entry */ + else if (!strncmp(label_name, ICE_SVM_PRE, strlen(ICE_SVM_PRE))) + ice_add_dvm_hint(hw, val, false); label_name = ice_enum_labels(NULL, 0, &state, &val); } - /* Cache the appropriate boost TCAM entry pointers */ + /* Cache the appropriate boost TCAM entry pointers for tunnels */ for (i = 0; i < hw->tnl.count; i++) { ice_find_boost_entry(ice_seg, hw->tnl.tbl[i].boost_addr, &hw->tnl.tbl[i].boost_entry); @@ -583,6 +631,11 @@ static void ice_init_pkg_hints(struct ice_hw *hw, struct ice_seg *ice_seg) hw->tnl.valid_count[hw->tnl.tbl[i].type]++; } } + + /* Cache the appropriate boost TCAM entry pointers for DVM and SVM */ + for (i = 0; i < hw->dvm_upd.count; i++) + ice_find_boost_entry(ice_seg, hw->dvm_upd.tbl[i].boost_addr, + &hw->dvm_upd.tbl[i].boost_entry); } /* Key creation */ @@ -874,6 +927,27 @@ ice_aq_download_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf, } /** + * ice_aq_upload_section + * @hw: pointer to the hardware structure + * @pkg_buf: the package buffer which will receive the section + * @buf_size: the size of the package buffer + * @cd: pointer to command details structure or NULL + * + * Upload Section (0x0C41) + */ +int +ice_aq_upload_section(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf, + u16 buf_size, struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_upload_section); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + return ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd); +} + +/** * ice_aq_update_pkg * @hw: pointer to the hardware structure * @pkg_buf: the package cmd buffer @@ -957,25 +1031,21 @@ ice_find_seg_in_pkg(struct ice_hw *hw, u32 seg_type, } /** - * ice_update_pkg + * ice_update_pkg_no_lock * @hw: pointer to the hardware structure * @bufs: pointer to an array of buffers * @count: the number of buffers in the array - * - * Obtains change lock and updates package. */ -static int ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count) +static int +ice_update_pkg_no_lock(struct ice_hw *hw, struct ice_buf *bufs, u32 count) { - u32 offset, info, i; - int status; - - status = ice_acquire_change_lock(hw, ICE_RES_WRITE); - if (status) - return status; + int status = 0; + u32 i; for (i = 0; i < count; i++) { struct ice_buf_hdr *bh = (struct ice_buf_hdr *)(bufs + i); bool last = ((i + 1) == count); + u32 offset, info; status = ice_aq_update_pkg(hw, bh, le16_to_cpu(bh->data_end), last, &offset, &info, NULL); @@ -987,6 +1057,27 @@ static int ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count) } } + return status; +} + +/** + * ice_update_pkg + * @hw: pointer to the hardware structure + * @bufs: pointer to an array of buffers + * @count: the number of buffers in the array + * + * Obtains change lock and updates package. + */ +static int ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count) +{ + int status; + + status = ice_acquire_change_lock(hw, ICE_RES_WRITE); + if (status) + return status; + + status = ice_update_pkg_no_lock(hw, bufs, count); + ice_release_change_lock(hw); return status; @@ -1080,6 +1171,13 @@ ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count) break; } + if (!status) { + status = ice_set_vlan_mode(hw); + if (status) + ice_debug(hw, ICE_DBG_PKG, "Failed to set VLAN mode: err %d\n", + status); + } + ice_release_global_cfg_lock(hw); return state; @@ -1117,6 +1215,7 @@ static enum ice_ddp_state ice_download_pkg(struct ice_hw *hw, struct ice_seg *ice_seg) { struct ice_buf_table *ice_buf_tbl; + int status; ice_debug(hw, ICE_DBG_PKG, "Segment format version: %d.%d.%d.%d\n", ice_seg->hdr.seg_format_ver.major, @@ -1133,8 +1232,12 @@ ice_download_pkg(struct ice_hw *hw, struct ice_seg *ice_seg) ice_debug(hw, ICE_DBG_PKG, "Seg buf count: %d\n", le32_to_cpu(ice_buf_tbl->buf_count)); - return ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array, - le32_to_cpu(ice_buf_tbl->buf_count)); + status = ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array, + le32_to_cpu(ice_buf_tbl->buf_count)); + + ice_post_pkg_dwnld_vlan_mode_cfg(hw); + + return status; } /** @@ -1897,7 +2000,7 @@ void ice_init_prof_result_bm(struct ice_hw *hw) * * Frees a package buffer */ -static void ice_pkg_buf_free(struct ice_hw *hw, struct ice_buf_build *bld) +void ice_pkg_buf_free(struct ice_hw *hw, struct ice_buf_build *bld) { devm_kfree(ice_hw_to_dev(hw), bld); } @@ -1997,6 +2100,43 @@ ice_pkg_buf_alloc_section(struct ice_buf_build *bld, u32 type, u16 size) } /** + * ice_pkg_buf_alloc_single_section + * @hw: pointer to the HW structure + * @type: the section type value + * @size: the size of the section to reserve (in bytes) + * @section: returns pointer to the section + * + * Allocates a package buffer with a single section. + * Note: all package contents must be in Little Endian form. + */ +struct ice_buf_build * +ice_pkg_buf_alloc_single_section(struct ice_hw *hw, u32 type, u16 size, + void **section) +{ + struct ice_buf_build *buf; + + if (!section) + return NULL; + + buf = ice_pkg_buf_alloc(hw); + if (!buf) + return NULL; + + if (ice_pkg_buf_reserve_section(buf, 1)) + goto ice_pkg_buf_alloc_single_section_err; + + *section = ice_pkg_buf_alloc_section(buf, type, size); + if (!*section) + goto ice_pkg_buf_alloc_single_section_err; + + return buf; + +ice_pkg_buf_alloc_single_section_err: + ice_pkg_buf_free(hw, buf); + return NULL; +} + +/** * ice_pkg_buf_get_active_sections * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc()) * @@ -2023,7 +2163,7 @@ static u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld) * * Return a pointer to the buffer's header */ -static struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld) +struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld) { if (!bld) return NULL; @@ -2060,6 +2200,89 @@ ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port, } /** + * ice_upd_dvm_boost_entry + * @hw: pointer to the HW structure + * @entry: pointer to double vlan boost entry info + */ +static int +ice_upd_dvm_boost_entry(struct ice_hw *hw, struct ice_dvm_entry *entry) +{ + struct ice_boost_tcam_section *sect_rx, *sect_tx; + int status = -ENOSPC; + struct ice_buf_build *bld; + u8 val, dc, nm; + + bld = ice_pkg_buf_alloc(hw); + if (!bld) + return -ENOMEM; + + /* allocate 2 sections, one for Rx parser, one for Tx parser */ + if (ice_pkg_buf_reserve_section(bld, 2)) + goto ice_upd_dvm_boost_entry_err; + + sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM, + struct_size(sect_rx, tcam, 1)); + if (!sect_rx) + goto ice_upd_dvm_boost_entry_err; + sect_rx->count = cpu_to_le16(1); + + sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM, + struct_size(sect_tx, tcam, 1)); + if (!sect_tx) + goto ice_upd_dvm_boost_entry_err; + sect_tx->count = cpu_to_le16(1); + + /* copy original boost entry to update package buffer */ + memcpy(sect_rx->tcam, entry->boost_entry, sizeof(*sect_rx->tcam)); + + /* re-write the don't care and never match bits accordingly */ + if (entry->enable) { + /* all bits are don't care */ + val = 0x00; + dc = 0xFF; + nm = 0x00; + } else { + /* disable, one never match bit, the rest are don't care */ + val = 0x00; + dc = 0xF7; + nm = 0x08; + } + + ice_set_key((u8 *)§_rx->tcam[0].key, sizeof(sect_rx->tcam[0].key), + &val, NULL, &dc, &nm, 0, sizeof(u8)); + + /* exact copy of entry to Tx section entry */ + memcpy(sect_tx->tcam, sect_rx->tcam, sizeof(*sect_tx->tcam)); + + status = ice_update_pkg_no_lock(hw, ice_pkg_buf(bld), 1); + +ice_upd_dvm_boost_entry_err: + ice_pkg_buf_free(hw, bld); + + return status; +} + +/** + * ice_set_dvm_boost_entries + * @hw: pointer to the HW structure + * + * Enable double vlan by updating the appropriate boost tcam entries. + */ +int ice_set_dvm_boost_entries(struct ice_hw *hw) +{ + int status; + u16 i; + + for (i = 0; i < hw->dvm_upd.count; i++) { + status = ice_upd_dvm_boost_entry(hw, &hw->dvm_upd.tbl[i]); + if (status) + return status; + } + + return 0; +} + +/** * ice_tunnel_idx_to_entry - convert linear index to the sparse one * @hw: pointer to the HW structure * @type: type of tunnel diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h index 6cbc29bcb02f..2fd5312494c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -89,6 +89,12 @@ ice_init_prof_result_bm(struct ice_hw *hw); int ice_get_sw_fv_list(struct ice_hw *hw, u8 *prot_ids, u16 ids_cnt, unsigned long *bm, struct list_head *fv_list); +int +ice_pkg_buf_unreserve_section(struct ice_buf_build *bld, u16 count); +u16 ice_pkg_buf_get_free_space(struct ice_buf_build *bld); +int +ice_aq_upload_section(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf, + u16 buf_size, struct ice_sq_cd *cd); bool ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port, enum ice_tunnel_type type); @@ -96,6 +102,7 @@ int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti); int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti); +int ice_set_dvm_boost_entries(struct ice_hw *hw); /* Rx parser PTYPE functions */ bool ice_hw_ptype_ena(struct ice_hw *hw, u16 ptype); @@ -119,4 +126,10 @@ void ice_fill_blk_tbls(struct ice_hw *hw); void ice_clear_hw_tbls(struct ice_hw *hw); void ice_free_hw_tbls(struct ice_hw *hw); int ice_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 id); +struct ice_buf_build * +ice_pkg_buf_alloc_single_section(struct ice_hw *hw, u32 type, u16 size, + void **section); +struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld); +void ice_pkg_buf_free(struct ice_hw *hw, struct ice_buf_build *bld); + #endif /* _ICE_FLEX_PIPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index fc087e0b5292..5735e9542a49 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -162,6 +162,7 @@ struct ice_meta_sect { #define ICE_SID_RXPARSER_MARKER_PTYPE 55 #define ICE_SID_RXPARSER_BOOST_TCAM 56 +#define ICE_SID_RXPARSER_METADATA_INIT 58 #define ICE_SID_TXPARSER_BOOST_TCAM 66 #define ICE_SID_XLT0_PE 80 @@ -442,6 +443,19 @@ struct ice_tunnel_table { u16 valid_count[__TNL_TYPE_CNT]; }; +struct ice_dvm_entry { + u16 boost_addr; + u16 enable; + struct ice_boost_tcam_entry *boost_entry; +}; + +#define ICE_DVM_MAX_ENTRIES 48 + +struct ice_dvm_table { + struct ice_dvm_entry tbl[ICE_DVM_MAX_ENTRIES]; + u16 count; +}; + struct ice_pkg_es { __le16 count; __le16 offset; @@ -662,4 +676,30 @@ enum ice_prof_type { ICE_PROF_TUN_ALL = 0x6, ICE_PROF_ALL = 0xFF, }; + +/* Number of bits/bytes contained in meta init entry. Note, this should be a + * multiple of 32 bits. + */ +#define ICE_META_INIT_BITS 192 +#define ICE_META_INIT_DW_CNT (ICE_META_INIT_BITS / (sizeof(__le32) * \ + BITS_PER_BYTE)) + +/* The meta init Flag field starts at this bit */ +#define ICE_META_FLAGS_ST 123 + +/* The entry and bit to check for Double VLAN Mode (DVM) support */ +#define ICE_META_VLAN_MODE_ENTRY 0 +#define ICE_META_FLAG_VLAN_MODE 60 +#define ICE_META_VLAN_MODE_BIT (ICE_META_FLAGS_ST + \ + ICE_META_FLAG_VLAN_MODE) + +struct ice_meta_init_entry { + __le32 bm[ICE_META_INIT_DW_CNT]; +}; + +struct ice_meta_init_section { + __le16 count; + __le16 offset; + struct ice_meta_init_entry entry; +}; #endif /* _ICE_FLEX_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c index c29177c6bb9d..af57eb114966 100644 --- a/drivers/net/ethernet/intel/ice/ice_fltr.c +++ b/drivers/net/ethernet/intel/ice/ice_fltr.c @@ -203,21 +203,22 @@ ice_fltr_add_mac_to_list(struct ice_vsi *vsi, struct list_head *list, * ice_fltr_add_vlan_to_list - add VLAN filter info to exsisting list * @vsi: pointer to VSI struct * @list: list to add filter info to - * @vlan_id: VLAN ID to add - * @action: filter action + * @vlan: VLAN filter details */ static int ice_fltr_add_vlan_to_list(struct ice_vsi *vsi, struct list_head *list, - u16 vlan_id, enum ice_sw_fwd_act_type action) + struct ice_vlan *vlan) { struct ice_fltr_info info = { 0 }; info.flag = ICE_FLTR_TX; info.src_id = ICE_SRC_ID_VSI; info.lkup_type = ICE_SW_LKUP_VLAN; - info.fltr_act = action; + info.fltr_act = ICE_FWD_TO_VSI; info.vsi_handle = vsi->idx; - info.l_data.vlan.vlan_id = vlan_id; + info.l_data.vlan.vlan_id = vlan->vid; + info.l_data.vlan.tpid = vlan->tpid; + info.l_data.vlan.tpid_valid = true; return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info, list); @@ -310,19 +311,17 @@ ice_fltr_prepare_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac, /** * ice_fltr_prepare_vlan - add or remove VLAN filter * @vsi: pointer to VSI struct - * @vlan_id: VLAN ID to add - * @action: action to be performed on filter match + * @vlan: VLAN filter details * @vlan_action: pointer to add or remove VLAN function */ static int -ice_fltr_prepare_vlan(struct ice_vsi *vsi, u16 vlan_id, - enum ice_sw_fwd_act_type action, +ice_fltr_prepare_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan, int (*vlan_action)(struct ice_vsi *, struct list_head *)) { LIST_HEAD(tmp_list); int result; - if (ice_fltr_add_vlan_to_list(vsi, &tmp_list, vlan_id, action)) + if (ice_fltr_add_vlan_to_list(vsi, &tmp_list, vlan)) return -ENOMEM; result = vlan_action(vsi, &tmp_list); @@ -395,27 +394,21 @@ int ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac, /** * ice_fltr_add_vlan - add single VLAN filter * @vsi: pointer to VSI struct - * @vlan_id: VLAN ID to add - * @action: action to be performed on filter match + * @vlan: VLAN filter details */ -int ice_fltr_add_vlan(struct ice_vsi *vsi, u16 vlan_id, - enum ice_sw_fwd_act_type action) +int ice_fltr_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan) { - return ice_fltr_prepare_vlan(vsi, vlan_id, action, - ice_fltr_add_vlan_list); + return ice_fltr_prepare_vlan(vsi, vlan, ice_fltr_add_vlan_list); } /** * ice_fltr_remove_vlan - remove VLAN filter * @vsi: pointer to VSI struct - * @vlan_id: filter VLAN to remove - * @action: action to remove + * @vlan: VLAN filter details */ -int ice_fltr_remove_vlan(struct ice_vsi *vsi, u16 vlan_id, - enum ice_sw_fwd_act_type action) +int ice_fltr_remove_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan) { - return ice_fltr_prepare_vlan(vsi, vlan_id, action, - ice_fltr_remove_vlan_list); + return ice_fltr_prepare_vlan(vsi, vlan, ice_fltr_remove_vlan_list); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.h b/drivers/net/ethernet/intel/ice/ice_fltr.h index 3eb42479175f..0f3dbc308eec 100644 --- a/drivers/net/ethernet/intel/ice/ice_fltr.h +++ b/drivers/net/ethernet/intel/ice/ice_fltr.h @@ -4,6 +4,8 @@ #ifndef _ICE_FLTR_H_ #define _ICE_FLTR_H_ +#include "ice_vlan.h" + void ice_fltr_free_list(struct device *dev, struct list_head *h); int ice_fltr_set_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi, @@ -32,12 +34,8 @@ ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac, enum ice_sw_fwd_act_type action); int ice_fltr_remove_mac_list(struct ice_vsi *vsi, struct list_head *list); -int -ice_fltr_add_vlan(struct ice_vsi *vsi, u16 vid, - enum ice_sw_fwd_act_type action); -int -ice_fltr_remove_vlan(struct ice_vsi *vsi, u16 vid, - enum ice_sw_fwd_act_type action); +int ice_fltr_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan); +int ice_fltr_remove_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan); int ice_fltr_add_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag, diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c new file mode 100644 index 000000000000..755e1580f368 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2021, Intel Corporation. */ + +#include "ice.h" +#include "ice_lib.h" +#include <linux/tty_driver.h> + +/** + * ice_gnss_read - Read data from internal GNSS module + * @work: GNSS read work structure + * + * Read the data from internal GNSS receiver, number of bytes read will be + * returned in *read_data parameter. + */ +static void ice_gnss_read(struct kthread_work *work) +{ + struct gnss_serial *gnss = container_of(work, struct gnss_serial, + read_work.work); + struct ice_aqc_link_topo_addr link_topo; + u8 i2c_params, bytes_read; + struct tty_port *port; + struct ice_pf *pf; + struct ice_hw *hw; + __be16 data_len_b; + char *buf = NULL; + u16 i, data_len; + int err = 0; + + pf = gnss->back; + if (!pf || !gnss->tty || !gnss->tty->port) { + err = -EFAULT; + goto exit; + } + + hw = &pf->hw; + port = gnss->tty->port; + + buf = (char *)get_zeroed_page(GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto exit; + } + + memset(&link_topo, 0, sizeof(struct ice_aqc_link_topo_addr)); + link_topo.topo_params.index = ICE_E810T_GNSS_I2C_BUS; + link_topo.topo_params.node_type_ctx |= + FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M, + ICE_AQC_LINK_TOPO_NODE_CTX_OVERRIDE); + + i2c_params = ICE_GNSS_UBX_DATA_LEN_WIDTH | + ICE_AQC_I2C_USE_REPEATED_START; + + /* Read data length in a loop, when it's not 0 the data is ready */ + for (i = 0; i < ICE_MAX_UBX_READ_TRIES; i++) { + err = ice_aq_read_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR, + cpu_to_le16(ICE_GNSS_UBX_DATA_LEN_H), + i2c_params, (u8 *)&data_len_b, NULL); + if (err) + goto exit_buf; + + data_len = be16_to_cpu(data_len_b); + if (data_len != 0 && data_len != U16_MAX) + break; + + mdelay(10); + } + + data_len = min(data_len, (u16)PAGE_SIZE); + data_len = tty_buffer_request_room(port, data_len); + if (!data_len) { + err = -ENOMEM; + goto exit_buf; + } + + /* Read received data */ + for (i = 0; i < data_len; i += bytes_read) { + u16 bytes_left = data_len - i; + + bytes_read = bytes_left < ICE_MAX_I2C_DATA_SIZE ? bytes_left : + ICE_MAX_I2C_DATA_SIZE; + + err = ice_aq_read_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR, + cpu_to_le16(ICE_GNSS_UBX_EMPTY_DATA), + bytes_read, &buf[i], NULL); + if (err) + goto exit_buf; + } + + /* Send the data to the tty layer for users to read. This doesn't + * actually push the data through unless tty->low_latency is set. + */ + tty_insert_flip_string(port, buf, i); + tty_flip_buffer_push(port); + +exit_buf: + free_page((unsigned long)buf); + kthread_queue_delayed_work(gnss->kworker, &gnss->read_work, + ICE_GNSS_TIMER_DELAY_TIME); +exit: + if (err) + dev_dbg(ice_pf_to_dev(pf), "GNSS failed to read err=%d\n", err); +} + +/** + * ice_gnss_struct_init - Initialize GNSS structure for the TTY + * @pf: Board private structure + */ +static struct gnss_serial *ice_gnss_struct_init(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + struct kthread_worker *kworker; + struct gnss_serial *gnss; + + gnss = kzalloc(sizeof(*gnss), GFP_KERNEL); + if (!gnss) + return NULL; + + mutex_init(&gnss->gnss_mutex); + gnss->open_count = 0; + gnss->back = pf; + pf->gnss_serial = gnss; + + kthread_init_delayed_work(&gnss->read_work, ice_gnss_read); + /* Allocate a kworker for handling work required for the GNSS TTY + * writes. + */ + kworker = kthread_create_worker(0, "ice-gnss-%s", dev_name(dev)); + if (!kworker) { + kfree(gnss); + return NULL; + } + + gnss->kworker = kworker; + + return gnss; +} + +/** + * ice_gnss_tty_open - Initialize GNSS structures on TTY device open + * @tty: pointer to the tty_struct + * @filp: pointer to the file + * + * This routine is mandatory. If this routine is not filled in, the attempted + * open will fail with ENODEV. + */ +static int ice_gnss_tty_open(struct tty_struct *tty, struct file *filp) +{ + struct gnss_serial *gnss; + struct ice_pf *pf; + + pf = (struct ice_pf *)tty->driver->driver_state; + if (!pf) + return -EFAULT; + + /* Clear the pointer in case something fails */ + tty->driver_data = NULL; + + /* Get the serial object associated with this tty pointer */ + gnss = pf->gnss_serial; + if (!gnss) { + /* Initialize GNSS struct on the first device open */ + gnss = ice_gnss_struct_init(pf); + if (!gnss) + return -ENOMEM; + } + + mutex_lock(&gnss->gnss_mutex); + + /* Save our structure within the tty structure */ + tty->driver_data = gnss; + gnss->tty = tty; + gnss->open_count++; + kthread_queue_delayed_work(gnss->kworker, &gnss->read_work, 0); + + mutex_unlock(&gnss->gnss_mutex); + + return 0; +} + +/** + * ice_gnss_tty_close - Cleanup GNSS structures on tty device close + * @tty: pointer to the tty_struct + * @filp: pointer to the file + */ +static void ice_gnss_tty_close(struct tty_struct *tty, struct file *filp) +{ + struct gnss_serial *gnss = tty->driver_data; + struct ice_pf *pf; + + if (!gnss) + return; + + pf = (struct ice_pf *)tty->driver->driver_state; + if (!pf) + return; + + mutex_lock(&gnss->gnss_mutex); + + if (!gnss->open_count) { + /* Port was never opened */ + dev_err(ice_pf_to_dev(pf), "GNSS port not opened\n"); + goto exit; + } + + gnss->open_count--; + if (gnss->open_count <= 0) { + /* Port is in shutdown state */ + kthread_cancel_delayed_work_sync(&gnss->read_work); + } +exit: + mutex_unlock(&gnss->gnss_mutex); +} + +/** + * ice_gnss_tty_write - Dummy TTY write function to avoid kernel panic + * @tty: pointer to the tty_struct + * @buf: pointer to the user data + * @cnt: the number of characters that was able to be sent to the hardware (or + * queued to be sent at a later time) + */ +static int +ice_gnss_tty_write(struct tty_struct *tty, const unsigned char *buf, int cnt) +{ + return 0; +} + +/** + * ice_gnss_tty_write_room - Dummy TTY write_room function to avoid kernel panic + * @tty: pointer to the tty_struct + */ +static unsigned int ice_gnss_tty_write_room(struct tty_struct *tty) +{ + return 0; +} + +static const struct tty_operations tty_gps_ops = { + .open = ice_gnss_tty_open, + .close = ice_gnss_tty_close, + .write = ice_gnss_tty_write, + .write_room = ice_gnss_tty_write_room, +}; + +/** + * ice_gnss_create_tty_driver - Create a TTY driver for GNSS + * @pf: Board private structure + */ +static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + const int ICE_TTYDRV_NAME_MAX = 14; + struct tty_driver *tty_driver; + char *ttydrv_name; + int err; + + tty_driver = tty_alloc_driver(1, TTY_DRIVER_REAL_RAW); + if (!tty_driver) { + dev_err(ice_pf_to_dev(pf), "Failed to allocate memory for GNSS TTY\n"); + return NULL; + } + + ttydrv_name = kzalloc(ICE_TTYDRV_NAME_MAX, GFP_KERNEL); + if (!ttydrv_name) { + tty_driver_kref_put(tty_driver); + return NULL; + } + + snprintf(ttydrv_name, ICE_TTYDRV_NAME_MAX, "ttyGNSS_%02x%02x_", + (u8)pf->pdev->bus->number, (u8)PCI_SLOT(pf->pdev->devfn)); + + /* Initialize the tty driver*/ + tty_driver->owner = THIS_MODULE; + tty_driver->driver_name = dev_driver_string(dev); + tty_driver->name = (const char *)ttydrv_name; + tty_driver->type = TTY_DRIVER_TYPE_SERIAL; + tty_driver->subtype = SERIAL_TYPE_NORMAL; + tty_driver->init_termios = tty_std_termios; + tty_driver->init_termios.c_iflag &= ~INLCR; + tty_driver->init_termios.c_iflag |= IGNCR; + tty_driver->init_termios.c_oflag &= ~OPOST; + tty_driver->init_termios.c_lflag &= ~ICANON; + tty_driver->init_termios.c_cflag &= ~(CSIZE | CBAUD | CBAUDEX); + /* baud rate 9600 */ + tty_termios_encode_baud_rate(&tty_driver->init_termios, 9600, 9600); + tty_driver->driver_state = pf; + tty_set_operations(tty_driver, &tty_gps_ops); + + pf->gnss_serial = NULL; + + tty_port_init(&pf->gnss_tty_port); + tty_port_link_device(&pf->gnss_tty_port, tty_driver, 0); + + err = tty_register_driver(tty_driver); + if (err) { + dev_err(ice_pf_to_dev(pf), "Failed to register TTY driver err=%d\n", + err); + + tty_port_destroy(&pf->gnss_tty_port); + kfree(ttydrv_name); + tty_driver_kref_put(pf->ice_gnss_tty_driver); + + return NULL; + } + + return tty_driver; +} + +/** + * ice_gnss_init - Initialize GNSS TTY support + * @pf: Board private structure + */ +void ice_gnss_init(struct ice_pf *pf) +{ + struct tty_driver *tty_driver; + + tty_driver = ice_gnss_create_tty_driver(pf); + if (!tty_driver) + return; + + pf->ice_gnss_tty_driver = tty_driver; + + set_bit(ICE_FLAG_GNSS, pf->flags); + dev_info(ice_pf_to_dev(pf), "GNSS TTY init successful\n"); +} + +/** + * ice_gnss_exit - Disable GNSS TTY support + * @pf: Board private structure + */ +void ice_gnss_exit(struct ice_pf *pf) +{ + if (!test_bit(ICE_FLAG_GNSS, pf->flags) || !pf->ice_gnss_tty_driver) + return; + + tty_port_destroy(&pf->gnss_tty_port); + + if (pf->gnss_serial) { + struct gnss_serial *gnss = pf->gnss_serial; + + kthread_cancel_delayed_work_sync(&gnss->read_work); + kfree(gnss); + pf->gnss_serial = NULL; + } + + tty_unregister_driver(pf->ice_gnss_tty_driver); + kfree(pf->ice_gnss_tty_driver->name); + tty_driver_kref_put(pf->ice_gnss_tty_driver); + pf->ice_gnss_tty_driver = NULL; +} + +/** + * ice_gnss_is_gps_present - Check if GPS HW is present + * @hw: pointer to HW struct + */ +bool ice_gnss_is_gps_present(struct ice_hw *hw) +{ + if (!hw->func_caps.ts_func_info.src_tmr_owned) + return false; + +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) + if (ice_is_e810t(hw)) { + int err; + u8 data; + + err = ice_read_pca9575_reg_e810t(hw, ICE_PCA9575_P0_IN, &data); + if (err || !!(data & ICE_E810T_P0_GNSS_PRSNT_N)) + return false; + } else { + return false; + } +#else + if (!ice_is_e810t(hw)) + return false; +#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ + + return true; +} diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.h b/drivers/net/ethernet/intel/ice/ice_gnss.h new file mode 100644 index 000000000000..9211adb2372c --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_gnss.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2021, Intel Corporation. */ + +#ifndef _ICE_GNSS_H_ +#define _ICE_GNSS_H_ + +#include <linux/tty.h> +#include <linux/tty_flip.h> + +#define ICE_E810T_GNSS_I2C_BUS 0x2 +#define ICE_GNSS_UBX_I2C_BUS_ADDR 0x42 +/* Data length register is big endian */ +#define ICE_GNSS_UBX_DATA_LEN_H 0xFD +#define ICE_GNSS_UBX_DATA_LEN_WIDTH 2 +#define ICE_GNSS_UBX_EMPTY_DATA 0xFF +#define ICE_GNSS_TIMER_DELAY_TIME (HZ / 10) /* 0.1 second per message */ +#define ICE_MAX_I2C_DATA_SIZE FIELD_MAX(ICE_AQC_I2C_DATA_SIZE_M) +#define ICE_MAX_UBX_READ_TRIES 255 + +/** + * struct gnss_serial - data used to initialize GNSS TTY port + * @back: back pointer to PF + * @tty: pointer to the tty for this device + * @open_count: number of times this port has been opened + * @gnss_mutex: gnss_mutex used to protect GNSS serial operations + * @kworker: kwork thread for handling periodic work + * @read_work: read_work function for handling GNSS reads + */ +struct gnss_serial { + struct ice_pf *back; + struct tty_struct *tty; + int open_count; + struct mutex gnss_mutex; /* protects GNSS serial structure */ + struct kthread_worker *kworker; + struct kthread_delayed_work read_work; +}; + +#if IS_ENABLED(CONFIG_TTY) +void ice_gnss_init(struct ice_pf *pf); +void ice_gnss_exit(struct ice_pf *pf); +bool ice_gnss_is_gps_present(struct ice_hw *hw); +#else +static inline void ice_gnss_init(struct ice_pf *pf) { } +static inline void ice_gnss_exit(struct ice_pf *pf) { } +static inline bool ice_gnss_is_gps_present(struct ice_hw *hw) +{ + return false; +} +#endif /* IS_ENABLED(CONFIG_TTY) */ +#endif /* _ICE_GNSS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index fc3580167e7b..73aa520317d4 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -79,7 +79,7 @@ int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset) dev = ice_pf_to_dev(pf); - if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) + if (!ice_is_rdma_ena(pf)) return -EINVAL; vsi = ice_get_main_vsi(pf); @@ -227,6 +227,11 @@ void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos) for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) qos->tc_info[i].rel_bw = dcbx_cfg->etscfg.tcbwtable[i]; + + qos->pfc_mode = dcbx_cfg->pfc_mode; + if (qos->pfc_mode == IIDC_DSCP_PFC_MODE) + for (i = 0; i < IIDC_MAX_DSCP_MAPPING; i++) + qos->dscp_map[i] = dcbx_cfg->dscp_map[i]; } EXPORT_SYMBOL_GPL(ice_get_qos_params); @@ -236,7 +241,7 @@ EXPORT_SYMBOL_GPL(ice_get_qos_params); */ static int ice_reserve_rdma_qvector(struct ice_pf *pf) { - if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) { + if (ice_is_rdma_ena(pf)) { int index; index = ice_get_res(pf, pf->irq_tracker, pf->num_rdma_msix, @@ -274,7 +279,7 @@ int ice_plug_aux_dev(struct ice_pf *pf) /* if this PF doesn't support a technology that requires auxiliary * devices, then gracefully exit */ - if (!ice_is_aux_ena(pf)) + if (!ice_is_rdma_ena(pf)) return 0; iadev = kzalloc(sizeof(*iadev), GFP_KERNEL); diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 85a612838a89..b3baf7c3f910 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -424,6 +424,8 @@ enum ice_rx_flex_desc_status_error_0_bits { enum ice_rx_flex_desc_status_error_1_bits { /* Note: These are predefined bit offsets */ ICE_RX_FLEX_DESC_STATUS1_NAT_S = 4, + /* [10:5] reserved */ + ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S = 11, ICE_RX_FLEX_DESC_STATUS1_LAST /* this entry must be last!!! */ }; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 53256aca27c7..113a2c56c14c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -8,6 +8,7 @@ #include "ice_fltr.h" #include "ice_dcb_lib.h" #include "ice_devlink.h" +#include "ice_vsi_vlan_ops.h" /** * ice_vsi_type_str - maps VSI type enum to string equivalents @@ -165,21 +166,19 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) /** * ice_vsi_set_num_qs - Set number of queues, descriptors and vectors for a VSI * @vsi: the VSI being configured - * @vf_id: ID of the VF being configured + * @vf: the VF associated with this VSI, if any * * Return 0 on success and a negative value on error */ -static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) +static void ice_vsi_set_num_qs(struct ice_vsi *vsi, struct ice_vf *vf) { + enum ice_vsi_type vsi_type = vsi->type; struct ice_pf *pf = vsi->back; - struct ice_vf *vf = NULL; - if (vsi->type == ICE_VSI_VF) - vsi->vf_id = vf_id; - else - vsi->vf_id = ICE_INVAL_VFID; + if (WARN_ON(vsi_type == ICE_VSI_VF && !vf)) + return; - switch (vsi->type) { + switch (vsi_type) { case ICE_VSI_PF: if (vsi->req_txq) { vsi->alloc_txq = vsi->req_txq; @@ -216,22 +215,21 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) /* The number of queues for ctrl VSI is equal to number of VFs. * Each ring is associated to the corresponding VF_PR netdev. */ - vsi->alloc_txq = pf->num_alloc_vfs; - vsi->alloc_rxq = pf->num_alloc_vfs; + vsi->alloc_txq = ice_get_num_vfs(pf); + vsi->alloc_rxq = vsi->alloc_txq; vsi->num_q_vectors = 1; break; case ICE_VSI_VF: - vf = &pf->vf[vsi->vf_id]; if (vf->num_req_qs) vf->num_vf_qs = vf->num_req_qs; vsi->alloc_txq = vf->num_vf_qs; vsi->alloc_rxq = vf->num_vf_qs; - /* pf->num_msix_per_vf includes (VF miscellaneous vector + + /* pf->vfs.num_msix_per includes (VF miscellaneous vector + * data queue interrupts). Since vsi->num_q_vectors is number * of queues vectors, subtract 1 (ICE_NONQ_VECS_VF) from the * original vector count */ - vsi->num_q_vectors = pf->num_msix_per_vf - ICE_NONQ_VECS_VF; + vsi->num_q_vectors = pf->vfs.num_msix_per - ICE_NONQ_VECS_VF; break; case ICE_VSI_CTRL: vsi->alloc_txq = 1; @@ -247,7 +245,7 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) vsi->alloc_rxq = 1; break; default: - dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi->type); + dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi_type); break; } @@ -298,7 +296,7 @@ void ice_vsi_delete(struct ice_vsi *vsi) return; if (vsi->type == ICE_VSI_VF) - ctxt->vf_num = vsi->vf_id; + ctxt->vf_num = vsi->vf->vf_id; ctxt->vsi_num = vsi->vsi_num; memcpy(&ctxt->info, &vsi->info, sizeof(ctxt->info)); @@ -383,8 +381,7 @@ int ice_vsi_clear(struct ice_vsi *vsi) pf->vsi[vsi->idx] = NULL; if (vsi->idx < pf->next_vsi && vsi->type != ICE_VSI_CTRL) pf->next_vsi = vsi->idx; - if (vsi->idx < pf->next_vsi && vsi->type == ICE_VSI_CTRL && - vsi->vf_id != ICE_INVAL_VFID) + if (vsi->idx < pf->next_vsi && vsi->type == ICE_VSI_CTRL && vsi->vf) pf->next_vsi = vsi->idx; ice_vsi_free_arrays(vsi); @@ -436,13 +433,16 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d { struct ice_q_vector *q_vector = (struct ice_q_vector *)data; struct ice_pf *pf = q_vector->vsi->back; - int i; + struct ice_vf *vf; + unsigned int bkt; if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring) return IRQ_HANDLED; - ice_for_each_vf(pf, i) - napi_schedule(&pf->vf[i].repr->q_vector->napi); + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) + napi_schedule(&vf->repr->q_vector->napi); + rcu_read_unlock(); return IRQ_HANDLED; } @@ -452,17 +452,24 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d * @pf: board private structure * @vsi_type: type of VSI * @ch: ptr to channel - * @vf_id: ID of the VF being configured + * @vf: VF for ICE_VSI_VF and ICE_VSI_CTRL + * + * The VF pointer is used for ICE_VSI_VF and ICE_VSI_CTRL. For ICE_VSI_CTRL, + * it may be NULL in the case there is no association with a VF. For + * ICE_VSI_VF the VF pointer *must not* be NULL. * * returns a pointer to a VSI on success, NULL on failure. */ static struct ice_vsi * ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, - struct ice_channel *ch, u16 vf_id) + struct ice_channel *ch, struct ice_vf *vf) { struct device *dev = ice_pf_to_dev(pf); struct ice_vsi *vsi = NULL; + if (WARN_ON(vsi_type == ICE_VSI_VF && !vf)) + return NULL; + /* Need to protect the allocation of the VSIs at the PF level */ mutex_lock(&pf->sw_mutex); @@ -484,9 +491,9 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, set_bit(ICE_VSI_DOWN, vsi->state); if (vsi_type == ICE_VSI_VF) - ice_vsi_set_num_qs(vsi, vf_id); + ice_vsi_set_num_qs(vsi, vf); else if (vsi_type != ICE_VSI_CHNL) - ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID); + ice_vsi_set_num_qs(vsi, NULL); switch (vsi->type) { case ICE_VSI_SWITCHDEV_CTRL: @@ -509,10 +516,16 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, /* Setup ctrl VSI MSIX irq handler */ vsi->irq_handler = ice_msix_clean_ctrl_vsi; + + /* For the PF control VSI this is NULL, for the VF control VSI + * this will be the first VF to allocate it. + */ + vsi->vf = vf; break; case ICE_VSI_VF: if (ice_vsi_alloc_arrays(vsi)) goto err_rings; + vsi->vf = vf; break; case ICE_VSI_CHNL: if (!ch) @@ -530,7 +543,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, goto unlock_pf; } - if (vsi->type == ICE_VSI_CTRL && vf_id == ICE_INVAL_VFID) { + if (vsi->type == ICE_VSI_CTRL && !vf) { /* Use the last VSI slot as the index for PF control VSI */ vsi->idx = pf->num_alloc_vsi - 1; pf->ctrl_vsi_idx = vsi->idx; @@ -545,8 +558,8 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, pf->next_vsi); } - if (vsi->type == ICE_VSI_CTRL && vf_id != ICE_INVAL_VFID) - pf->vf[vf_id].ctrl_vsi_idx = vsi->idx; + if (vsi->type == ICE_VSI_CTRL && vf) + vf->ctrl_vsi_idx = vsi->idx; goto unlock_pf; err_rings: @@ -732,14 +745,14 @@ bool ice_is_safe_mode(struct ice_pf *pf) } /** - * ice_is_aux_ena + * ice_is_rdma_ena * @pf: pointer to the PF struct * - * returns true if AUX devices/drivers are supported, false otherwise + * returns true if RDMA is currently supported, false otherwise */ -bool ice_is_aux_ena(struct ice_pf *pf) +bool ice_is_rdma_ena(struct ice_pf *pf) { - return test_bit(ICE_FLAG_AUX_ENA, pf->flags); + return test_bit(ICE_FLAG_RDMA_ENA, pf->flags); } /** @@ -838,11 +851,12 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) /** * ice_set_dflt_vsi_ctx - Set default VSI context before adding a VSI + * @hw: HW structure used to determine the VLAN mode of the device * @ctxt: the VSI context being set * * This initializes a default VSI context for all sections except the Queues. */ -static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt) +static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt) { u32 table = 0; @@ -853,13 +867,27 @@ static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt) ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE; /* Traffic from VSI can be sent to LAN */ ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA; - /* By default bits 3 and 4 in vlan_flags are 0's which results in legacy - * behavior (show VLAN, DEI, and UP) in descriptor. Also, allow all - * packets untagged/tagged. + /* allow all untagged/tagged packets by default on Tx */ + ctxt->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL & + ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >> + ICE_AQ_VSI_INNER_VLAN_TX_MODE_S); + /* SVM - by default bits 3 and 4 in inner_vlan_flags are 0's which + * results in legacy behavior (show VLAN, DEI, and UP) in descriptor. + * + * DVM - leave inner VLAN in packet by default */ - ctxt->info.vlan_flags = ((ICE_AQ_VSI_VLAN_MODE_ALL & - ICE_AQ_VSI_VLAN_MODE_M) >> - ICE_AQ_VSI_VLAN_MODE_S); + if (ice_is_dvm_ena(hw)) { + ctxt->info.inner_vlan_flags |= + ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING; + ctxt->info.outer_vlan_flags = + (ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL << + ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) & + ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M; + ctxt->info.outer_vlan_flags |= + (ICE_AQ_VSI_OUTER_TAG_VLAN_8100 << + ICE_AQ_VSI_OUTER_TAG_TYPE_S) & + ICE_AQ_VSI_OUTER_TAG_TYPE_M; + } /* Have 1:1 UP mapping for both ingress/egress tables */ table |= ICE_UP_TABLE_TRANSLATE(0, 0); table |= ICE_UP_TABLE_TRANSLATE(1, 1); @@ -1114,7 +1142,7 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) case ICE_VSI_VF: ctxt->flags = ICE_AQ_VSI_TYPE_VF; /* VF number here is the absolute VF number (0-255) */ - ctxt->vf_num = vsi->vf_id + hw->func_caps.vf_base_id; + ctxt->vf_num = vsi->vf->vf_id + hw->func_caps.vf_base_id; break; default: ret = -ENODEV; @@ -1136,7 +1164,7 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; } - ice_set_dflt_vsi_ctx(ctxt); + ice_set_dflt_vsi_ctx(hw, ctxt); if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) ice_set_fd_vsi_ctx(ctxt, vsi); /* if the switch is in VEB mode, allow VSI loopback */ @@ -1168,25 +1196,6 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); } - /* enable/disable MAC and VLAN anti-spoof when spoofchk is on/off - * respectively - */ - if (vsi->type == ICE_VSI_VF) { - ctxt->info.valid_sections |= - cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); - if (pf->vf[vsi->vf_id].spoofchk) { - ctxt->info.sec_flags |= - ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | - (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << - ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); - } else { - ctxt->info.sec_flags &= - ~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | - (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << - ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)); - } - } - /* Allow control frames out of main VSI */ if (vsi->type == ICE_VSI_PF) { ctxt->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; @@ -1325,6 +1334,36 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) } /** + * ice_get_vf_ctrl_res - Get VF control VSI resource + * @pf: pointer to the PF structure + * @vsi: the VSI to allocate a resource for + * + * Look up whether another VF has already allocated the control VSI resource. + * If so, re-use this resource so that we share it among all VFs. + * + * Otherwise, allocate the resource and return it. + */ +static int ice_get_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi) +{ + struct ice_vf *vf; + unsigned int bkt; + int base; + + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { + if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) { + base = pf->vsi[vf->ctrl_vsi_idx]->base_vector; + rcu_read_unlock(); + return base; + } + } + rcu_read_unlock(); + + return ice_get_res(pf, pf->irq_tracker, vsi->num_q_vectors, + ICE_RES_VF_CTRL_VEC_ID); +} + +/** * ice_vsi_setup_vector_base - Set up the base vector for the given VSI * @vsi: ptr to the VSI * @@ -1356,20 +1395,8 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) num_q_vectors = vsi->num_q_vectors; /* reserve slots from OS requested IRQs */ - if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { - int i; - - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; - - if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) { - base = pf->vsi[vf->ctrl_vsi_idx]->base_vector; - break; - } - } - if (i == pf->num_alloc_vfs) - base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, - ICE_RES_VF_CTRL_VEC_ID); + if (vsi->type == ICE_VSI_CTRL && vsi->vf) { + base = ice_get_vf_ctrl_res(pf, vsi); } else { base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx); @@ -1431,6 +1458,7 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi) */ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) { + bool dvm_ena = ice_is_dvm_ena(&vsi->back->hw); struct ice_pf *pf = vsi->back; struct device *dev; u16 i; @@ -1452,6 +1480,10 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->tx_tstamps = &pf->ptp.port.tx; ring->dev = dev; ring->count = vsi->num_tx_desc; + if (dvm_ena) + ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG2; + else + ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG1; WRITE_ONCE(vsi->tx_rings[i], ring); } @@ -1763,62 +1795,6 @@ void ice_update_eth_stats(struct ice_vsi *vsi) } /** - * ice_vsi_add_vlan - Add VSI membership for given VLAN - * @vsi: the VSI being configured - * @vid: VLAN ID to be added - * @action: filter action to be performed on match - */ -int -ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid, enum ice_sw_fwd_act_type action) -{ - struct ice_pf *pf = vsi->back; - struct device *dev; - int err = 0; - - dev = ice_pf_to_dev(pf); - - if (!ice_fltr_add_vlan(vsi, vid, action)) { - vsi->num_vlan++; - } else { - err = -ENODEV; - dev_err(dev, "Failure Adding VLAN %d on VSI %i\n", vid, - vsi->vsi_num); - } - - return err; -} - -/** - * ice_vsi_kill_vlan - Remove VSI membership for a given VLAN - * @vsi: the VSI being configured - * @vid: VLAN ID to be removed - * - * Returns 0 on success and negative on failure - */ -int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid) -{ - struct ice_pf *pf = vsi->back; - struct device *dev; - int err; - - dev = ice_pf_to_dev(pf); - - err = ice_fltr_remove_vlan(vsi, vid, ICE_FWD_TO_VSI); - if (!err) { - vsi->num_vlan--; - } else if (err == -ENOENT) { - dev_dbg(dev, "Failed to remove VLAN %d on VSI %i, it does not exist, error: %d\n", - vid, vsi->vsi_num, err); - err = 0; - } else { - dev_err(dev, "Error removing VLAN %d on vsi %i error: %d\n", - vid, vsi->vsi_num, err); - } - - return err; -} - -/** * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length * @vsi: VSI */ @@ -2146,95 +2122,6 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi) } /** - * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx - * @vsi: the VSI being changed - */ -int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) -{ - struct ice_hw *hw = &vsi->back->hw; - struct ice_vsi_ctx *ctxt; - int ret; - - ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); - if (!ctxt) - return -ENOMEM; - - /* Here we are configuring the VSI to let the driver add VLAN tags by - * setting vlan_flags to ICE_AQ_VSI_VLAN_MODE_ALL. The actual VLAN tag - * insertion happens in the Tx hot path, in ice_tx_map. - */ - ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL; - - /* Preserve existing VLAN strip setting */ - ctxt->info.vlan_flags |= (vsi->info.vlan_flags & - ICE_AQ_VSI_VLAN_EMOD_M); - - ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); - - ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL); - if (ret) { - dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %d aq_err %s\n", - ret, ice_aq_str(hw->adminq.sq_last_status)); - goto out; - } - - vsi->info.vlan_flags = ctxt->info.vlan_flags; -out: - kfree(ctxt); - return ret; -} - -/** - * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx - * @vsi: the VSI being changed - * @ena: boolean value indicating if this is a enable or disable request - */ -int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) -{ - struct ice_hw *hw = &vsi->back->hw; - struct ice_vsi_ctx *ctxt; - int ret; - - /* do not allow modifying VLAN stripping when a port VLAN is configured - * on this VSI - */ - if (vsi->info.pvid) - return 0; - - ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); - if (!ctxt) - return -ENOMEM; - - /* Here we are configuring what the VSI should do with the VLAN tag in - * the Rx packet. We can either leave the tag in the packet or put it in - * the Rx descriptor. - */ - if (ena) - /* Strip VLAN tag from Rx packet and put it in the desc */ - ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_STR_BOTH; - else - /* Disable stripping. Leave tag in packet */ - ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING; - - /* Allow all packets untagged/tagged */ - ctxt->info.vlan_flags |= ICE_AQ_VSI_VLAN_MODE_ALL; - - ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); - - ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL); - if (ret) { - dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %d aq_err %s\n", - ena, ret, ice_aq_str(hw->adminq.sq_last_status)); - goto out; - } - - vsi->info.vlan_flags = ctxt->info.vlan_flags; -out: - kfree(ctxt); - return ret; -} - -/** * ice_vsi_start_all_rx_rings - start/enable all of a VSI's Rx rings * @vsi: the VSI whose rings are to be enabled * @@ -2327,61 +2214,6 @@ bool ice_vsi_is_vlan_pruning_ena(struct ice_vsi *vsi) return (vsi->info.sw_flags2 & ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA); } -/** - * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI - * @vsi: VSI to enable or disable VLAN pruning on - * @ena: set to true to enable VLAN pruning and false to disable it - * - * returns 0 if VSI is updated, negative otherwise - */ -int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena) -{ - struct ice_vsi_ctx *ctxt; - struct ice_pf *pf; - int status; - - if (!vsi) - return -EINVAL; - - /* Don't enable VLAN pruning if the netdev is currently in promiscuous - * mode. VLAN pruning will be enabled when the interface exits - * promiscuous mode if any VLAN filters are active. - */ - if (vsi->netdev && vsi->netdev->flags & IFF_PROMISC && ena) - return 0; - - pf = vsi->back; - ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); - if (!ctxt) - return -ENOMEM; - - ctxt->info = vsi->info; - - if (ena) - ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; - else - ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; - - ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID); - - status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL); - if (status) { - netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %d, aq_err = %s\n", - ena ? "En" : "Dis", vsi->idx, vsi->vsi_num, - status, ice_aq_str(pf->hw.adminq.sq_last_status)); - goto err_out; - } - - vsi->info.sw_flags2 = ctxt->info.sw_flags2; - - kfree(ctxt); - return 0; - -err_out: - kfree(ctxt); - return -EIO; -} - static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi) { if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) { @@ -2416,7 +2248,7 @@ ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi) } if (vsi->type == ICE_VSI_VF) { - struct ice_vf *vf = &vsi->back->vf[vsi->vf_id]; + struct ice_vf *vf = vsi->vf; q_vector->reg_idx = ice_calc_vf_reg_idx(vf, q_vector); } else { @@ -2601,9 +2433,8 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) * @pf: board private structure * @pi: pointer to the port_info instance * @vsi_type: VSI type - * @vf_id: defines VF ID to which this VSI connects. This field is meant to be - * used only for ICE_VSI_VF VSI type. For other VSI types, should - * fill-in ICE_INVAL_VFID as input. + * @vf: pointer to VF to which this VSI connects. This field is used primarily + * for the ICE_VSI_VF type. Other VSI types should pass NULL. * @ch: ptr to channel * * This allocates the sw VSI structure and its queue resources. @@ -2613,7 +2444,8 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) */ struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, - enum ice_vsi_type vsi_type, u16 vf_id, struct ice_channel *ch) + enum ice_vsi_type vsi_type, struct ice_vf *vf, + struct ice_channel *ch) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct device *dev = ice_pf_to_dev(pf); @@ -2621,11 +2453,11 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, int ret, i; if (vsi_type == ICE_VSI_CHNL) - vsi = ice_vsi_alloc(pf, vsi_type, ch, ICE_INVAL_VFID); + vsi = ice_vsi_alloc(pf, vsi_type, ch, NULL); else if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL) - vsi = ice_vsi_alloc(pf, vsi_type, NULL, vf_id); + vsi = ice_vsi_alloc(pf, vsi_type, NULL, vf); else - vsi = ice_vsi_alloc(pf, vsi_type, NULL, ICE_INVAL_VFID); + vsi = ice_vsi_alloc(pf, vsi_type, NULL, NULL); if (!vsi) { dev_err(dev, "could not allocate VSI\n"); @@ -2637,9 +2469,6 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (vsi->type == ICE_VSI_PF) vsi->ethtype = ETH_P_PAUSE; - if (vsi->type == ICE_VSI_VF || vsi->type == ICE_VSI_CTRL) - vsi->vf_id = vf_id; - ice_alloc_fd_res(vsi); if (vsi_type != ICE_VSI_CHNL) { @@ -2661,6 +2490,8 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (ret) goto unroll_get_qs; + ice_vsi_init_vlan_ops(vsi); + switch (vsi->type) { case ICE_VSI_CTRL: case ICE_VSI_SWITCHDEV_CTRL: @@ -2681,17 +2512,6 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (ret) goto unroll_vector_base; - /* Always add VLAN ID 0 switch rule by default. This is needed - * in order to allow all untagged and 0 tagged priority traffic - * if Rx VLAN pruning is enabled. Also there are cases where we - * don't get the call to add VLAN 0 via ice_vlan_rx_add_vid() - * so this handles those cases (i.e. adding the PF to a bridge - * without the 8021q module loaded). - */ - ret = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI); - if (ret) - goto unroll_clear_rings; - ice_vsi_map_rings_to_vectors(vsi); /* ICE_VSI_CTRL does not need RSS so skip RSS processing */ @@ -3069,6 +2889,37 @@ void ice_napi_del(struct ice_vsi *vsi) } /** + * ice_free_vf_ctrl_res - Free the VF control VSI resource + * @pf: pointer to PF structure + * @vsi: the VSI to free resources for + * + * Check if the VF control VSI resource is still in use. If no VF is using it + * any more, release the VSI resource. Otherwise, leave it to be cleaned up + * once no other VF uses it. + */ +static void ice_free_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi) +{ + struct ice_vf *vf; + unsigned int bkt; + + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { + if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) { + rcu_read_unlock(); + return; + } + } + rcu_read_unlock(); + + /* No other VFs left that have control VSI. It is now safe to reclaim + * SW interrupts back to the common pool. + */ + ice_free_res(pf->irq_tracker, vsi->base_vector, + ICE_RES_VF_CTRL_VEC_ID); + pf->num_avail_sw_msix += vsi->num_q_vectors; +} + +/** * ice_vsi_release - Delete a VSI and free its resources * @vsi: the VSI being removed * @@ -3111,23 +2962,8 @@ int ice_vsi_release(struct ice_vsi *vsi) * many interrupts each VF needs. SR-IOV MSIX resources are also * cleared in the same manner. */ - if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { - int i; - - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; - - if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) - break; - } - if (i == pf->num_alloc_vfs) { - /* No other VFs left that have control VSI, reclaim SW - * interrupts back to the common pool - */ - ice_free_res(pf->irq_tracker, vsi->base_vector, - ICE_RES_VF_CTRL_VEC_ID); - pf->num_avail_sw_msix += vsi->num_q_vectors; - } + if (vsi->type == ICE_VSI_CTRL && vsi->vf) { + ice_free_vf_ctrl_res(pf, vsi); } else if (vsi->type != ICE_VSI_VF) { /* reclaim SW interrupts back to the common pool */ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); @@ -3311,7 +3147,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_coalesce_stored *coalesce; int prev_num_q_vectors = 0; - struct ice_vf *vf = NULL; enum ice_vsi_type vtype; struct ice_pf *pf; int ret, i; @@ -3321,8 +3156,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) pf = vsi->back; vtype = vsi->type; - if (vtype == ICE_VSI_VF) - vf = &pf->vf[vsi->vf_id]; + if (WARN_ON(vtype == ICE_VSI_VF) && !vsi->vf) + return -EINVAL; + + ice_vsi_init_vlan_ops(vsi); coalesce = kcalloc(vsi->num_q_vectors, sizeof(struct ice_coalesce_stored), GFP_KERNEL); @@ -3359,9 +3196,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) ice_vsi_clear_rings(vsi); ice_vsi_free_arrays(vsi); if (vtype == ICE_VSI_VF) - ice_vsi_set_num_qs(vsi, vf->vf_id); + ice_vsi_set_num_qs(vsi, vsi->vf); else - ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID); + ice_vsi_set_num_qs(vsi, NULL); ret = ice_vsi_alloc_arrays(vsi); if (ret < 0) @@ -4137,6 +3974,120 @@ int ice_set_link(struct ice_vsi *vsi, bool ena) } /** + * ice_vsi_add_vlan_zero - add VLAN 0 filter(s) for this VSI + * @vsi: VSI used to add VLAN filters + * + * In Single VLAN Mode (SVM), single VLAN filters via ICE_SW_LKUP_VLAN are based + * on the inner VLAN ID, so the VLAN TPID (i.e. 0x8100 or 0x888a8) doesn't + * matter. In Double VLAN Mode (DVM), outer/single VLAN filters via + * ICE_SW_LKUP_VLAN are based on the outer/single VLAN ID + VLAN TPID. + * + * For both modes add a VLAN 0 + no VLAN TPID filter to handle untagged traffic + * when VLAN pruning is enabled. Also, this handles VLAN 0 priority tagged + * traffic in SVM, since the VLAN TPID isn't part of filtering. + * + * If DVM is enabled then an explicit VLAN 0 + VLAN TPID filter needs to be + * added to allow VLAN 0 priority tagged traffic in DVM, since the VLAN TPID is + * part of filtering. + */ +int ice_vsi_add_vlan_zero(struct ice_vsi *vsi) +{ + struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + struct ice_vlan vlan; + int err; + + vlan = ICE_VLAN(0, 0, 0); + err = vlan_ops->add_vlan(vsi, &vlan); + if (err && err != -EEXIST) + return err; + + /* in SVM both VLAN 0 filters are identical */ + if (!ice_is_dvm_ena(&vsi->back->hw)) + return 0; + + vlan = ICE_VLAN(ETH_P_8021Q, 0, 0); + err = vlan_ops->add_vlan(vsi, &vlan); + if (err && err != -EEXIST) + return err; + + return 0; +} + +/** + * ice_vsi_del_vlan_zero - delete VLAN 0 filter(s) for this VSI + * @vsi: VSI used to add VLAN filters + * + * Delete the VLAN 0 filters in the same manner that they were added in + * ice_vsi_add_vlan_zero. + */ +int ice_vsi_del_vlan_zero(struct ice_vsi *vsi) +{ + struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + struct ice_vlan vlan; + int err; + + vlan = ICE_VLAN(0, 0, 0); + err = vlan_ops->del_vlan(vsi, &vlan); + if (err && err != -EEXIST) + return err; + + /* in SVM both VLAN 0 filters are identical */ + if (!ice_is_dvm_ena(&vsi->back->hw)) + return 0; + + vlan = ICE_VLAN(ETH_P_8021Q, 0, 0); + err = vlan_ops->del_vlan(vsi, &vlan); + if (err && err != -EEXIST) + return err; + + return 0; +} + +/** + * ice_vsi_num_zero_vlans - get number of VLAN 0 filters based on VLAN mode + * @vsi: VSI used to get the VLAN mode + * + * If DVM is enabled then 2 VLAN 0 filters are added, else if SVM is enabled + * then 1 VLAN 0 filter is added. See ice_vsi_add_vlan_zero for more details. + */ +static u16 ice_vsi_num_zero_vlans(struct ice_vsi *vsi) +{ +#define ICE_DVM_NUM_ZERO_VLAN_FLTRS 2 +#define ICE_SVM_NUM_ZERO_VLAN_FLTRS 1 + /* no VLAN 0 filter is created when a port VLAN is active */ + if (vsi->type == ICE_VSI_VF) { + if (WARN_ON(!vsi->vf)) + return 0; + + if (ice_vf_is_port_vlan_ena(vsi->vf)) + return 0; + } + + if (ice_is_dvm_ena(&vsi->back->hw)) + return ICE_DVM_NUM_ZERO_VLAN_FLTRS; + else + return ICE_SVM_NUM_ZERO_VLAN_FLTRS; +} + +/** + * ice_vsi_has_non_zero_vlans - check if VSI has any non-zero VLANs + * @vsi: VSI used to determine if any non-zero VLANs have been added + */ +bool ice_vsi_has_non_zero_vlans(struct ice_vsi *vsi) +{ + return (vsi->num_vlan > ice_vsi_num_zero_vlans(vsi)); +} + +/** + * ice_vsi_num_non_zero_vlans - get the number of non-zero VLANs for this VSI + * @vsi: VSI used to get the number of non-zero VLANs added + */ +u16 ice_vsi_num_non_zero_vlans(struct ice_vsi *vsi) +{ + return (vsi->num_vlan - ice_vsi_num_zero_vlans(vsi)); +} + +/** * ice_is_feature_supported * @pf: pointer to the struct ice_pf instance * @f: feature enum to be checked @@ -4190,8 +4141,11 @@ void ice_init_feature_support(struct ice_pf *pf) case ICE_DEV_ID_E810C_QSFP: case ICE_DEV_ID_E810C_SFP: ice_set_feature_support(pf, ICE_F_DSCP); - if (ice_is_e810t(&pf->hw)) + if (ice_is_e810t(&pf->hw)) { ice_set_feature_support(pf, ICE_F_SMA_CTRL); + if (ice_gnss_is_gps_present(&pf->hw)) + ice_set_feature_support(pf, ICE_F_GNSS); + } break; default: break; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index b2ed189527d6..0095329949d4 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -5,6 +5,7 @@ #define _ICE_LIB_H_ #include "ice.h" +#include "ice_vlan.h" const char *ice_vsi_type_str(enum ice_vsi_type vsi_type); @@ -22,15 +23,6 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi); void ice_vsi_cfg_msix(struct ice_vsi *vsi); -int -ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid, enum ice_sw_fwd_act_type action); - -int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid); - -int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi); - -int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena); - int ice_vsi_start_all_rx_rings(struct ice_vsi *vsi); int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi); @@ -45,8 +37,6 @@ int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi); bool ice_vsi_is_vlan_pruning_ena(struct ice_vsi *vsi); -int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena); - void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); int ice_set_link(struct ice_vsi *vsi, bool ena); @@ -62,7 +52,8 @@ void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, - enum ice_vsi_type vsi_type, u16 vf_id, struct ice_channel *ch); + enum ice_vsi_type vsi_type, struct ice_vf *vf, + struct ice_channel *ch); void ice_napi_del(struct ice_vsi *vsi); @@ -110,7 +101,7 @@ void ice_set_q_vector_intrl(struct ice_q_vector *q_vector); int ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); bool ice_is_safe_mode(struct ice_pf *pf); -bool ice_is_aux_ena(struct ice_pf *pf); +bool ice_is_rdma_ena(struct ice_pf *pf); bool ice_is_dflt_vsi_in_use(struct ice_sw *sw); bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); @@ -132,7 +123,10 @@ void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx); void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx); void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx); - +int ice_vsi_add_vlan_zero(struct ice_vsi *vsi); +int ice_vsi_del_vlan_zero(struct ice_vsi *vsi); +bool ice_vsi_has_non_zero_vlans(struct ice_vsi *vsi); +u16 ice_vsi_num_non_zero_vlans(struct ice_vsi *vsi); bool ice_is_feature_supported(struct ice_pf *pf, enum ice_feature f); void ice_clear_feature_support(struct ice_pf *pf, enum ice_feature f); void ice_init_feature_support(struct ice_pf *pf); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 493942e910be..85b18548c5ed 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -21,6 +21,7 @@ #include "ice_trace.h" #include "ice_eswitch.h" #include "ice_tc_lib.h" +#include "ice_vsi_vlan_ops.h" #define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver" static const char ice_driver_string[] = DRV_SUMMARY; @@ -244,7 +245,7 @@ static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m) if (vsi->type != ICE_VSI_PF) return 0; - if (vsi->num_vlan > 1) + if (ice_vsi_has_non_zero_vlans(vsi)) status = ice_fltr_set_vlan_vsi_promisc(&vsi->back->hw, vsi, promisc_m); else status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, 0); @@ -264,7 +265,7 @@ static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m) if (vsi->type != ICE_VSI_PF) return 0; - if (vsi->num_vlan > 1) + if (ice_vsi_has_non_zero_vlans(vsi)) status = ice_fltr_clear_vlan_vsi_promisc(&vsi->back->hw, vsi, promisc_m); else status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, 0); @@ -279,6 +280,7 @@ static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m) */ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) { + struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); struct device *dev = ice_pf_to_dev(vsi->back); struct net_device *netdev = vsi->netdev; bool promisc_forced_on = false; @@ -352,7 +354,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) /* check for changes in promiscuous modes */ if (changed_flags & IFF_ALLMULTI) { if (vsi->current_netdev_flags & IFF_ALLMULTI) { - if (vsi->num_vlan > 1) + if (ice_vsi_has_non_zero_vlans(vsi)) promisc_m = ICE_MCAST_VLAN_PROMISC_BITS; else promisc_m = ICE_MCAST_PROMISC_BITS; @@ -366,7 +368,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) } } else { /* !(vsi->current_netdev_flags & IFF_ALLMULTI) */ - if (vsi->num_vlan > 1) + if (ice_vsi_has_non_zero_vlans(vsi)) promisc_m = ICE_MCAST_VLAN_PROMISC_BITS; else promisc_m = ICE_MCAST_PROMISC_BITS; @@ -396,7 +398,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) goto out_promisc; } err = 0; - ice_cfg_vlan_pruning(vsi, false); + vlan_ops->dis_rx_filtering(vsi); } } else { /* Clear Rx filter to remove traffic from wire */ @@ -409,8 +411,9 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) IFF_PROMISC; goto out_promisc; } - if (vsi->num_vlan > 1) - ice_cfg_vlan_pruning(vsi, true); + if (vsi->current_netdev_flags & + NETIF_F_HW_VLAN_CTAG_FILTER) + vlan_ops->ena_rx_filtering(vsi); } } } @@ -502,7 +505,8 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) { struct ice_hw *hw = &pf->hw; struct ice_vsi *vsi; - unsigned int i; + struct ice_vf *vf; + unsigned int bkt; dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type); @@ -517,8 +521,10 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) ice_vc_notify_reset(pf); /* Disable VFs until reset is completed */ - ice_for_each_vf(pf, i) - ice_set_vf_state_qs_dis(&pf->vf[i]); + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) + ice_set_vf_state_qs_dis(vf); + mutex_unlock(&pf->vfs.table_lock); if (ice_is_eswitch_mode_switchdev(pf)) { if (reset_type != ICE_RESET_PFR) @@ -565,6 +571,9 @@ skip: if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) ice_ptp_prepare_for_reset(pf); + if (ice_is_feature_supported(pf, ICE_F_GNSS)) + ice_gnss_exit(pf); + if (hw->port_info) ice_sched_clear_port(hw->port_info); @@ -1660,7 +1669,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - unsigned int i; + struct ice_vf *vf; + unsigned int bkt; u32 reg; if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) { @@ -1748,47 +1758,46 @@ static void ice_handle_mdd_event(struct ice_pf *pf) /* Check to see if one of the VFs caused an MDD event, and then * increment counters and set print pending */ - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; - - reg = rd32(hw, VP_MDET_TX_PQM(i)); + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { + reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id)); if (reg & VP_MDET_TX_PQM_VALID_M) { - wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF); + wr32(hw, VP_MDET_TX_PQM(vf->vf_id), 0xFFFF); vf->mdd_tx_events.count++; set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n", - i); + vf->vf_id); } - reg = rd32(hw, VP_MDET_TX_TCLAN(i)); + reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id)); if (reg & VP_MDET_TX_TCLAN_VALID_M) { - wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF); + wr32(hw, VP_MDET_TX_TCLAN(vf->vf_id), 0xFFFF); vf->mdd_tx_events.count++; set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n", - i); + vf->vf_id); } - reg = rd32(hw, VP_MDET_TX_TDPU(i)); + reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id)); if (reg & VP_MDET_TX_TDPU_VALID_M) { - wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF); + wr32(hw, VP_MDET_TX_TDPU(vf->vf_id), 0xFFFF); vf->mdd_tx_events.count++; set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n", - i); + vf->vf_id); } - reg = rd32(hw, VP_MDET_RX(i)); + reg = rd32(hw, VP_MDET_RX(vf->vf_id)); if (reg & VP_MDET_RX_VALID_M) { - wr32(hw, VP_MDET_RX(i), 0xFFFF); + wr32(hw, VP_MDET_RX(vf->vf_id), 0xFFFF); vf->mdd_rx_events.count++; set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_rx_err(pf)) dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n", - i); + vf->vf_id); /* Since the queue is disabled on VF Rx MDD events, the * PF can be configured to reset the VF through ethtool @@ -1799,12 +1808,13 @@ static void ice_handle_mdd_event(struct ice_pf *pf) * reset, so print the event prior to reset. */ ice_print_vf_rx_mdd_event(vf); - mutex_lock(&pf->vf[i].cfg_lock); - ice_reset_vf(&pf->vf[i], false); - mutex_unlock(&pf->vf[i].cfg_lock); + mutex_lock(&vf->cfg_lock); + ice_reset_vf(vf, false); + mutex_unlock(&vf->cfg_lock); } } } + mutex_unlock(&pf->vfs.table_lock); ice_print_vfs_mdd_events(pf); } @@ -2454,7 +2464,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) /* skip this unused q_vector */ continue; } - if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) + if (vsi->type == ICE_VSI_CTRL && vsi->vf) err = devm_request_irq(dev, irq_num, vsi->irq_handler, IRQF_SHARED, q_vector->name, q_vector); @@ -2521,10 +2531,10 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; xdp_ring->vsi = vsi; xdp_ring->netdev = NULL; - xdp_ring->next_dd = ICE_TX_THRESH - 1; - xdp_ring->next_rs = ICE_TX_THRESH - 1; xdp_ring->dev = dev; xdp_ring->count = vsi->num_tx_desc; + xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1; + xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1; WRITE_ONCE(vsi->xdp_rings[i], xdp_ring); if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; @@ -3256,6 +3266,7 @@ static void ice_set_ops(struct net_device *netdev) static void ice_set_netdev_features(struct net_device *netdev) { struct ice_pf *pf = ice_netdev_to_pf(netdev); + bool is_dvm_ena = ice_is_dvm_ena(&pf->hw); netdev_features_t csumo_features; netdev_features_t vlano_features; netdev_features_t dflt_features; @@ -3282,6 +3293,10 @@ static void ice_set_netdev_features(struct net_device *netdev) NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + /* Enable CTAG/STAG filtering by default in Double VLAN Mode (DVM) */ + if (is_dvm_ena) + vlano_features |= NETIF_F_HW_VLAN_STAG_FILTER; + tso_features = NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | @@ -3313,6 +3328,15 @@ static void ice_set_netdev_features(struct net_device *netdev) tso_features; netdev->vlan_features |= dflt_features | csumo_features | tso_features; + + /* advertise support but don't enable by default since only one type of + * VLAN offload can be enabled at a time (i.e. CTAG or STAG). When one + * type turns on the other has to be turned off. This is enforced by the + * ice_fix_features() ndo callback. + */ + if (is_dvm_ena) + netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX | + NETIF_F_HW_VLAN_STAG_TX; } /** @@ -3387,14 +3411,14 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) static struct ice_vsi * ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID, NULL); + return ice_vsi_setup(pf, pi, ICE_VSI_PF, NULL, NULL); } static struct ice_vsi * ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, struct ice_channel *ch) { - return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, ICE_INVAL_VFID, ch); + return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, NULL, ch); } /** @@ -3408,7 +3432,7 @@ ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, static struct ice_vsi * ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID, NULL); + return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, NULL, NULL); } /** @@ -3422,40 +3446,37 @@ ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) struct ice_vsi * ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID, NULL); + return ice_vsi_setup(pf, pi, ICE_VSI_LB, NULL, NULL); } /** * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload * @netdev: network interface to be adjusted - * @proto: unused protocol + * @proto: VLAN TPID * @vid: VLAN ID to be added * * net_device_ops implementation for adding VLAN IDs */ static int -ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto, - u16 vid) +ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi_vlan_ops *vlan_ops; struct ice_vsi *vsi = np->vsi; + struct ice_vlan vlan; int ret; /* VLAN 0 is added by default during load/reset */ if (!vid) return 0; - /* Enable VLAN pruning when a VLAN other than 0 is added */ - if (!ice_vsi_is_vlan_pruning_ena(vsi)) { - ret = ice_cfg_vlan_pruning(vsi, true); - if (ret) - return ret; - } + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); /* Add a switch rule for this VLAN ID so its corresponding VLAN tagged * packets aren't pruned by the device's internal switch on Rx */ - ret = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI); + vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0); + ret = vlan_ops->add_vlan(vsi, &vlan); if (!ret) set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); @@ -3465,36 +3486,36 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto, /** * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload * @netdev: network interface to be adjusted - * @proto: unused protocol + * @proto: VLAN TPID * @vid: VLAN ID to be removed * * net_device_ops implementation for removing VLAN IDs */ static int -ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, - u16 vid) +ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi_vlan_ops *vlan_ops; struct ice_vsi *vsi = np->vsi; + struct ice_vlan vlan; int ret; /* don't allow removal of VLAN 0 */ if (!vid) return 0; - /* Make sure ice_vsi_kill_vlan is successful before updating VLAN + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + + /* Make sure VLAN delete is successful before updating VLAN * information */ - ret = ice_vsi_kill_vlan(vsi, vid); + vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0); + ret = vlan_ops->del_vlan(vsi, &vlan); if (ret) return ret; - /* Disable pruning when VLAN 0 is the only VLAN rule */ - if (vsi->num_vlan == 1 && ice_vsi_is_vlan_pruning_ena(vsi)) - ret = ice_cfg_vlan_pruning(vsi, false); - set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); - return ret; + return 0; } /** @@ -3563,12 +3584,17 @@ static int ice_tc_indir_block_register(struct ice_vsi *vsi) static int ice_setup_pf_sw(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); + bool dvm = ice_is_dvm_ena(&pf->hw); struct ice_vsi *vsi; int status; if (ice_is_reset_in_progress(pf->state)) return -EBUSY; + status = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL); + if (status) + return -EIO; + vsi = ice_pf_vsi_setup(pf, pf->hw.port_info); if (!vsi) return -ENOMEM; @@ -3679,6 +3705,7 @@ static void ice_deinit_pf(struct ice_pf *pf) mutex_destroy(&pf->sw_mutex); mutex_destroy(&pf->tc_mutex); mutex_destroy(&pf->avail_q_mutex); + mutex_destroy(&pf->vfs.table_lock); if (pf->avail_txqs) { bitmap_free(pf->avail_txqs); @@ -3703,18 +3730,15 @@ static void ice_set_pf_caps(struct ice_pf *pf) struct ice_hw_func_caps *func_caps = &pf->hw.func_caps; clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); - clear_bit(ICE_FLAG_AUX_ENA, pf->flags); - if (func_caps->common_cap.rdma) { + if (func_caps->common_cap.rdma) set_bit(ICE_FLAG_RDMA_ENA, pf->flags); - set_bit(ICE_FLAG_AUX_ENA, pf->flags); - } clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); if (func_caps->common_cap.dcb) set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); if (func_caps->common_cap.sr_iov_1_1) { set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); - pf->num_vfs_supported = min_t(int, func_caps->num_allocd_vfs, + pf->vfs.num_supported = min_t(int, func_caps->num_allocd_vfs, ICE_MAX_VF_COUNT); } clear_bit(ICE_FLAG_RSS_ENA, pf->flags); @@ -3781,6 +3805,9 @@ static int ice_init_pf(struct ice_pf *pf) return -ENOMEM; } + mutex_init(&pf->vfs.table_lock); + hash_init(pf->vfs.table); + return 0; } @@ -3835,7 +3862,7 @@ static int ice_ena_msix_range(struct ice_pf *pf) v_left -= needed; /* reserve vectors for RDMA auxiliary driver */ - if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) { + if (ice_is_rdma_ena(pf)) { needed = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; if (v_left < needed) goto no_hw_vecs_left_err; @@ -3876,7 +3903,7 @@ static int ice_ena_msix_range(struct ice_pf *pf) int v_remain = v_actual - v_other; int v_rdma = 0, v_min_rdma = 0; - if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) { + if (ice_is_rdma_ena(pf)) { /* Need at least 1 interrupt in addition to * AEQ MSIX */ @@ -3910,7 +3937,7 @@ static int ice_ena_msix_range(struct ice_pf *pf) dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", pf->num_lan_msix); - if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) + if (ice_is_rdma_ena(pf)) dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n", pf->num_rdma_msix); } @@ -4090,8 +4117,8 @@ static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf) ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; /* allow all VLANs on Tx and don't strip on Rx */ - ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL | - ICE_AQ_VSI_VLAN_EMOD_NOTHING; + ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL | + ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING; status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { @@ -4100,7 +4127,7 @@ static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf) } else { vsi->info.sec_flags = ctxt->info.sec_flags; vsi->info.sw_flags2 = ctxt->info.sw_flags2; - vsi->info.vlan_flags = ctxt->info.vlan_flags; + vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags; } kfree(ctxt); @@ -4485,8 +4512,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) /* set up for high or low DMA */ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); - if (err) - err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (err) { dev_err(dev, "DMA configuration failed: 0x%x\n", err); return err; @@ -4709,6 +4734,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) ice_ptp_init(pf); + if (ice_is_feature_supported(pf, ICE_F_GNSS)) + ice_gnss_init(pf); + /* Note: Flow director init failure is non-fatal to load */ if (ice_init_fdir(pf)) dev_err(dev, "could not initialize flow director\n"); @@ -4738,7 +4766,7 @@ probe_done: /* ready to go, so clear down state bit */ clear_bit(ICE_DOWN, pf->state); - if (ice_is_aux_ena(pf)) { + if (ice_is_rdma_ena(pf)) { pf->aux_idx = ida_alloc(&ice_aux_ida, GFP_KERNEL); if (pf->aux_idx < 0) { dev_err(dev, "Failed to allocate device ID for AUX driver\n"); @@ -4884,6 +4912,8 @@ static void ice_remove(struct pci_dev *pdev) ice_deinit_lag(pf); if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) ice_ptp_release(pf); + if (ice_is_feature_supported(pf, ICE_F_GNSS)) + ice_gnss_exit(pf); if (!ice_is_safe_mode(pf)) ice_remove_arfs(pf); ice_setup_mc_magic_wake(pf); @@ -5596,6 +5626,194 @@ ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[], return err; } +#define NETIF_VLAN_OFFLOAD_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_CTAG_TX | \ + NETIF_F_HW_VLAN_STAG_RX | \ + NETIF_F_HW_VLAN_STAG_TX) + +#define NETIF_VLAN_FILTERING_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \ + NETIF_F_HW_VLAN_STAG_FILTER) + +/** + * ice_fix_features - fix the netdev features flags based on device limitations + * @netdev: ptr to the netdev that flags are being fixed on + * @features: features that need to be checked and possibly fixed + * + * Make sure any fixups are made to features in this callback. This enables the + * driver to not have to check unsupported configurations throughout the driver + * because that's the responsiblity of this callback. + * + * Single VLAN Mode (SVM) Supported Features: + * NETIF_F_HW_VLAN_CTAG_FILTER + * NETIF_F_HW_VLAN_CTAG_RX + * NETIF_F_HW_VLAN_CTAG_TX + * + * Double VLAN Mode (DVM) Supported Features: + * NETIF_F_HW_VLAN_CTAG_FILTER + * NETIF_F_HW_VLAN_CTAG_RX + * NETIF_F_HW_VLAN_CTAG_TX + * + * NETIF_F_HW_VLAN_STAG_FILTER + * NETIF_HW_VLAN_STAG_RX + * NETIF_HW_VLAN_STAG_TX + * + * Features that need fixing: + * Cannot simultaneously enable CTAG and STAG stripping and/or insertion. + * These are mutually exlusive as the VSI context cannot support multiple + * VLAN ethertypes simultaneously for stripping and/or insertion. If this + * is not done, then default to clearing the requested STAG offload + * settings. + * + * All supported filtering has to be enabled or disabled together. For + * example, in DVM, CTAG and STAG filtering have to be enabled and disabled + * together. If this is not done, then default to VLAN filtering disabled. + * These are mutually exclusive as there is currently no way to + * enable/disable VLAN filtering based on VLAN ethertype when using VLAN + * prune rules. + */ +static netdev_features_t +ice_fix_features(struct net_device *netdev, netdev_features_t features) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + netdev_features_t supported_vlan_filtering; + netdev_features_t requested_vlan_filtering; + struct ice_vsi *vsi = np->vsi; + + requested_vlan_filtering = features & NETIF_VLAN_FILTERING_FEATURES; + + /* make sure supported_vlan_filtering works for both SVM and DVM */ + supported_vlan_filtering = NETIF_F_HW_VLAN_CTAG_FILTER; + if (ice_is_dvm_ena(&vsi->back->hw)) + supported_vlan_filtering |= NETIF_F_HW_VLAN_STAG_FILTER; + + if (requested_vlan_filtering && + requested_vlan_filtering != supported_vlan_filtering) { + if (requested_vlan_filtering & NETIF_F_HW_VLAN_CTAG_FILTER) { + netdev_warn(netdev, "cannot support requested VLAN filtering settings, enabling all supported VLAN filtering settings\n"); + features |= supported_vlan_filtering; + } else { + netdev_warn(netdev, "cannot support requested VLAN filtering settings, clearing all supported VLAN filtering settings\n"); + features &= ~supported_vlan_filtering; + } + } + + if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX)) && + (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))) { + netdev_warn(netdev, "cannot support CTAG and STAG VLAN stripping and/or insertion simultaneously since CTAG and STAG offloads are mutually exclusive, clearing STAG offload settings\n"); + features &= ~(NETIF_F_HW_VLAN_STAG_RX | + NETIF_F_HW_VLAN_STAG_TX); + } + + return features; +} + +/** + * ice_set_vlan_offload_features - set VLAN offload features for the PF VSI + * @vsi: PF's VSI + * @features: features used to determine VLAN offload settings + * + * First, determine the vlan_ethertype based on the VLAN offload bits in + * features. Then determine if stripping and insertion should be enabled or + * disabled. Finally enable or disable VLAN stripping and insertion. + */ +static int +ice_set_vlan_offload_features(struct ice_vsi *vsi, netdev_features_t features) +{ + bool enable_stripping = true, enable_insertion = true; + struct ice_vsi_vlan_ops *vlan_ops; + int strip_err = 0, insert_err = 0; + u16 vlan_ethertype = 0; + + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + + if (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX)) + vlan_ethertype = ETH_P_8021AD; + else if (features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX)) + vlan_ethertype = ETH_P_8021Q; + + if (!(features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_CTAG_RX))) + enable_stripping = false; + if (!(features & (NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_CTAG_TX))) + enable_insertion = false; + + if (enable_stripping) + strip_err = vlan_ops->ena_stripping(vsi, vlan_ethertype); + else + strip_err = vlan_ops->dis_stripping(vsi); + + if (enable_insertion) + insert_err = vlan_ops->ena_insertion(vsi, vlan_ethertype); + else + insert_err = vlan_ops->dis_insertion(vsi); + + if (strip_err || insert_err) + return -EIO; + + return 0; +} + +/** + * ice_set_vlan_filtering_features - set VLAN filtering features for the PF VSI + * @vsi: PF's VSI + * @features: features used to determine VLAN filtering settings + * + * Enable or disable Rx VLAN filtering based on the VLAN filtering bits in the + * features. + */ +static int +ice_set_vlan_filtering_features(struct ice_vsi *vsi, netdev_features_t features) +{ + struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + int err = 0; + + /* support Single VLAN Mode (SVM) and Double VLAN Mode (DVM) by checking + * if either bit is set + */ + if (features & + (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)) + err = vlan_ops->ena_rx_filtering(vsi); + else + err = vlan_ops->dis_rx_filtering(vsi); + + return err; +} + +/** + * ice_set_vlan_features - set VLAN settings based on suggested feature set + * @netdev: ptr to the netdev being adjusted + * @features: the feature set that the stack is suggesting + * + * Only update VLAN settings if the requested_vlan_features are different than + * the current_vlan_features. + */ +static int +ice_set_vlan_features(struct net_device *netdev, netdev_features_t features) +{ + netdev_features_t current_vlan_features, requested_vlan_features; + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + int err; + + current_vlan_features = netdev->features & NETIF_VLAN_OFFLOAD_FEATURES; + requested_vlan_features = features & NETIF_VLAN_OFFLOAD_FEATURES; + if (current_vlan_features ^ requested_vlan_features) { + err = ice_set_vlan_offload_features(vsi, features); + if (err) + return err; + } + + current_vlan_features = netdev->features & + NETIF_VLAN_FILTERING_FEATURES; + requested_vlan_features = features & NETIF_VLAN_FILTERING_FEATURES; + if (current_vlan_features ^ requested_vlan_features) { + err = ice_set_vlan_filtering_features(vsi, features); + if (err) + return err; + } + + return 0; +} + /** * ice_set_features - set the netdev feature flags * @netdev: ptr to the netdev being adjusted @@ -5630,26 +5848,9 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) netdev->features & NETIF_F_RXHASH) ice_vsi_manage_rss_lut(vsi, false); - if ((features & NETIF_F_HW_VLAN_CTAG_RX) && - !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) - ret = ice_vsi_manage_vlan_stripping(vsi, true); - else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) && - (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) - ret = ice_vsi_manage_vlan_stripping(vsi, false); - - if ((features & NETIF_F_HW_VLAN_CTAG_TX) && - !(netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) - ret = ice_vsi_manage_vlan_insertion(vsi); - else if (!(features & NETIF_F_HW_VLAN_CTAG_TX) && - (netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) - ret = ice_vsi_manage_vlan_insertion(vsi); - - if ((features & NETIF_F_HW_VLAN_CTAG_FILTER) && - !(netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) - ret = ice_cfg_vlan_pruning(vsi, true); - else if (!(features & NETIF_F_HW_VLAN_CTAG_FILTER) && - (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) - ret = ice_cfg_vlan_pruning(vsi, false); + ret = ice_set_vlan_features(netdev, features); + if (ret) + return ret; if ((features & NETIF_F_NTUPLE) && !(netdev->features & NETIF_F_NTUPLE)) { @@ -5673,23 +5874,26 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) else clear_bit(ICE_FLAG_CLS_FLOWER, pf->flags); - return ret; + return 0; } /** - * ice_vsi_vlan_setup - Setup VLAN offload properties on a VSI + * ice_vsi_vlan_setup - Setup VLAN offload properties on a PF VSI * @vsi: VSI to setup VLAN properties for */ static int ice_vsi_vlan_setup(struct ice_vsi *vsi) { - int ret = 0; + int err; - if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) - ret = ice_vsi_manage_vlan_stripping(vsi, true); - if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_TX) - ret = ice_vsi_manage_vlan_insertion(vsi); + err = ice_set_vlan_offload_features(vsi, vsi->netdev->features); + if (err) + return err; - return ret; + err = ice_set_vlan_filtering_features(vsi, vsi->netdev->features); + if (err) + return err; + + return ice_vsi_add_vlan_zero(vsi); } /** @@ -6290,11 +6494,12 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) */ int ice_down(struct ice_vsi *vsi) { - int i, tx_err, rx_err, link_err = 0; + int i, tx_err, rx_err, link_err = 0, vlan_err = 0; WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state)); if (vsi->netdev && vsi->type == ICE_VSI_PF) { + vlan_err = ice_vsi_del_vlan_zero(vsi); if (!ice_is_e810(&vsi->back->hw)) ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); netif_carrier_off(vsi->netdev); @@ -6336,7 +6541,7 @@ int ice_down(struct ice_vsi *vsi) ice_for_each_rxq(vsi, i) ice_clean_rx_ring(vsi->rx_rings[i]); - if (tx_err || rx_err || link_err) { + if (tx_err || rx_err || link_err || vlan_err) { netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n", vsi->vsi_num, vsi->vsw->sw_id); return -EIO; @@ -6646,6 +6851,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) { struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; + bool dvm; int err; if (test_bit(ICE_DOWN, pf->state)) @@ -6709,6 +6915,12 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) goto err_init_ctrlq; } + dvm = ice_is_dvm_ena(hw); + + err = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL); + if (err) + goto err_init_ctrlq; + err = ice_sched_init_port(hw->port_info); if (err) goto err_sched_init_port; @@ -6745,6 +6957,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) ice_ptp_reset(pf); + if (ice_is_feature_supported(pf, ICE_F_GNSS)) + ice_gnss_init(pf); + /* rebuild PF VSI */ err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF); if (err) { @@ -8605,6 +8820,7 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_start_xmit = ice_start_xmit, .ndo_select_queue = ice_select_queue, .ndo_features_check = ice_features_check, + .ndo_fix_features = ice_fix_features, .ndo_set_rx_mode = ice_set_rx_mode, .ndo_set_mac_address = ice_set_mac_address, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h index f57c414bc0a9..380e8ae94fc9 100644 --- a/drivers/net/ethernet/intel/ice/ice_osdep.h +++ b/drivers/net/ethernet/intel/ice/ice_osdep.h @@ -9,6 +9,7 @@ #ifndef CONFIG_64BIT #include <linux/io-64-nonatomic-lo-hi.h> #endif +#include <net/udp_tunnel.h> #define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) #define rd32(a, reg) readl((a)->hw_addr + (reg)) diff --git a/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c new file mode 100644 index 000000000000..976a03d3bdd5 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#include "ice_vsi_vlan_ops.h" +#include "ice_vsi_vlan_lib.h" +#include "ice_vlan_mode.h" +#include "ice.h" +#include "ice_pf_vsi_vlan_ops.h" + +void ice_pf_vsi_init_vlan_ops(struct ice_vsi *vsi) +{ + struct ice_vsi_vlan_ops *vlan_ops; + + if (ice_is_dvm_ena(&vsi->back->hw)) { + vlan_ops = &vsi->outer_vlan_ops; + + vlan_ops->add_vlan = ice_vsi_add_vlan; + vlan_ops->del_vlan = ice_vsi_del_vlan; + vlan_ops->ena_stripping = ice_vsi_ena_outer_stripping; + vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping; + vlan_ops->ena_insertion = ice_vsi_ena_outer_insertion; + vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion; + vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering; + vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering; + } else { + vlan_ops = &vsi->inner_vlan_ops; + + vlan_ops->add_vlan = ice_vsi_add_vlan; + vlan_ops->del_vlan = ice_vsi_del_vlan; + vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping; + vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping; + vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion; + vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion; + vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering; + vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering; + } +} + diff --git a/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h new file mode 100644 index 000000000000..6741ec8c5f6b --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#ifndef _ICE_PF_VSI_VLAN_OPS_H_ +#define _ICE_PF_VSI_VLAN_OPS_H_ + +#include "ice_vsi_vlan_ops.h" + +struct ice_vsi; + +void ice_pf_vsi_init_vlan_ops(struct ice_vsi *vsi); + +#endif /* _ICE_PF_VSI_VLAN_OPS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index ec8450f034e6..6dff97d53d81 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -3251,6 +3251,37 @@ int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data) } /** + * ice_read_pca9575_reg_e810t + * @hw: pointer to the hw struct + * @offset: GPIO controller register offset + * @data: pointer to data to be read from the GPIO controller + * + * Read the register from the GPIO controller + */ +int ice_read_pca9575_reg_e810t(struct ice_hw *hw, u8 offset, u8 *data) +{ + struct ice_aqc_link_topo_addr link_topo; + __le16 addr; + u16 handle; + int err; + + memset(&link_topo, 0, sizeof(link_topo)); + + err = ice_get_pca9575_handle(hw, &handle); + if (err) + return err; + + link_topo.handle = cpu_to_le16(handle); + link_topo.topo_params.node_type_ctx = + FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M, + ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED); + + addr = cpu_to_le16((u16)offset); + + return ice_aq_read_i2c(hw, link_topo, 0, addr, 1, data, NULL); +} + +/** * ice_is_pca9575_present * @hw: pointer to the hw struct * diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 519e75462e67..1246e4ee4b5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -191,6 +191,7 @@ int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port); int ice_ptp_init_phy_e810(struct ice_hw *hw); int ice_read_sma_ctrl_e810t(struct ice_hw *hw, u8 *data); int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data); +int ice_read_pca9575_reg_e810t(struct ice_hw *hw, u8 offset, u8 *data); bool ice_is_pca9575_present(struct ice_hw *hw); #define PFTSYN_SEM_BYTES 4 @@ -443,4 +444,10 @@ bool ice_is_pca9575_present(struct ice_hw *hw); #define ICE_SMA_MAX_BIT_E810T 7 #define ICE_PCA9575_P1_OFFSET 8 +/* E810T PCA9575 IO controller registers */ +#define ICE_PCA9575_P0_IN 0x0 + +/* E810T PCA9575 IO controller pin control */ +#define ICE_E810T_P0_GNSS_PRSNT_N BIT(4) + #endif /* _ICE_PTP_HW_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index dcc310e29300..2adfaf21e056 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -284,6 +284,8 @@ static int ice_repr_add(struct ice_vf *vf) devlink_port_type_eth_set(&vf->devlink_port, repr->netdev); + ice_vc_change_ops_to_repr(&vf->vc_ops); + return 0; err_netdev: @@ -311,6 +313,9 @@ err_alloc_rule: */ static void ice_repr_rem(struct ice_vf *vf) { + if (!vf->repr) + return; + ice_devlink_destroy_vf_port(vf); kfree(vf->repr->q_vector); vf->repr->q_vector = NULL; @@ -323,6 +328,23 @@ static void ice_repr_rem(struct ice_vf *vf) #endif kfree(vf->repr); vf->repr = NULL; + + ice_vc_set_dflt_vf_ops(&vf->vc_ops); +} + +/** + * ice_repr_rem_from_all_vfs - remove port representor for all VFs + * @pf: pointer to PF structure + */ +void ice_repr_rem_from_all_vfs(struct ice_pf *pf) +{ + struct ice_vf *vf; + unsigned int bkt; + + lockdep_assert_held(&pf->vfs.table_lock); + + ice_for_each_vf(pf, bkt, vf) + ice_repr_rem(vf); } /** @@ -331,49 +353,27 @@ static void ice_repr_rem(struct ice_vf *vf) */ int ice_repr_add_for_all_vfs(struct ice_pf *pf) { + struct ice_vf *vf; + unsigned int bkt; int err; - int i; - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; + lockdep_assert_held(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { err = ice_repr_add(vf); if (err) goto err; - - ice_vc_change_ops_to_repr(&vf->vc_ops); } return 0; err: - for (i = i - 1; i >= 0; i--) { - struct ice_vf *vf = &pf->vf[i]; - - ice_repr_rem(vf); - ice_vc_set_dflt_vf_ops(&vf->vc_ops); - } + ice_repr_rem_from_all_vfs(pf); return err; } /** - * ice_repr_rem_from_all_vfs - remove port representor for all VFs - * @pf: pointer to PF structure - */ -void ice_repr_rem_from_all_vfs(struct ice_pf *pf) -{ - int i; - - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; - - ice_repr_rem(vf); - ice_vc_set_dflt_vf_ops(&vf->vc_ops); - } -} - -/** * ice_repr_start_tx_queues - start Tx queues of port representor * @repr: pointer to repr structure */ diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 475ec2afa210..9c40a8d58c71 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1097,6 +1097,64 @@ ice_aq_get_recipe(struct ice_hw *hw, } /** + * ice_update_recipe_lkup_idx - update a default recipe based on the lkup_idx + * @hw: pointer to the HW struct + * @params: parameters used to update the default recipe + * + * This function only supports updating default recipes and it only supports + * updating a single recipe based on the lkup_idx at a time. + * + * This is done as a read-modify-write operation. First, get the current recipe + * contents based on the recipe's ID. Then modify the field vector index and + * mask if it's valid at the lkup_idx. Finally, use the add recipe AQ to update + * the pre-existing recipe with the modifications. + */ +int +ice_update_recipe_lkup_idx(struct ice_hw *hw, + struct ice_update_recipe_lkup_idx_params *params) +{ + struct ice_aqc_recipe_data_elem *rcp_list; + u16 num_recps = ICE_MAX_NUM_RECIPES; + int status; + + rcp_list = kcalloc(num_recps, sizeof(*rcp_list), GFP_KERNEL); + if (!rcp_list) + return -ENOMEM; + + /* read current recipe list from firmware */ + rcp_list->recipe_indx = params->rid; + status = ice_aq_get_recipe(hw, rcp_list, &num_recps, params->rid, NULL); + if (status) { + ice_debug(hw, ICE_DBG_SW, "Failed to get recipe %d, status %d\n", + params->rid, status); + goto error_out; + } + + /* only modify existing recipe's lkup_idx and mask if valid, while + * leaving all other fields the same, then update the recipe firmware + */ + rcp_list->content.lkup_indx[params->lkup_idx] = params->fv_idx; + if (params->mask_valid) + rcp_list->content.mask[params->lkup_idx] = + cpu_to_le16(params->mask); + + if (params->ignore_valid) + rcp_list->content.lkup_indx[params->lkup_idx] |= + ICE_AQ_RECIPE_LKUP_IGNORE; + + status = ice_aq_add_recipe(hw, &rcp_list[0], 1, NULL); + if (status) + ice_debug(hw, ICE_DBG_SW, "Failed to update recipe %d lkup_idx %d fv_idx %d mask %d mask_valid %s, status %d\n", + params->rid, params->lkup_idx, params->fv_idx, + params->mask, params->mask_valid ? "true" : "false", + status); + +error_out: + kfree(rcp_list); + return status; +} + +/** * ice_aq_map_recipe_to_profile - Map recipe to packet profile * @hw: pointer to the HW struct * @profile_id: package profile ID to associate the recipe with @@ -1539,6 +1597,7 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, struct ice_aqc_sw_rules_elem *s_rule, enum ice_adminq_opc opc) { u16 vlan_id = ICE_MAX_VLAN_ID + 1; + u16 vlan_tpid = ETH_P_8021Q; void *daddr = NULL; u16 eth_hdr_sz; u8 *eth_hdr; @@ -1611,6 +1670,8 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, break; case ICE_SW_LKUP_VLAN: vlan_id = f_info->l_data.vlan.vlan_id; + if (f_info->l_data.vlan.tpid_valid) + vlan_tpid = f_info->l_data.vlan.tpid; if (f_info->fltr_act == ICE_FWD_TO_VSI || f_info->fltr_act == ICE_FWD_TO_VSI_LIST) { act |= ICE_SINGLE_ACT_PRUNE; @@ -1653,6 +1714,8 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, if (!(vlan_id > ICE_MAX_VLAN_ID)) { off = (__force __be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET); *off = cpu_to_be16(vlan_id); + off = (__force __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET); + *off = cpu_to_be16(vlan_tpid); } /* Create the switch rule with the final dummy Ethernet header */ @@ -3868,6 +3931,23 @@ ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts, } /** + * ice_change_proto_id_to_dvm - change proto id in prot_id_tbl + * + * As protocol id for outer vlan is different in dvm and svm, if dvm is + * supported protocol array record for outer vlan has to be modified to + * reflect the value proper for DVM. + */ +void ice_change_proto_id_to_dvm(void) +{ + u8 i; + + for (i = 0; i < ARRAY_SIZE(ice_prot_id_tbl); i++) + if (ice_prot_id_tbl[i].type == ICE_VLAN_OFOS && + ice_prot_id_tbl[i].protocol_id != ICE_VLAN_OF_HW) + ice_prot_id_tbl[i].protocol_id = ICE_VLAN_OF_HW; +} + +/** * ice_prot_type_to_id - get protocol ID from protocol type * @type: protocol type * @id: pointer to variable that will receive the ID diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index d8334beaaa8a..7b42c51a3eb0 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -33,15 +33,6 @@ struct ice_vsi_ctx { struct ice_q_ctx *rdma_q_ctx[ICE_MAX_TRAFFIC_CLASS]; }; -enum ice_sw_fwd_act_type { - ICE_FWD_TO_VSI = 0, - ICE_FWD_TO_VSI_LIST, /* Do not use this when adding filter */ - ICE_FWD_TO_Q, - ICE_FWD_TO_QGRP, - ICE_DROP_PACKET, - ICE_INVAL_ACT -}; - /* Switch recipe ID enum values are specific to hardware */ enum ice_sw_lkup_type { ICE_SW_LKUP_ETHERTYPE = 0, @@ -86,6 +77,8 @@ struct ice_fltr_info { } mac_vlan; struct { u16 vlan_id; + u16 tpid; + u8 tpid_valid; } vlan; /* Set lkup_type as ICE_SW_LKUP_ETHERTYPE * if just using ethertype as filter. Set lkup_type as @@ -125,6 +118,15 @@ struct ice_fltr_info { u8 lan_en; /* Indicate if packet can be forwarded to the uplink */ }; +struct ice_update_recipe_lkup_idx_params { + u16 rid; + u16 fv_idx; + bool ignore_valid; + u16 mask; + bool mask_valid; + u8 lkup_idx; +}; + struct ice_adv_lkup_elem { enum ice_protocol_type type; union ice_prot_hdr h_u; /* Header values */ @@ -367,4 +369,8 @@ void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw); int ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz, u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd); +int +ice_update_recipe_lkup_idx(struct ice_hw *hw, + struct ice_update_recipe_lkup_idx_params *params); +void ice_change_proto_id_to_dvm(void); #endif /* _ICE_SWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 3e38695f1c9d..853f57a9589a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -173,6 +173,8 @@ tx_skip_free: tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; + tx_ring->next_dd = ICE_RING_QUARTER(tx_ring) - 1; + tx_ring->next_rs = ICE_RING_QUARTER(tx_ring) - 1; if (!tx_ring->netdev) return; @@ -983,15 +985,17 @@ static struct sk_buff * ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, struct xdp_buff *xdp) { + unsigned int metasize = xdp->data - xdp->data_meta; unsigned int size = xdp->data_end - xdp->data; unsigned int headlen; struct sk_buff *skb; /* prefetch first cache line of first page */ - net_prefetch(xdp->data); + net_prefetch(xdp->data_meta); /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, + ICE_RX_HDR_SIZE + metasize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; @@ -1003,8 +1007,13 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, - sizeof(long))); + memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta, + ALIGN(headlen + metasize, sizeof(long))); + + if (metasize) { + skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } /* if we exhaust the linear part then add what is left as a frag */ size -= headlen; @@ -1080,7 +1089,7 @@ ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) { /* if we are the last buffer then there is nothing else to do */ #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) - if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF))) + if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF))) return false; rx_ring->rx_stats.non_eop_descs++; @@ -1142,7 +1151,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) * hardware wrote DD then it will be non-zero */ stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S); - if (!ice_test_staterr(rx_desc, stat_err_bits)) + if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits)) break; /* This memory barrier is needed to keep us from reading @@ -1156,7 +1165,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) struct ice_vsi *ctrl_vsi = rx_ring->vsi; if (rx_desc->wb.rxdid == FDIR_DESC_RXDID && - ctrl_vsi->vf_id != ICE_INVAL_VFID) + ctrl_vsi->vf) ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc); ice_put_rx_buf(rx_ring, NULL, 0); cleaned_count++; @@ -1228,14 +1237,13 @@ construct_skb: continue; stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); - if (unlikely(ice_test_staterr(rx_desc, stat_err_bits))) { + if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, + stat_err_bits))) { dev_kfree_skb_any(skb); continue; } - stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S); - if (ice_test_staterr(rx_desc, stat_err_bits)) - vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1); + vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc); /* pad the skb if needed, to make a valid ethernet frame */ if (eth_skb_pad(skb)) { @@ -1460,7 +1468,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) bool wd; if (tx_ring->xsk_pool) - wd = ice_clean_tx_irq_zc(tx_ring, budget); + wd = ice_xmit_zc(tx_ring, ICE_DESC_UNUSED(tx_ring), budget); else if (ice_ring_is_xdp(tx_ring)) wd = true; else @@ -1513,7 +1521,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) /* Exit the polling mode, but don't re-enable interrupts if stack might * poll us due to busy-polling */ - if (likely(napi_complete_done(napi, work_done))) { + if (napi_complete_done(napi, work_done)) { ice_net_dim(q_vector); ice_enable_interrupt(q_vector); } else { @@ -1917,12 +1925,16 @@ ice_tx_prepare_vlan_flags(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first) if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol)) return; - /* currently, we always assume 802.1Q for VLAN insertion as VLAN - * insertion for 802.1AD is not supported + /* the VLAN ethertype/tpid is determined by VSI configuration and netdev + * feature flags, which the driver only allows either 802.1Q or 802.1ad + * VLAN offloads exclusively so we only care about the VLAN ID here */ if (skb_vlan_tag_present(skb)) { first->tx_flags |= skb_vlan_tag_get(skb) << ICE_TX_FLAGS_VLAN_S; - first->tx_flags |= ICE_TX_FLAGS_HW_VLAN; + if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2) + first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN; + else + first->tx_flags |= ICE_TX_FLAGS_HW_VLAN; } ice_tx_prepare_vlan_flags_dcb(tx_ring, first); @@ -2295,6 +2307,13 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) /* prepare the VLAN tagging flags for Tx */ ice_tx_prepare_vlan_flags(tx_ring, first); + if (first->tx_flags & ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN) { + offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX | + (ICE_TX_CTX_DESC_IL2TAG2 << + ICE_TXD_CTX_QW1_CMD_S)); + offload.cd_l2tag2 = (first->tx_flags & ICE_TX_FLAGS_VLAN_M) >> + ICE_TX_FLAGS_VLAN_S; + } /* set up TSO offload */ tso = ice_tso(first, &offload); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index b7b3bd4816f0..cead3eb149bd 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -13,7 +13,6 @@ #define ICE_MAX_CHAINED_RX_BUFS 5 #define ICE_MAX_BUF_TXD 8 #define ICE_MIN_TX_LEN 17 -#define ICE_TX_THRESH 32 /* The size limit for a transmit buffer in a descriptor is (16K - 1). * In order to align with the read requests we will align the value to @@ -111,6 +110,8 @@ static inline int ice_skb_pad(void) (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ (R)->next_to_clean - (R)->next_to_use - 1) +#define ICE_RING_QUARTER(R) ((R)->count >> 2) + #define ICE_TX_FLAGS_TSO BIT(0) #define ICE_TX_FLAGS_HW_VLAN BIT(1) #define ICE_TX_FLAGS_SW_VLAN BIT(2) @@ -122,6 +123,7 @@ static inline int ice_skb_pad(void) #define ICE_TX_FLAGS_IPV4 BIT(5) #define ICE_TX_FLAGS_IPV6 BIT(6) #define ICE_TX_FLAGS_TUNNEL BIT(7) +#define ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN BIT(8) #define ICE_TX_FLAGS_VLAN_M 0xffff0000 #define ICE_TX_FLAGS_VLAN_PR_M 0xe0000000 #define ICE_TX_FLAGS_VLAN_PR_S 29 @@ -321,18 +323,21 @@ struct ice_tx_ring { u16 count; /* Number of descriptors */ u16 q_index; /* Queue number of ring */ /* stats structs */ - struct ice_q_stats stats; - struct u64_stats_sync syncp; struct ice_txq_stats tx_stats; - /* CL3 - 3rd cacheline starts here */ + struct ice_q_stats stats; + struct u64_stats_sync syncp; struct rcu_head rcu; /* to avoid race on free */ DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ struct ice_channel *ch; struct ice_ptp_tx *tx_tstamps; spinlock_t tx_lock; u32 txq_teid; /* Added Tx queue TEID */ + /* CL4 - 4th cacheline starts here */ + u16 xdp_tx_active; #define ICE_TX_FLAGS_RING_XDP BIT(0) +#define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1) +#define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2) u8 flags; u8 dcb_tc; /* Traffic class of ring */ u8 ptp_tx; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 0e87b98e0966..7ee38d02d1e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -209,9 +209,14 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring, void ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) { - if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && - (vlan_tag & VLAN_VID_MASK)) + netdev_features_t features = rx_ring->netdev->features; + bool non_zero_vlan = !!(vlan_tag & VLAN_VID_MASK); + + if ((features & NETIF_F_HW_VLAN_CTAG_RX) && non_zero_vlan) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); + else if ((features & NETIF_F_HW_VLAN_STAG_RX) && non_zero_vlan) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), vlan_tag); + napi_gro_receive(&rx_ring->q_vector->napi, skb); } @@ -222,6 +227,7 @@ ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) { unsigned int total_bytes = 0, total_pkts = 0; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u16 ntc = xdp_ring->next_to_clean; struct ice_tx_desc *next_dd_desc; u16 next_dd = xdp_ring->next_dd; @@ -233,7 +239,7 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) return; - for (i = 0; i < ICE_TX_THRESH; i++) { + for (i = 0; i < tx_thresh; i++) { tx_buf = &xdp_ring->tx_buf[ntc]; total_bytes += tx_buf->bytecount; @@ -254,9 +260,9 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) } next_dd_desc->cmd_type_offset_bsz = 0; - xdp_ring->next_dd = xdp_ring->next_dd + ICE_TX_THRESH; + xdp_ring->next_dd = xdp_ring->next_dd + tx_thresh; if (xdp_ring->next_dd > xdp_ring->count) - xdp_ring->next_dd = ICE_TX_THRESH - 1; + xdp_ring->next_dd = tx_thresh - 1; xdp_ring->next_to_clean = ntc; ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes); } @@ -269,12 +275,13 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) */ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) { + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u16 i = xdp_ring->next_to_use; struct ice_tx_desc *tx_desc; struct ice_tx_buf *tx_buf; dma_addr_t dma; - if (ICE_DESC_UNUSED(xdp_ring) < ICE_TX_THRESH) + if (ICE_DESC_UNUSED(xdp_ring) < tx_thresh) ice_clean_xdp_irq(xdp_ring); if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) { @@ -300,13 +307,14 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0, size, 0); + xdp_ring->xdp_tx_active++; i++; if (i == xdp_ring->count) { i = 0; tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); tx_desc->cmd_type_offset_bsz |= cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs = ICE_TX_THRESH - 1; + xdp_ring->next_rs = tx_thresh - 1; } xdp_ring->next_to_use = i; @@ -314,7 +322,7 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); tx_desc->cmd_type_offset_bsz |= cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs += ICE_TX_THRESH; + xdp_ring->next_rs += tx_thresh; } return ICE_XDP_TX; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 11b6c1601986..c7d2954dc9ea 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -7,7 +7,7 @@ /** * ice_test_staterr - tests bits in Rx descriptor status and error fields - * @rx_desc: pointer to receive descriptor (in le64 format) + * @status_err_n: Rx descriptor status_error0 or status_error1 bits * @stat_err_bits: value to mask * * This function does some fast chicanery in order to return the @@ -16,9 +16,9 @@ * at offset zero. */ static inline bool -ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits) +ice_test_staterr(__le16 status_err_n, const u16 stat_err_bits) { - return !!(rx_desc->wb.status_error0 & cpu_to_le16(stat_err_bits)); + return !!(status_err_n & cpu_to_le16(stat_err_bits)); } static inline __le64 @@ -32,6 +32,30 @@ ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) } /** + * ice_get_vlan_tag_from_rx_desc - get VLAN from Rx flex descriptor + * @rx_desc: Rx 32b flex descriptor with RXDID=2 + * + * The OS and current PF implementation only support stripping a single VLAN tag + * at a time, so there should only ever be 0 or 1 tags in the l2tag* fields. If + * one is found return the tag, else return 0 to mean no VLAN tag was found. + */ +static inline u16 +ice_get_vlan_tag_from_rx_desc(union ice_32b_rx_flex_desc *rx_desc) +{ + u16 stat_err_bits; + + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S); + if (ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits)) + return le16_to_cpu(rx_desc->wb.l2tag1); + + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S); + if (ice_test_staterr(rx_desc->wb.status_error1, stat_err_bits)) + return le16_to_cpu(rx_desc->wb.l2tag2_2nd); + + return 0; +} + +/** * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register * @xdp_ring: XDP Tx ring * diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 546145dd1f02..28fcab26b868 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -15,6 +15,7 @@ #include "ice_flex_type.h" #include "ice_protocol_type.h" #include "ice_sbq_cmd.h" +#include "ice_vlan_mode.h" static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc) { @@ -54,6 +55,11 @@ static inline u32 ice_round_to_num(u32 N, u32 R) #define ICE_DBG_AQ_DESC BIT_ULL(25) #define ICE_DBG_AQ_DESC_BUF BIT_ULL(26) #define ICE_DBG_AQ_CMD BIT_ULL(27) +#define ICE_DBG_AQ (ICE_DBG_AQ_MSG | \ + ICE_DBG_AQ_DESC | \ + ICE_DBG_AQ_DESC_BUF | \ + ICE_DBG_AQ_CMD) + #define ICE_DBG_USER BIT_ULL(31) enum ice_aq_res_ids { @@ -920,6 +926,9 @@ struct ice_hw { struct udp_tunnel_nic_shared udp_tunnel_shared; struct udp_tunnel_nic_info udp_tunnel_nic; + /* dvm boost update information */ + struct ice_dvm_table dvm_upd; + /* HW block tables */ struct ice_blk_info blk[ICE_BLK_COUNT]; struct mutex fl_profs_locks[ICE_BLK_COUNT]; /* lock fltr profiles */ @@ -943,6 +952,7 @@ struct ice_hw { struct list_head rss_list_head; struct ice_mbx_snapshot mbx_snapshot; DECLARE_BITMAP(hw_ptype, ICE_FLOW_PTYPE_MAX); + u8 dvm_ena; u16 io_expander_handle; }; @@ -1008,6 +1018,15 @@ struct ice_hw_port_stats { u64 fd_sb_match; }; +enum ice_sw_fwd_act_type { + ICE_FWD_TO_VSI = 0, + ICE_FWD_TO_VSI_LIST, /* Do not use this when adding filter */ + ICE_FWD_TO_Q, + ICE_FWD_TO_QGRP, + ICE_DROP_PACKET, + ICE_INVAL_ACT +}; + struct ice_aq_get_set_rss_lut_params { u16 vsi_handle; /* software VSI handle */ u16 lut_size; /* size of the LUT buffer */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c new file mode 100644 index 000000000000..b16f946185f2 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#include "ice_vsi_vlan_ops.h" +#include "ice_vsi_vlan_lib.h" +#include "ice_vlan_mode.h" +#include "ice.h" +#include "ice_vf_vsi_vlan_ops.h" +#include "ice_virtchnl_pf.h" + +static int +noop_vlan_arg(struct ice_vsi __always_unused *vsi, + struct ice_vlan __always_unused *vlan) +{ + return 0; +} + +static int +noop_vlan(struct ice_vsi __always_unused *vsi) +{ + return 0; +} + +/** + * ice_vf_vsi_init_vlan_ops - Initialize default VSI VLAN ops for VF VSI + * @vsi: VF's VSI being configured + * + * If Double VLAN Mode (DVM) is enabled, assume that the VF supports the new + * VIRTCHNL_VF_VLAN_OFFLOAD_V2 capability and set up the VLAN ops accordingly. + * If SVM is enabled maintain the same level of VLAN support previous to + * VIRTCHNL_VF_VLAN_OFFLOAD_V2. + */ +void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) +{ + struct ice_vsi_vlan_ops *vlan_ops; + struct ice_pf *pf = vsi->back; + struct ice_vf *vf = vsi->vf; + + if (WARN_ON(!vf)) + return; + + if (ice_is_dvm_ena(&pf->hw)) { + vlan_ops = &vsi->outer_vlan_ops; + + /* outer VLAN ops regardless of port VLAN config */ + vlan_ops->add_vlan = ice_vsi_add_vlan; + vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering; + vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering; + vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering; + + if (ice_vf_is_port_vlan_ena(vf)) { + /* setup outer VLAN ops */ + vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan; + vlan_ops->ena_rx_filtering = + ice_vsi_ena_rx_vlan_filtering; + + /* setup inner VLAN ops */ + vlan_ops = &vsi->inner_vlan_ops; + vlan_ops->add_vlan = noop_vlan_arg; + vlan_ops->del_vlan = noop_vlan_arg; + vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping; + vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping; + vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion; + vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion; + } else { + if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags)) + vlan_ops->ena_rx_filtering = noop_vlan; + else + vlan_ops->ena_rx_filtering = + ice_vsi_ena_rx_vlan_filtering; + + vlan_ops->del_vlan = ice_vsi_del_vlan; + vlan_ops->ena_stripping = ice_vsi_ena_outer_stripping; + vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping; + vlan_ops->ena_insertion = ice_vsi_ena_outer_insertion; + vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion; + + /* setup inner VLAN ops */ + vlan_ops = &vsi->inner_vlan_ops; + + vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping; + vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping; + vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion; + vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion; + } + } else { + vlan_ops = &vsi->inner_vlan_ops; + + /* inner VLAN ops regardless of port VLAN config */ + vlan_ops->add_vlan = ice_vsi_add_vlan; + vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering; + vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering; + vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering; + + if (ice_vf_is_port_vlan_ena(vf)) { + vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan; + vlan_ops->ena_rx_filtering = + ice_vsi_ena_rx_vlan_filtering; + } else { + if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags)) + vlan_ops->ena_rx_filtering = noop_vlan; + else + vlan_ops->ena_rx_filtering = + ice_vsi_ena_rx_vlan_filtering; + + vlan_ops->del_vlan = ice_vsi_del_vlan; + vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping; + vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping; + vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion; + vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion; + } + } +} + +/** + * ice_vf_vsi_cfg_dvm_legacy_vlan_mode - Config VLAN mode for old VFs in DVM + * @vsi: VF's VSI being configured + * + * This should only be called when Double VLAN Mode (DVM) is enabled, there + * is not a port VLAN enabled on this VF, and the VF negotiates + * VIRTCHNL_VF_OFFLOAD_VLAN. + * + * This function sets up the VF VSI's inner and outer ice_vsi_vlan_ops and also + * initializes software only VLAN mode (i.e. allow all VLANs). Also, use no-op + * implementations for any functions that may be called during the lifetime of + * the VF so these methods do nothing and succeed. + */ +void ice_vf_vsi_cfg_dvm_legacy_vlan_mode(struct ice_vsi *vsi) +{ + struct ice_vsi_vlan_ops *vlan_ops; + struct ice_vf *vf = vsi->vf; + struct device *dev; + + if (WARN_ON(!vf)) + return; + + dev = ice_pf_to_dev(vf->pf); + + if (!ice_is_dvm_ena(&vsi->back->hw) || ice_vf_is_port_vlan_ena(vf)) + return; + + vlan_ops = &vsi->outer_vlan_ops; + + /* Rx VLAN filtering always disabled to allow software offloaded VLANs + * for VFs that only support VIRTCHNL_VF_OFFLOAD_VLAN and don't have a + * port VLAN configured + */ + vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering; + /* Don't fail when attempting to enable Rx VLAN filtering */ + vlan_ops->ena_rx_filtering = noop_vlan; + + /* Tx VLAN filtering always disabled to allow software offloaded VLANs + * for VFs that only support VIRTCHNL_VF_OFFLOAD_VLAN and don't have a + * port VLAN configured + */ + vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering; + /* Don't fail when attempting to enable Tx VLAN filtering */ + vlan_ops->ena_tx_filtering = noop_vlan; + + if (vlan_ops->dis_rx_filtering(vsi)) + dev_dbg(dev, "Failed to disable Rx VLAN filtering for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n"); + if (vlan_ops->dis_tx_filtering(vsi)) + dev_dbg(dev, "Failed to disable Tx VLAN filtering for old VF without VIRTHCNL_VF_OFFLOAD_VLAN_V2 support\n"); + + /* All outer VLAN offloads must be disabled */ + vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping; + vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion; + + if (vlan_ops->dis_stripping(vsi)) + dev_dbg(dev, "Failed to disable outer VLAN stripping for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n"); + + if (vlan_ops->dis_insertion(vsi)) + dev_dbg(dev, "Failed to disable outer VLAN insertion for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n"); + + /* All inner VLAN offloads must be disabled */ + vlan_ops = &vsi->inner_vlan_ops; + + vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping; + vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion; + + if (vlan_ops->dis_stripping(vsi)) + dev_dbg(dev, "Failed to disable inner VLAN stripping for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n"); + + if (vlan_ops->dis_insertion(vsi)) + dev_dbg(dev, "Failed to disable inner VLAN insertion for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n"); +} + +/** + * ice_vf_vsi_cfg_svm_legacy_vlan_mode - Config VLAN mode for old VFs in SVM + * @vsi: VF's VSI being configured + * + * This should only be called when Single VLAN Mode (SVM) is enabled, there is + * not a port VLAN enabled on this VF, and the VF negotiates + * VIRTCHNL_VF_OFFLOAD_VLAN. + * + * All of the normal SVM VLAN ops are identical for this case. However, by + * default Rx VLAN filtering should be turned off by default in this case. + */ +void ice_vf_vsi_cfg_svm_legacy_vlan_mode(struct ice_vsi *vsi) +{ + struct ice_vf *vf = vsi->vf; + + if (WARN_ON(!vf)) + return; + + if (ice_is_dvm_ena(&vsi->back->hw) || ice_vf_is_port_vlan_ena(vf)) + return; + + if (vsi->inner_vlan_ops.dis_rx_filtering(vsi)) + dev_dbg(ice_pf_to_dev(vf->pf), "Failed to disable Rx VLAN filtering for old VF with VIRTCHNL_VF_OFFLOAD_VLAN support\n"); +} diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h new file mode 100644 index 000000000000..875a4e615f39 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#ifndef _ICE_VF_VSI_VLAN_OPS_H_ +#define _ICE_VF_VSI_VLAN_OPS_H_ + +#include "ice_vsi_vlan_ops.h" + +struct ice_vsi; + +void ice_vf_vsi_cfg_dvm_legacy_vlan_mode(struct ice_vsi *vsi); +void ice_vf_vsi_cfg_svm_legacy_vlan_mode(struct ice_vsi *vsi); + +#ifdef CONFIG_PCI_IOV +void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi); +#else +static inline void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) { } +#endif /* CONFIG_PCI_IOV */ +#endif /* _ICE_PF_VSI_VLAN_OPS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index 9feebe5f556c..5a82216e7d03 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -55,6 +55,15 @@ static const u32 vlan_allowlist_opcodes[] = { VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING, }; +/* VIRTCHNL_VF_OFFLOAD_VLAN_V2 */ +static const u32 vlan_v2_allowlist_opcodes[] = { + VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS, VIRTCHNL_OP_ADD_VLAN_V2, + VIRTCHNL_OP_DEL_VLAN_V2, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2, + VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2, + VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2, + VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2, +}; + /* VIRTCHNL_VF_OFFLOAD_RSS_PF */ static const u32 rss_pf_allowlist_opcodes[] = { VIRTCHNL_OP_CONFIG_RSS_KEY, VIRTCHNL_OP_CONFIG_RSS_LUT, @@ -89,6 +98,7 @@ static const struct allowlist_opcode_info allowlist_opcodes[] = { ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_RSS_PF, rss_pf_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF, adv_rss_pf_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_FDIR_PF, fdir_pf_allowlist_opcodes), + ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN_V2, vlan_v2_allowlist_opcodes), }; /** diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index d64df81d4893..07989f1d08ef 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -1288,15 +1288,16 @@ ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi, union ice_32b_rx_flex_desc *rx_desc) { struct ice_pf *pf = ctrl_vsi->back; + struct ice_vf *vf = ctrl_vsi->vf; struct ice_vf_fdir_ctx *ctx_done; struct ice_vf_fdir_ctx *ctx_irq; struct ice_vf_fdir *fdir; unsigned long flags; struct device *dev; - struct ice_vf *vf; int ret; - vf = &pf->vf[ctrl_vsi->vf_id]; + if (WARN_ON(!vf)) + return; fdir = &vf->fdir; ctx_done = &fdir->ctx_done; @@ -1571,15 +1572,16 @@ err_exit: */ void ice_flush_fdir_ctx(struct ice_pf *pf) { - int i; + struct ice_vf *vf; + unsigned int bkt; if (!test_and_clear_bit(ICE_FD_VF_FLUSH_CTX, pf->state)) return; - ice_for_each_vf(pf, i) { + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { struct device *dev = ice_pf_to_dev(pf); enum virtchnl_fdir_prgm_status status; - struct ice_vf *vf = &pf->vf[i]; struct ice_vf_fdir_ctx *ctx; unsigned long flags; int ret; @@ -1633,6 +1635,7 @@ err_exit: ctx->flags &= ~ICE_VF_FDIR_CTX_VALID; spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags); } + mutex_unlock(&pf->vfs.table_lock); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 1be3cd4b2bef..45fe36db076a 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -10,6 +10,8 @@ #include "ice_eswitch.h" #include "ice_virtchnl_allowlist.h" #include "ice_flex_pipe.h" +#include "ice_vf_vsi_vlan_ops.h" +#include "ice_vlan.h" #define FIELD_SELECTOR(proto_hdr_field) \ BIT((proto_hdr_field) & PROTO_HDR_FIELD_MASK) @@ -173,18 +175,107 @@ struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf) } /** - * ice_validate_vf_id - helper to check if VF ID is valid - * @pf: pointer to the PF structure - * @vf_id: the ID of the VF to check + * ice_get_vf_by_id - Get pointer to VF by ID + * @pf: the PF private structure + * @vf_id: the VF ID to locate + * + * Locate and return a pointer to the VF structure associated with a given ID. + * Returns NULL if the ID does not have a valid VF structure associated with + * it. + * + * This function takes a reference to the VF, which must be released by + * calling ice_put_vf() once the caller is finished accessing the VF structure + * returned. */ -static int ice_validate_vf_id(struct ice_pf *pf, u16 vf_id) +struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id) { - /* vf_id range is only valid for 0-255, and should always be unsigned */ - if (vf_id >= pf->num_alloc_vfs) { - dev_err(ice_pf_to_dev(pf), "Invalid VF ID: %u\n", vf_id); - return -EINVAL; + struct ice_vf *vf; + + rcu_read_lock(); + hash_for_each_possible_rcu(pf->vfs.table, vf, entry, vf_id) { + if (vf->vf_id == vf_id) { + struct ice_vf *found; + + if (kref_get_unless_zero(&vf->refcnt)) + found = vf; + else + found = NULL; + + rcu_read_unlock(); + return found; + } } - return 0; + rcu_read_unlock(); + + return NULL; +} + +/** + * ice_release_vf - Release VF associated with a refcount + * @ref: the kref decremented to zero + * + * Callback function for kref_put to release a VF once its reference count has + * hit zero. + */ +static void ice_release_vf(struct kref *ref) +{ + struct ice_vf *vf = container_of(ref, struct ice_vf, refcnt); + + mutex_destroy(&vf->cfg_lock); + + kfree_rcu(vf, rcu); +} + +/** + * ice_put_vf - Release a reference to a VF + * @vf: the VF structure to decrease reference count on + * + * This must be called after ice_get_vf_by_id() once the reference to the VF + * structure is no longer used. Otherwise, the VF structure will never be + * freed. + */ +void ice_put_vf(struct ice_vf *vf) +{ + kref_put(&vf->refcnt, ice_release_vf); +} + +/** + * ice_has_vfs - Return true if the PF has any associated VFs + * @pf: the PF private structure + * + * Return whether or not the PF has any allocated VFs. + * + * Note that this function only guarantees that there are no VFs at the point + * of calling it. It does not guarantee that no more VFs will be added. + */ +bool ice_has_vfs(struct ice_pf *pf) +{ + /* A simple check that the hash table is not empty does not require + * the mutex or rcu_read_lock. + */ + return !hash_empty(pf->vfs.table); +} + +/** + * ice_get_num_vfs - Get number of allocated VFs + * @pf: the PF private structure + * + * Return the total number of allocated VFs. NOTE: VF IDs are not guaranteed + * to be contiguous. Do not assume that a VF ID is guaranteed to be less than + * the output of this function. + */ +u16 ice_get_num_vfs(struct ice_pf *pf) +{ + struct ice_vf *vf; + unsigned int bkt; + u16 num_vfs = 0; + + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) + num_vfs++; + rcu_read_unlock(); + + return num_vfs; } /** @@ -203,6 +294,32 @@ static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf) } /** + * ice_free_vf_entries - Free all VF entries from the hash table + * @pf: pointer to the PF structure + * + * Iterate over the VF hash table, removing and releasing all VF entries. + * Called during VF teardown or as cleanup during failed VF initialization. + */ +static void ice_free_vf_entries(struct ice_pf *pf) +{ + struct ice_vfs *vfs = &pf->vfs; + struct hlist_node *tmp; + struct ice_vf *vf; + unsigned int bkt; + + /* Remove all VFs from the hash table and release their main + * reference. Once all references to the VF are dropped, ice_put_vf() + * will call ice_release_vf which will remove the VF memory. + */ + lockdep_assert_held(&vfs->table_lock); + + hash_for_each_safe(vfs->table, bkt, tmp, vf, entry) { + hash_del_rcu(&vf->entry); + ice_put_vf(vf); + } +} + +/** * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF * @pf: pointer to the PF structure * @v_opcode: operation code @@ -215,11 +332,11 @@ ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode, enum virtchnl_status_code v_retval, u8 *msg, u16 msglen) { struct ice_hw *hw = &pf->hw; - unsigned int i; - - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; + struct ice_vf *vf; + unsigned int bkt; + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { /* Not all vfs are enabled so skip the ones that are not */ if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) @@ -231,6 +348,7 @@ ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode, ice_aq_send_msg_to_vf(hw, vf->vf_id, v_opcode, v_retval, msg, msglen, NULL); } + mutex_unlock(&pf->vfs.table_lock); } /** @@ -379,7 +497,7 @@ static void ice_free_vf_res(struct ice_vf *vf) vf->num_mac = 0; } - last_vector_idx = vf->first_vector_idx + pf->num_msix_per_vf - 1; + last_vector_idx = vf->first_vector_idx + pf->vfs.num_msix_per - 1; /* clear VF MDD event information */ memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events)); @@ -415,7 +533,7 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0); first = vf->first_vector_idx; - last = first + pf->num_msix_per_vf - 1; + last = first + pf->vfs.num_msix_per - 1; for (v = first; v <= last; v++) { u32 reg; @@ -497,14 +615,14 @@ static void ice_dis_vf_qs(struct ice_vf *vf) void ice_free_vfs(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); + struct ice_vfs *vfs = &pf->vfs; struct ice_hw *hw = &pf->hw; - unsigned int tmp, i; + struct ice_vf *vf; + unsigned int bkt; - if (!pf->vf) + if (!ice_has_vfs(pf)) return; - ice_eswitch_release(pf); - while (test_and_set_bit(ICE_VF_DIS, pf->state)) usleep_range(1000, 2000); @@ -517,12 +635,11 @@ void ice_free_vfs(struct ice_pf *pf) else dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); - tmp = pf->num_alloc_vfs; - pf->num_qps_per_vf = 0; - pf->num_alloc_vfs = 0; - for (i = 0; i < tmp; i++) { - struct ice_vf *vf = &pf->vf[i]; + mutex_lock(&vfs->table_lock); + + ice_eswitch_release(pf); + ice_for_each_vf(pf, bkt, vf) { mutex_lock(&vf->cfg_lock); ice_dis_vf_qs(vf); @@ -534,42 +651,30 @@ void ice_free_vfs(struct ice_pf *pf) ice_free_vf_res(vf); } - mutex_unlock(&vf->cfg_lock); + if (!pci_vfs_assigned(pf->pdev)) { + u32 reg_idx, bit_idx; - mutex_destroy(&vf->cfg_lock); + reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32; + bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32; + wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); + } + + /* clear malicious info since the VF is getting released */ + if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, + ICE_MAX_VF_COUNT, vf->vf_id)) + dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", + vf->vf_id); + + mutex_unlock(&vf->cfg_lock); } if (ice_sriov_free_msix_res(pf)) dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n"); - devm_kfree(dev, pf->vf); - pf->vf = NULL; + vfs->num_qps_per = 0; + ice_free_vf_entries(pf); - /* This check is for when the driver is unloaded while VFs are - * assigned. Setting the number of VFs to 0 through sysfs is caught - * before this function ever gets called. - */ - if (!pci_vfs_assigned(pf->pdev)) { - unsigned int vf_id; - - /* Acknowledge VFLR for all VFs. Without this, VFs will fail to - * work correctly when SR-IOV gets re-enabled. - */ - for (vf_id = 0; vf_id < tmp; vf_id++) { - u32 reg_idx, bit_idx; - - reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32; - bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32; - wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); - } - } - - /* clear malicious info if the VFs are getting released */ - for (i = 0; i < tmp; i++) - if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs, - ICE_MAX_VF_COUNT, i)) - dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", - i); + mutex_unlock(&vfs->table_lock); clear_bit(ICE_VF_DIS, pf->state); clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags); @@ -644,55 +749,6 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr) } /** - * ice_vsi_manage_pvid - Enable or disable port VLAN for VSI - * @vsi: the VSI to update - * @pvid_info: VLAN ID and QoS used to set the PVID VSI context field - * @enable: true for enable PVID false for disable - */ -static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 pvid_info, bool enable) -{ - struct ice_hw *hw = &vsi->back->hw; - struct ice_aqc_vsi_props *info; - struct ice_vsi_ctx *ctxt; - int ret; - - ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); - if (!ctxt) - return -ENOMEM; - - ctxt->info = vsi->info; - info = &ctxt->info; - if (enable) { - info->vlan_flags = ICE_AQ_VSI_VLAN_MODE_UNTAGGED | - ICE_AQ_VSI_PVLAN_INSERT_PVID | - ICE_AQ_VSI_VLAN_EMOD_STR; - info->sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; - } else { - info->vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING | - ICE_AQ_VSI_VLAN_MODE_ALL; - info->sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; - } - - info->pvid = cpu_to_le16(pvid_info); - info->valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID | - ICE_AQ_VSI_PROP_SW_VALID); - - ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL); - if (ret) { - dev_info(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %d aq_err %s\n", - ret, ice_aq_str(hw->adminq.sq_last_status)); - goto out; - } - - vsi->info.vlan_flags = info->vlan_flags; - vsi->info.sw_flags2 = info->sw_flags2; - vsi->info.pvid = info->pvid; -out: - kfree(ctxt); - return ret; -} - -/** * ice_vf_get_port_info - Get the VF's port info structure * @vf: VF used to get the port info structure for */ @@ -714,7 +770,7 @@ static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf) struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; - vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf->vf_id, NULL); + vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf, NULL); if (!vsi) { dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n"); @@ -741,7 +797,7 @@ struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf) struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; - vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf->vf_id, NULL); + vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf, NULL); if (!vsi) { dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n"); ice_vf_ctrl_invalidate_vsi(vf); @@ -764,7 +820,7 @@ struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf) */ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf) { - return pf->sriov_base_vector + vf->vf_id * pf->num_msix_per_vf; + return pf->sriov_base_vector + vf->vf_id * pf->vfs.num_msix_per; } /** @@ -801,43 +857,151 @@ static int ice_vf_rebuild_host_tx_rate_cfg(struct ice_vf *vf) return 0; } +static u16 ice_vf_get_port_vlan_id(struct ice_vf *vf) +{ + return vf->port_vlan_info.vid; +} + +static u8 ice_vf_get_port_vlan_prio(struct ice_vf *vf) +{ + return vf->port_vlan_info.prio; +} + +bool ice_vf_is_port_vlan_ena(struct ice_vf *vf) +{ + return (ice_vf_get_port_vlan_id(vf) || ice_vf_get_port_vlan_prio(vf)); +} + +static u16 ice_vf_get_port_vlan_tpid(struct ice_vf *vf) +{ + return vf->port_vlan_info.tpid; +} + /** * ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN * @vf: VF to add MAC filters for + * @vsi: Pointer to VSI * * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver * always re-adds either a VLAN 0 or port VLAN based filter after reset. */ -static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf) +static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf, struct ice_vsi *vsi) { + struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); struct device *dev = ice_pf_to_dev(vf->pf); - struct ice_vsi *vsi = ice_get_vf_vsi(vf); - u16 vlan_id = 0; int err; - if (vf->port_vlan_info) { - err = ice_vsi_manage_pvid(vsi, vf->port_vlan_info, true); + if (ice_vf_is_port_vlan_ena(vf)) { + err = vlan_ops->set_port_vlan(vsi, &vf->port_vlan_info); if (err) { dev_err(dev, "failed to configure port VLAN via VSI parameters for VF %u, error %d\n", vf->vf_id, err); return err; } - vlan_id = vf->port_vlan_info & VLAN_VID_MASK; + err = vlan_ops->add_vlan(vsi, &vf->port_vlan_info); + } else { + err = ice_vsi_add_vlan_zero(vsi); } - /* vlan_id will either be 0 or the port VLAN number */ - err = ice_vsi_add_vlan(vsi, vlan_id, ICE_FWD_TO_VSI); if (err) { - dev_err(dev, "failed to add %s VLAN %u filter for VF %u, error %d\n", - vf->port_vlan_info ? "port" : "", vlan_id, vf->vf_id, - err); + dev_err(dev, "failed to add VLAN %u filter for VF %u during VF rebuild, error %d\n", + ice_vf_is_port_vlan_ena(vf) ? + ice_vf_get_port_vlan_id(vf) : 0, vf->vf_id, err); return err; } + err = vlan_ops->ena_rx_filtering(vsi); + if (err) + dev_warn(dev, "failed to enable Rx VLAN filtering for VF %d VSI %d during VF rebuild, error %d\n", + vf->vf_id, vsi->idx, err); + return 0; } +static int ice_cfg_mac_antispoof(struct ice_vsi *vsi, bool enable) +{ + struct ice_vsi_ctx *ctx; + int err; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->info.sec_flags = vsi->info.sec_flags; + ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); + + if (enable) + ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF; + else + ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF; + + err = ice_update_vsi(&vsi->back->hw, vsi->idx, ctx, NULL); + if (err) + dev_err(ice_pf_to_dev(vsi->back), "Failed to configure Tx MAC anti-spoof %s for VSI %d, error %d\n", + enable ? "ON" : "OFF", vsi->vsi_num, err); + else + vsi->info.sec_flags = ctx->info.sec_flags; + + kfree(ctx); + + return err; +} + +/** + * ice_vsi_ena_spoofchk - enable Tx spoof checking for this VSI + * @vsi: VSI to enable Tx spoof checking for + */ +static int ice_vsi_ena_spoofchk(struct ice_vsi *vsi) +{ + struct ice_vsi_vlan_ops *vlan_ops; + int err; + + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + + err = vlan_ops->ena_tx_filtering(vsi); + if (err) + return err; + + return ice_cfg_mac_antispoof(vsi, true); +} + +/** + * ice_vsi_dis_spoofchk - disable Tx spoof checking for this VSI + * @vsi: VSI to disable Tx spoof checking for + */ +static int ice_vsi_dis_spoofchk(struct ice_vsi *vsi) +{ + struct ice_vsi_vlan_ops *vlan_ops; + int err; + + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + + err = vlan_ops->dis_tx_filtering(vsi); + if (err) + return err; + + return ice_cfg_mac_antispoof(vsi, false); +} + +/** + * ice_vf_set_spoofchk_cfg - apply Tx spoof checking setting + * @vf: VF set spoofchk for + * @vsi: VSI associated to the VF + */ +static int +ice_vf_set_spoofchk_cfg(struct ice_vf *vf, struct ice_vsi *vsi) +{ + int err; + + if (vf->spoofchk) + err = ice_vsi_ena_spoofchk(vsi); + else + err = ice_vsi_dis_spoofchk(vsi); + + return err; +} + /** * ice_vf_rebuild_host_mac_cfg - add broadcast and the VF's perm_addr/LAA * @vf: VF to add MAC filters for @@ -913,12 +1077,12 @@ static void ice_ena_vf_msix_mappings(struct ice_vf *vf) hw = &pf->hw; pf_based_first_msix = vf->first_vector_idx; - pf_based_last_msix = (pf_based_first_msix + pf->num_msix_per_vf) - 1; + pf_based_last_msix = (pf_based_first_msix + pf->vfs.num_msix_per) - 1; device_based_first_msix = pf_based_first_msix + pf->hw.func_caps.common_cap.msix_vector_first_id; device_based_last_msix = - (device_based_first_msix + pf->num_msix_per_vf) - 1; + (device_based_first_msix + pf->vfs.num_msix_per) - 1; device_based_vf_id = vf->vf_id + hw->func_caps.vf_base_id; reg = (((device_based_first_msix << VPINT_ALLOC_FIRST_S) & @@ -1009,45 +1173,6 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) } /** - * ice_determine_res - * @pf: pointer to the PF structure - * @avail_res: available resources in the PF structure - * @max_res: maximum resources that can be given per VF - * @min_res: minimum resources that can be given per VF - * - * Returns non-zero value if resources (queues/vectors) are available or - * returns zero if PF cannot accommodate for all num_alloc_vfs. - */ -static int -ice_determine_res(struct ice_pf *pf, u16 avail_res, u16 max_res, u16 min_res) -{ - bool checked_min_res = false; - int res; - - /* start by checking if PF can assign max number of resources for - * all num_alloc_vfs. - * if yes, return number per VF - * If no, divide by 2 and roundup, check again - * repeat the loop till we reach a point where even minimum resources - * are not available, in that case return 0 - */ - res = max_res; - while ((res >= min_res) && !checked_min_res) { - int num_all_res; - - num_all_res = pf->num_alloc_vfs * res; - if (num_all_res <= avail_res) - return res; - - if (res == min_res) - checked_min_res = true; - - res = DIV_ROUND_UP(res, 2); - } - return 0; -} - -/** * ice_calc_vf_reg_idx - Calculate the VF's register index in the PF space * @vf: VF to calculate the register index for * @q_vector: a q_vector associated to the VF @@ -1062,7 +1187,7 @@ int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) pf = vf->pf; /* always add one to account for the OICR being the first MSIX */ - return pf->sriov_base_vector + pf->num_msix_per_vf * vf->vf_id + + return pf->sriov_base_vector + pf->vfs.num_msix_per * vf->vf_id + q_vector->v_idx + 1; } @@ -1126,6 +1251,7 @@ static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) /** * ice_set_per_vf_res - check if vectors and queues are available * @pf: pointer to the PF structure + * @num_vfs: the number of SR-IOV VFs being configured * * First, determine HW interrupts from common pool. If we allocate fewer VFs, we * get more vectors and can enable more queues per VF. Note that this does not @@ -1144,20 +1270,22 @@ static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) * Lastly, set queue and MSI-X VF variables tracked by the PF so it can be used * by each VF during VF initialization and reset. */ -static int ice_set_per_vf_res(struct ice_pf *pf) +static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) { int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker); + u16 num_msix_per_vf, num_txq, num_rxq, avail_qs; int msix_avail_per_vf, msix_avail_for_sriov; struct device *dev = ice_pf_to_dev(pf); - u16 num_msix_per_vf, num_txq, num_rxq; - if (!pf->num_alloc_vfs || max_valid_res_idx < 0) + lockdep_assert_held(&pf->vfs.table_lock); + + if (!num_vfs || max_valid_res_idx < 0) return -EINVAL; /* determine MSI-X resources per VF */ msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors - pf->irq_tracker->num_entries; - msix_avail_per_vf = msix_avail_for_sriov / pf->num_alloc_vfs; + msix_avail_per_vf = msix_avail_for_sriov / num_vfs; if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) { num_msix_per_vf = ICE_NUM_VF_MSIX_MED; } else if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_SMALL) { @@ -1169,40 +1297,43 @@ static int ice_set_per_vf_res(struct ice_pf *pf) } else { dev_err(dev, "Only %d MSI-X interrupts available for SR-IOV. Not enough to support minimum of %d MSI-X interrupts per VF for %d VFs\n", msix_avail_for_sriov, ICE_MIN_INTR_PER_VF, - pf->num_alloc_vfs); + num_vfs); return -EIO; } - /* determine queue resources per VF */ - num_txq = ice_determine_res(pf, ice_get_avail_txq_count(pf), - min_t(u16, - num_msix_per_vf - ICE_NONQ_VECS_VF, - ICE_MAX_RSS_QS_PER_VF), - ICE_MIN_QS_PER_VF); + num_txq = min_t(u16, num_msix_per_vf - ICE_NONQ_VECS_VF, + ICE_MAX_RSS_QS_PER_VF); + avail_qs = ice_get_avail_txq_count(pf) / num_vfs; + if (!avail_qs) + num_txq = 0; + else if (num_txq > avail_qs) + num_txq = rounddown_pow_of_two(avail_qs); - num_rxq = ice_determine_res(pf, ice_get_avail_rxq_count(pf), - min_t(u16, - num_msix_per_vf - ICE_NONQ_VECS_VF, - ICE_MAX_RSS_QS_PER_VF), - ICE_MIN_QS_PER_VF); + num_rxq = min_t(u16, num_msix_per_vf - ICE_NONQ_VECS_VF, + ICE_MAX_RSS_QS_PER_VF); + avail_qs = ice_get_avail_rxq_count(pf) / num_vfs; + if (!avail_qs) + num_rxq = 0; + else if (num_rxq > avail_qs) + num_rxq = rounddown_pow_of_two(avail_qs); - if (!num_txq || !num_rxq) { + if (num_txq < ICE_MIN_QS_PER_VF || num_rxq < ICE_MIN_QS_PER_VF) { dev_err(dev, "Not enough queues to support minimum of %d queue pairs per VF for %d VFs\n", - ICE_MIN_QS_PER_VF, pf->num_alloc_vfs); + ICE_MIN_QS_PER_VF, num_vfs); return -EIO; } - if (ice_sriov_set_msix_res(pf, num_msix_per_vf * pf->num_alloc_vfs)) { + if (ice_sriov_set_msix_res(pf, num_msix_per_vf * num_vfs)) { dev_err(dev, "Unable to set MSI-X resources for %d VFs\n", - pf->num_alloc_vfs); + num_vfs); return -EINVAL; } /* only allow equal Tx/Rx queue count (i.e. queue pairs) */ - pf->num_qps_per_vf = min_t(int, num_txq, num_rxq); - pf->num_msix_per_vf = num_msix_per_vf; + pf->vfs.num_qps_per = min_t(int, num_txq, num_rxq); + pf->vfs.num_msix_per = num_msix_per_vf; dev_info(dev, "Enabling %d VFs with %d vectors and %d queues per VF\n", - pf->num_alloc_vfs, pf->num_msix_per_vf, pf->num_qps_per_vf); + num_vfs, pf->vfs.num_msix_per, pf->vfs.num_qps_per); return 0; } @@ -1228,10 +1359,10 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m) struct ice_hw *hw = &vsi->back->hw; int status; - if (vf->port_vlan_info) + if (ice_vf_is_port_vlan_ena(vf)) status = ice_fltr_set_vsi_promisc(hw, vsi->idx, promisc_m, - vf->port_vlan_info & VLAN_VID_MASK); - else if (vsi->num_vlan > 1) + ice_vf_get_port_vlan_id(vf)); + else if (ice_vsi_has_non_zero_vlans(vsi)) status = ice_fltr_set_vlan_vsi_promisc(hw, vsi, promisc_m); else status = ice_fltr_set_vsi_promisc(hw, vsi->idx, promisc_m, 0); @@ -1251,10 +1382,10 @@ ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m) struct ice_hw *hw = &vsi->back->hw; int status; - if (vf->port_vlan_info) + if (ice_vf_is_port_vlan_ena(vf)) status = ice_fltr_clear_vsi_promisc(hw, vsi->idx, promisc_m, - vf->port_vlan_info & VLAN_VID_MASK); - else if (vsi->num_vlan > 1) + ice_vf_get_port_vlan_id(vf)); + else if (ice_vsi_has_non_zero_vlans(vsi)) status = ice_fltr_clear_vlan_vsi_promisc(hw, vsi, promisc_m); else status = ice_fltr_clear_vsi_promisc(hw, vsi->idx, promisc_m, 0); @@ -1339,7 +1470,7 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf) dev_err(dev, "failed to rebuild default MAC configuration for VF %d\n", vf->vf_id); - if (ice_vf_rebuild_host_vlan_cfg(vf)) + if (ice_vf_rebuild_host_vlan_cfg(vf, vsi)) dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n", vf->vf_id); @@ -1347,6 +1478,10 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf) dev_err(dev, "failed to rebuild Tx rate limiting configuration for VF %u\n", vf->vf_id); + if (ice_vf_set_spoofchk_cfg(vf, vsi)) + dev_err(dev, "failed to rebuild spoofchk configuration for VF %d\n", + vf->vf_id); + /* rebuild aggregator node config for main VF VSI */ ice_vf_rebuild_aggregator_node_cfg(vsi); } @@ -1407,6 +1542,7 @@ static void ice_vf_set_initialized(struct ice_vf *vf) clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states); clear_bit(ICE_VF_STATE_DIS, vf->vf_states); set_bit(ICE_VF_STATE_INIT, vf->vf_states); + memset(&vf->vlan_v2_caps, 0, sizeof(vf->vlan_v2_caps)); } /** @@ -1444,24 +1580,30 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; struct ice_vf *vf; - int v, i; + unsigned int bkt; /* If we don't have any VFs, then there is nothing to reset */ - if (!pf->num_alloc_vfs) + if (!ice_has_vfs(pf)) return false; + mutex_lock(&pf->vfs.table_lock); + /* clear all malicious info if the VFs are getting reset */ - ice_for_each_vf(pf, i) - if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs, ICE_MAX_VF_COUNT, i)) - dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", i); + ice_for_each_vf(pf, bkt, vf) + if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, + ICE_MAX_VF_COUNT, vf->vf_id)) + dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", + vf->vf_id); /* If VFs have been disabled, there is no need to reset */ - if (test_and_set_bit(ICE_VF_DIS, pf->state)) + if (test_and_set_bit(ICE_VF_DIS, pf->state)) { + mutex_unlock(&pf->vfs.table_lock); return false; + } /* Begin reset on all VFs at once */ - ice_for_each_vf(pf, v) - ice_trigger_vf_reset(&pf->vf[v], is_vflr, true); + ice_for_each_vf(pf, bkt, vf) + ice_trigger_vf_reset(vf, is_vflr, true); /* HW requires some time to make sure it can flush the FIFO for a VF * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in @@ -1469,36 +1611,34 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) * the VFs using a simple iterator that increments once that VF has * finished resetting. */ - for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) { - /* Check each VF in sequence */ - while (v < pf->num_alloc_vfs) { - u32 reg; - - vf = &pf->vf[v]; - reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_id)); - if (!(reg & VPGEN_VFRSTAT_VFRD_M)) { - /* only delay if the check failed */ - usleep_range(10, 20); + ice_for_each_vf(pf, bkt, vf) { + bool done = false; + unsigned int i; + u32 reg; + + for (i = 0; i < 10; i++) { + reg = rd32(&pf->hw, VPGEN_VFRSTAT(vf->vf_id)); + if (reg & VPGEN_VFRSTAT_VFRD_M) { + done = true; break; } - /* If the current VF has finished resetting, move on - * to the next VF in sequence. + /* only delay if check failed */ + usleep_range(10, 20); + } + + if (!done) { + /* Display a warning if at least one VF didn't manage + * to reset in time, but continue on with the + * operation. */ - v++; + dev_warn(dev, "VF %u reset check timeout\n", vf->vf_id); + break; } } - /* Display a warning if at least one VF didn't manage to reset in - * time, but continue on with the operation. - */ - if (v < pf->num_alloc_vfs) - dev_warn(dev, "VF reset check timeout\n"); - /* free VF resources to begin resetting the VSI state */ - ice_for_each_vf(pf, v) { - vf = &pf->vf[v]; - + ice_for_each_vf(pf, bkt, vf) { mutex_lock(&vf->cfg_lock); vf->driver_caps = 0; @@ -1526,6 +1666,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) ice_flush(hw); clear_bit(ICE_VF_DIS, pf->state); + mutex_unlock(&pf->vfs.table_lock); + return true; } @@ -1630,7 +1772,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) */ if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) { - if (vf->port_vlan_info || vsi->num_vlan) + if (ice_vf_is_port_vlan_ena(vf) || vsi->num_vlan) promisc_m = ICE_UCAST_VLAN_PROMISC_BITS; else promisc_m = ICE_UCAST_PROMISC_BITS; @@ -1662,7 +1804,8 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) ice_eswitch_replay_vf_mac_rule(vf); /* if the VF has been reset allow it to come up again */ - if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs, ICE_MAX_VF_COUNT, vf->vf_id)) + if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, + ICE_MAX_VF_COUNT, vf->vf_id)) dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", i); return true; @@ -1674,10 +1817,13 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) */ void ice_vc_notify_link_state(struct ice_pf *pf) { - int i; + struct ice_vf *vf; + unsigned int bkt; - ice_for_each_vf(pf, i) - ice_vc_notify_vf_link_state(&pf->vf[i]); + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) + ice_vc_notify_vf_link_state(vf); + mutex_unlock(&pf->vfs.table_lock); } /** @@ -1690,7 +1836,7 @@ void ice_vc_notify_reset(struct ice_pf *pf) { struct virtchnl_pf_event pfe; - if (!pf->num_alloc_vfs) + if (!ice_has_vfs(pf)) return; pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING; @@ -1706,14 +1852,7 @@ void ice_vc_notify_reset(struct ice_pf *pf) static void ice_vc_notify_vf_reset(struct ice_vf *vf) { struct virtchnl_pf_event pfe; - struct ice_pf *pf; - - if (!vf) - return; - - pf = vf->pf; - if (ice_validate_vf_id(pf, vf->vf_id)) - return; + struct ice_pf *pf = vf->pf; /* Bail out if VF is in disabled state, neither initialized, nor active * state - otherwise proceed with notifications @@ -1739,6 +1878,7 @@ static void ice_vc_notify_vf_reset(struct ice_vf *vf) */ static int ice_init_vf_vsi_res(struct ice_vf *vf) { + struct ice_vsi_vlan_ops *vlan_ops; struct ice_pf *pf = vf->pf; u8 broadcast[ETH_ALEN]; struct ice_vsi *vsi; @@ -1752,13 +1892,21 @@ static int ice_init_vf_vsi_res(struct ice_vf *vf) if (!vsi) return -ENOMEM; - err = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI); + err = ice_vsi_add_vlan_zero(vsi); if (err) { dev_warn(dev, "Failed to add VLAN 0 filter for VF %d\n", vf->vf_id); goto release_vsi; } + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + err = vlan_ops->ena_rx_filtering(vsi); + if (err) { + dev_warn(dev, "Failed to enable Rx VLAN filtering for VF %d\n", + vf->vf_id); + goto release_vsi; + } + eth_broadcast_addr(broadcast); err = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI); if (err) { @@ -1767,6 +1915,13 @@ static int ice_init_vf_vsi_res(struct ice_vf *vf) goto release_vsi; } + err = ice_vf_set_spoofchk_cfg(vf, vsi); + if (err) { + dev_warn(dev, "Failed to initialize spoofchk setting for VF %d\n", + vf->vf_id); + goto release_vsi; + } + vf->num_mac = 1; return 0; @@ -1783,11 +1938,14 @@ release_vsi: static int ice_start_vfs(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - int retval, i; + unsigned int bkt, it_cnt; + struct ice_vf *vf; + int retval; - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; + lockdep_assert_held(&pf->vfs.table_lock); + it_cnt = 0; + ice_for_each_vf(pf, bkt, vf) { ice_clear_vf_reset_trigger(vf); retval = ice_init_vf_vsi_res(vf); @@ -1800,40 +1958,63 @@ static int ice_start_vfs(struct ice_pf *pf) set_bit(ICE_VF_STATE_INIT, vf->vf_states); ice_ena_vf_mappings(vf); wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE); + it_cnt++; } ice_flush(hw); return 0; teardown: - for (i = i - 1; i >= 0; i--) { - struct ice_vf *vf = &pf->vf[i]; + ice_for_each_vf(pf, bkt, vf) { + if (it_cnt == 0) + break; ice_dis_vf_mappings(vf); ice_vf_vsi_release(vf); + it_cnt--; } return retval; } /** - * ice_set_dflt_settings_vfs - set VF defaults during initialization/creation - * @pf: PF holding reference to all VFs for default configuration + * ice_create_vf_entries - Allocate and insert VF entries + * @pf: pointer to the PF structure + * @num_vfs: the number of VFs to allocate + * + * Allocate new VF entries and insert them into the hash table. Set some + * basic default fields for initializing the new VFs. + * + * After this function exits, the hash table will have num_vfs entries + * inserted. + * + * Returns 0 on success or an integer error code on failure. */ -static void ice_set_dflt_settings_vfs(struct ice_pf *pf) +static int ice_create_vf_entries(struct ice_pf *pf, u16 num_vfs) { - int i; + struct ice_vfs *vfs = &pf->vfs; + struct ice_vf *vf; + u16 vf_id; + int err; + + lockdep_assert_held(&vfs->table_lock); - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; + for (vf_id = 0; vf_id < num_vfs; vf_id++) { + vf = kzalloc(sizeof(*vf), GFP_KERNEL); + if (!vf) { + err = -ENOMEM; + goto err_free_entries; + } + kref_init(&vf->refcnt); vf->pf = pf; - vf->vf_id = i; + vf->vf_id = vf_id; + vf->vf_sw_id = pf->first_sw; /* assign default capabilities */ set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vf->vf_caps); vf->spoofchk = true; - vf->num_vf_qs = pf->num_qps_per_vf; + vf->num_vf_qs = pf->vfs.num_qps_per; ice_vc_set_default_allowlist(vf); /* ctrl_vsi_idx will be set to a valid value only when VF @@ -1845,27 +2026,15 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf) ice_vc_set_dflt_vf_ops(&vf->vc_ops); mutex_init(&vf->cfg_lock); - } -} - -/** - * ice_alloc_vfs - allocate num_vfs in the PF structure - * @pf: PF to store the allocated VFs in - * @num_vfs: number of VFs to allocate - */ -static int ice_alloc_vfs(struct ice_pf *pf, int num_vfs) -{ - struct ice_vf *vfs; - vfs = devm_kcalloc(ice_pf_to_dev(pf), num_vfs, sizeof(*vfs), - GFP_KERNEL); - if (!vfs) - return -ENOMEM; - - pf->vf = vfs; - pf->num_alloc_vfs = num_vfs; + hash_add_rcu(vfs->table, &vf->entry, vf_id); + } return 0; + +err_free_entries: + ice_free_vf_entries(pf); + return err; } /** @@ -1886,28 +2055,29 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) ice_flush(hw); ret = pci_enable_sriov(pf->pdev, num_vfs); - if (ret) { - pf->num_alloc_vfs = 0; + if (ret) goto err_unroll_intr; - } - ret = ice_alloc_vfs(pf, num_vfs); - if (ret) - goto err_pci_disable_sriov; + mutex_lock(&pf->vfs.table_lock); - if (ice_set_per_vf_res(pf)) { + if (ice_set_per_vf_res(pf, num_vfs)) { dev_err(dev, "Not enough resources for %d VFs, try with fewer number of VFs\n", num_vfs); ret = -ENOSPC; goto err_unroll_sriov; } - ice_set_dflt_settings_vfs(pf); + ret = ice_create_vf_entries(pf, num_vfs); + if (ret) { + dev_err(dev, "Failed to allocate VF entries for %d VFs\n", + num_vfs); + goto err_unroll_sriov; + } if (ice_start_vfs(pf)) { dev_err(dev, "Failed to start VF(s)\n"); ret = -EAGAIN; - goto err_unroll_sriov; + goto err_unroll_vf_entries; } clear_bit(ICE_VF_DIS, pf->state); @@ -1920,13 +2090,14 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) if (test_and_clear_bit(ICE_OICR_INTR_DIS, pf->state)) ice_irq_dynamic_ena(hw, NULL, NULL); + mutex_unlock(&pf->vfs.table_lock); + return 0; +err_unroll_vf_entries: + ice_free_vf_entries(pf); err_unroll_sriov: - devm_kfree(dev, pf->vf); - pf->vf = NULL; - pf->num_alloc_vfs = 0; -err_pci_disable_sriov: + mutex_unlock(&pf->vfs.table_lock); pci_disable_sriov(pf->pdev); err_unroll_intr: /* rearm interrupts here */ @@ -1953,9 +2124,9 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs) else if (pre_existing_vfs && pre_existing_vfs == num_vfs) return 0; - if (num_vfs > pf->num_vfs_supported) { + if (num_vfs > pf->vfs.num_supported) { dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n", - num_vfs, pf->num_vfs_supported); + num_vfs, pf->vfs.num_supported); return -EOPNOTSUPP; } @@ -2053,19 +2224,20 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) void ice_process_vflr_event(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - unsigned int vf_id; + struct ice_vf *vf; + unsigned int bkt; u32 reg; if (!test_and_clear_bit(ICE_VFLR_EVENT_PENDING, pf->state) || - !pf->num_alloc_vfs) + !ice_has_vfs(pf)) return; - ice_for_each_vf(pf, vf_id) { - struct ice_vf *vf = &pf->vf[vf_id]; + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { u32 reg_idx, bit_idx; - reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32; - bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32; + reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32; + bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32; /* read GLGEN_VFLRSTAT register to find out the flr VFs */ reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx)); if (reg & BIT(bit_idx)) { @@ -2075,6 +2247,7 @@ void ice_process_vflr_event(struct ice_pf *pf) mutex_unlock(&vf->cfg_lock); } } + mutex_unlock(&pf->vfs.table_lock); } /** @@ -2094,22 +2267,36 @@ static void ice_vc_reset_vf(struct ice_vf *vf) * * If no VF is found who owns the pfq then return NULL, otherwise return a * pointer to the VF who owns the pfq + * + * If this function returns non-NULL, it acquires a reference count of the VF + * structure. The caller is responsible for calling ice_put_vf() to drop this + * reference. */ static struct ice_vf *ice_get_vf_from_pfq(struct ice_pf *pf, u16 pfq) { - unsigned int vf_id; + struct ice_vf *vf; + unsigned int bkt; - ice_for_each_vf(pf, vf_id) { - struct ice_vf *vf = &pf->vf[vf_id]; + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { struct ice_vsi *vsi; u16 rxq_idx; vsi = ice_get_vf_vsi(vf); ice_for_each_rxq(vsi, rxq_idx) - if (vsi->rxq_map[rxq_idx] == pfq) - return vf; + if (vsi->rxq_map[rxq_idx] == pfq) { + struct ice_vf *found; + + if (kref_get_unless_zero(&vf->refcnt)) + found = vf; + else + found = NULL; + rcu_read_unlock(); + return found; + } } + rcu_read_unlock(); return NULL; } @@ -2153,6 +2340,8 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) mutex_lock(&vf->cfg_lock); ice_vc_reset_vf(vf); mutex_unlock(&vf->cfg_lock); + + ice_put_vf(vf); } /** @@ -2173,13 +2362,7 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, struct ice_pf *pf; int aq_ret; - if (!vf) - return -EINVAL; - pf = vf->pf; - if (ice_validate_vf_id(pf, vf->vf_id)) - return -EINVAL; - dev = ice_pf_to_dev(pf); aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval, @@ -2233,7 +2416,7 @@ static u16 ice_vc_get_max_frame_size(struct ice_vf *vf) max_frame_size = pi->phy.link_info.max_frame_size; - if (vf->port_vlan_info) + if (ice_vf_is_port_vlan_ena(vf)) max_frame_size -= VLAN_HLEN; return max_frame_size; @@ -2282,8 +2465,33 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) goto err; } - if (!vsi->info.pvid) - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN; + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_VLAN_V2) { + /* VLAN offloads based on current device configuration */ + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN_V2; + } else if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_VLAN) { + /* allow VF to negotiate VIRTCHNL_VF_OFFLOAD explicitly for + * these two conditions, which amounts to guest VLAN filtering + * and offloads being based on the inner VLAN or the + * inner/single VLAN respectively and don't allow VF to + * negotiate VIRTCHNL_VF_OFFLOAD in any other cases + */ + if (ice_is_dvm_ena(&pf->hw) && ice_vf_is_port_vlan_ena(vf)) { + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN; + } else if (!ice_is_dvm_ena(&pf->hw) && + !ice_vf_is_port_vlan_ena(vf)) { + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN; + /* configure backward compatible support for VFs that + * only support VIRTCHNL_VF_OFFLOAD_VLAN, the PF is + * configured in SVM, and no port VLAN is configured + */ + ice_vf_vsi_cfg_svm_legacy_vlan_mode(vsi); + } else if (ice_is_dvm_ena(&pf->hw)) { + /* configure software offloaded VLAN support when DVM + * is enabled, but no port VLAN is enabled + */ + ice_vf_vsi_cfg_dvm_legacy_vlan_mode(vsi); + } + } if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF; @@ -2327,7 +2535,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->num_vsis = 1; /* Tx and Rx queue are equal for VF */ vfres->num_queue_pairs = vsi->num_txq; - vfres->max_vectors = pf->num_msix_per_vf; + vfres->max_vectors = pf->vfs.num_msix_per; vfres->rss_key_size = ICE_VSIQF_HKEY_ARRAY_SIZE; vfres->rss_lut_size = ICE_VSIQF_HLUT_ARRAY_SIZE; vfres->max_mtu = ice_vc_get_max_frame_size(vf); @@ -2401,7 +2609,7 @@ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) vsi = ice_find_vsi_from_id(pf, vsi_id); - return (vsi && (vsi->vf_id == vf->vf_id)); + return (vsi && (vsi->vf == vf)); } /** @@ -2886,70 +3094,54 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_pf *pf = np->vsi->back; - struct ice_vsi_ctx *ctx; struct ice_vsi *vf_vsi; struct device *dev; struct ice_vf *vf; int ret; dev = ice_pf_to_dev(pf); - if (ice_validate_vf_id(pf, vf_id)) + + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - vf = &pf->vf[vf_id]; ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; + goto out_put_vf; vf_vsi = ice_get_vf_vsi(vf); if (!vf_vsi) { netdev_err(netdev, "VSI %d for VF %d is null\n", vf->lan_vsi_idx, vf->vf_id); - return -EINVAL; + ret = -EINVAL; + goto out_put_vf; } if (vf_vsi->type != ICE_VSI_VF) { netdev_err(netdev, "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n", vf_vsi->type, vf_vsi->vsi_num, vf->vf_id); - return -ENODEV; + ret = -ENODEV; + goto out_put_vf; } if (ena == vf->spoofchk) { dev_dbg(dev, "VF spoofchk already %s\n", ena ? "ON" : "OFF"); - return 0; - } - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - ctx->info.sec_flags = vf_vsi->info.sec_flags; - ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); - if (ena) { - ctx->info.sec_flags |= - ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | - (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << - ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); - } else { - ctx->info.sec_flags &= - ~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | - (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << - ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)); - } - - ret = ice_update_vsi(&pf->hw, vf_vsi->idx, ctx, NULL); - if (ret) { - dev_err(dev, "Failed to %sable spoofchk on VF %d VSI %d\n error %d\n", - ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num, ret); - goto out; + ret = 0; + goto out_put_vf; } - /* only update spoofchk state and VSI context on success */ - vf_vsi->info.sec_flags = ctx->info.sec_flags; - vf->spoofchk = ena; + if (ena) + ret = ice_vsi_ena_spoofchk(vf_vsi); + else + ret = ice_vsi_dis_spoofchk(vf_vsi); + if (ret) + dev_err(dev, "Failed to set spoofchk %s for VF %d VSI %d\n error %d\n", + ena ? "ON" : "OFF", vf->vf_id, vf_vsi->vsi_num, ret); + else + vf->spoofchk = ena; -out: - kfree(ctx); +out_put_vf: + ice_put_vf(vf); return ret; } @@ -2962,18 +3154,22 @@ out: */ bool ice_is_any_vf_in_promisc(struct ice_pf *pf) { - int vf_idx; - - ice_for_each_vf(pf, vf_idx) { - struct ice_vf *vf = &pf->vf[vf_idx]; + bool is_vf_promisc = false; + struct ice_vf *vf; + unsigned int bkt; + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { /* found a VF that has promiscuous mode configured */ if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || - test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) - return true; + test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) { + is_vf_promisc = true; + break; + } } + rcu_read_unlock(); - return false; + return is_vf_promisc; } /** @@ -2989,6 +3185,7 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) bool rm_promisc, alluni = false, allmulti = false; struct virtchnl_promisc_info *info = (struct virtchnl_promisc_info *)msg; + struct ice_vsi_vlan_ops *vlan_ops; int mcast_err = 0, ucast_err = 0; struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; @@ -3027,16 +3224,15 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) rm_promisc = !allmulti && !alluni; - if (vsi->num_vlan || vf->port_vlan_info) { - if (rm_promisc) - ret = ice_cfg_vlan_pruning(vsi, true); - else - ret = ice_cfg_vlan_pruning(vsi, false); - if (ret) { - dev_err(dev, "Failed to configure VLAN pruning in promiscuous mode\n"); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + if (rm_promisc) + ret = vlan_ops->ena_rx_filtering(vsi); + else + ret = vlan_ops->dis_rx_filtering(vsi); + if (ret) { + dev_err(dev, "Failed to configure VLAN pruning in promiscuous mode\n"); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; } if (!test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) { @@ -3063,7 +3259,8 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) } else { u8 mcast_m, ucast_m; - if (vf->port_vlan_info || vsi->num_vlan > 1) { + if (ice_vf_is_port_vlan_ena(vf) || + ice_vsi_has_non_zero_vlans(vsi)) { mcast_m = ICE_MCAST_VLAN_PROMISC_BITS; ucast_m = ICE_UCAST_VLAN_PROMISC_BITS; } else { @@ -3492,7 +3689,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) * there is actually at least a single VF queue vector mapped */ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || - pf->num_msix_per_vf < num_q_vectors_mapped || + pf->vfs.num_msix_per < num_q_vectors_mapped || !num_q_vectors_mapped) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -3514,7 +3711,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) /* vector_id is always 0-based for each VF, and can never be * larger than or equal to the max allowed interrupts per VF */ - if (!(vector_id < pf->num_msix_per_vf) || + if (!(vector_id < pf->vfs.num_msix_per) || !ice_vc_isvalid_vsi_id(vf, vsi_id) || (!vector_id && (map->rxq_map || map->txq_map))) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -3646,7 +3843,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) /* add space for the port VLAN since the VF driver is not * expected to account for it in the MTU calculation */ - if (vf->port_vlan_info) + if (ice_vf_is_port_vlan_ena(vf)) vsi->max_frame += VLAN_HLEN; if (ice_vsi_cfg_single_rxq(vsi, q_idx)) { @@ -4058,6 +4255,33 @@ error_param: } /** + * ice_is_supported_port_vlan_proto - make sure the vlan_proto is supported + * @hw: hardware structure used to check the VLAN mode + * @vlan_proto: VLAN TPID being checked + * + * If the device is configured in Double VLAN Mode (DVM), then both ETH_P_8021Q + * and ETH_P_8021AD are supported. If the device is configured in Single VLAN + * Mode (SVM), then only ETH_P_8021Q is supported. + */ +static bool +ice_is_supported_port_vlan_proto(struct ice_hw *hw, u16 vlan_proto) +{ + bool is_supported = false; + + switch (vlan_proto) { + case ETH_P_8021Q: + is_supported = true; + break; + case ETH_P_8021AD: + if (ice_is_dvm_ena(hw)) + is_supported = true; + break; + } + + return is_supported; +} + +/** * ice_set_vf_port_vlan * @netdev: network interface device structure * @vf_id: VF identifier @@ -4072,14 +4296,12 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, __be16 vlan_proto) { struct ice_pf *pf = ice_netdev_to_pf(netdev); + u16 local_vlan_proto = ntohs(vlan_proto); struct device *dev; struct ice_vf *vf; - u16 vlanprio; int ret; dev = ice_pf_to_dev(pf); - if (ice_validate_vf_id(pf, vf_id)) - return -EINVAL; if (vlan_id >= VLAN_N_VID || qos > 7) { dev_err(dev, "Invalid Port VLAN parameters for VF %d, ID %d, QoS %d\n", @@ -4087,38 +4309,45 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, return -EINVAL; } - if (vlan_proto != htons(ETH_P_8021Q)) { - dev_err(dev, "VF VLAN protocol is not supported\n"); + if (!ice_is_supported_port_vlan_proto(&pf->hw, local_vlan_proto)) { + dev_err(dev, "VF VLAN protocol 0x%04x is not supported\n", + local_vlan_proto); return -EPROTONOSUPPORT; } - vf = &pf->vf[vf_id]; + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) + return -EINVAL; + ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; - - vlanprio = vlan_id | (qos << VLAN_PRIO_SHIFT); + goto out_put_vf; - if (vf->port_vlan_info == vlanprio) { + if (ice_vf_get_port_vlan_prio(vf) == qos && + ice_vf_get_port_vlan_tpid(vf) == local_vlan_proto && + ice_vf_get_port_vlan_id(vf) == vlan_id) { /* duplicate request, so just return success */ - dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio); - return 0; + dev_dbg(dev, "Duplicate port VLAN %u, QoS %u, TPID 0x%04x request\n", + vlan_id, qos, local_vlan_proto); + ret = 0; + goto out_put_vf; } mutex_lock(&vf->cfg_lock); - vf->port_vlan_info = vlanprio; - - if (vf->port_vlan_info) - dev_info(dev, "Setting VLAN %d, QoS 0x%x on VF %d\n", - vlan_id, qos, vf_id); + vf->port_vlan_info = ICE_VLAN(local_vlan_proto, vlan_id, qos); + if (ice_vf_is_port_vlan_ena(vf)) + dev_info(dev, "Setting VLAN %u, QoS %u, TPID 0x%04x on VF %d\n", + vlan_id, qos, local_vlan_proto, vf_id); else dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id); ice_vc_reset_vf(vf); mutex_unlock(&vf->cfg_lock); - return 0; +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -4133,6 +4362,83 @@ static bool ice_vf_vlan_offload_ena(u32 caps) } /** + * ice_is_vlan_promisc_allowed - check if VLAN promiscuous config is allowed + * @vf: VF used to determine if VLAN promiscuous config is allowed + */ +static bool ice_is_vlan_promisc_allowed(struct ice_vf *vf) +{ + if ((test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || + test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) && + test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, vf->pf->flags)) + return true; + + return false; +} + +/** + * ice_vf_ena_vlan_promisc - Enable Tx/Rx VLAN promiscuous for the VLAN + * @vsi: VF's VSI used to enable VLAN promiscuous mode + * @vlan: VLAN used to enable VLAN promiscuous + * + * This function should only be called if VLAN promiscuous mode is allowed, + * which can be determined via ice_is_vlan_promisc_allowed(). + */ +static int ice_vf_ena_vlan_promisc(struct ice_vsi *vsi, struct ice_vlan *vlan) +{ + u8 promisc_m = ICE_PROMISC_VLAN_TX | ICE_PROMISC_VLAN_RX; + int status; + + status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, + vlan->vid); + if (status && status != -EEXIST) + return status; + + return 0; +} + +/** + * ice_vf_dis_vlan_promisc - Disable Tx/Rx VLAN promiscuous for the VLAN + * @vsi: VF's VSI used to disable VLAN promiscuous mode for + * @vlan: VLAN used to disable VLAN promiscuous + * + * This function should only be called if VLAN promiscuous mode is allowed, + * which can be determined via ice_is_vlan_promisc_allowed(). + */ +static int ice_vf_dis_vlan_promisc(struct ice_vsi *vsi, struct ice_vlan *vlan) +{ + u8 promisc_m = ICE_PROMISC_VLAN_TX | ICE_PROMISC_VLAN_RX; + int status; + + status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, + vlan->vid); + if (status && status != -ENOENT) + return status; + + return 0; +} + +/** + * ice_vf_has_max_vlans - check if VF already has the max allowed VLAN filters + * @vf: VF to check against + * @vsi: VF's VSI + * + * If the VF is trusted then the VF is allowed to add as many VLANs as it + * wants to, so return false. + * + * When the VF is untrusted compare the number of non-zero VLANs + 1 to the max + * allowed VLANs for an untrusted VF. Return the result of this comparison. + */ +static bool ice_vf_has_max_vlans(struct ice_vf *vf, struct ice_vsi *vsi) +{ + if (ice_is_vf_trusted(vf)) + return false; + +#define ICE_VF_ADDED_VLAN_ZERO_FLTRS 1 + return ((ice_vsi_num_non_zero_vlans(vsi) + + ICE_VF_ADDED_VLAN_ZERO_FLTRS) >= ICE_MAX_VLAN_PER_VF); +} + +/** * ice_vc_process_vlan_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -4149,9 +4455,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) bool vlan_promisc = false; struct ice_vsi *vsi; struct device *dev; - struct ice_hw *hw; int status = 0; - u8 promisc_m; int i; dev = ice_pf_to_dev(pf); @@ -4179,15 +4483,13 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) } } - hw = &pf->hw; vsi = ice_get_vf_vsi(vf); if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - if (add_v && !ice_is_vf_trusted(vf) && - vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) { + if (add_v && ice_vf_has_max_vlans(vf, vsi)) { dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", vf->vf_id); /* There is no need to let VF know about being not trusted, @@ -4196,22 +4498,28 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) goto error_param; } - if (vsi->info.pvid) { + /* in DVM a VF can add/delete inner VLAN filters when + * VIRTCHNL_VF_OFFLOAD_VLAN is negotiated, so only reject in SVM + */ + if (ice_vf_is_port_vlan_ena(vf) && !ice_is_dvm_ena(&pf->hw)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - if ((test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || - test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) && - test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) - vlan_promisc = true; + /* in DVM VLAN promiscuous is based on the outer VLAN, which would be + * the port VLAN if VIRTCHNL_VF_OFFLOAD_VLAN was negotiated, so only + * allow vlan_promisc = true in SVM and if no port VLAN is configured + */ + vlan_promisc = ice_is_vlan_promisc_allowed(vf) && + !ice_is_dvm_ena(&pf->hw) && + !ice_vf_is_port_vlan_ena(vf); if (add_v) { for (i = 0; i < vfl->num_elements; i++) { u16 vid = vfl->vlan_id[i]; + struct ice_vlan vlan; - if (!ice_is_vf_trusted(vf) && - vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) { + if (ice_vf_has_max_vlans(vf, vsi)) { dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", vf->vf_id); /* There is no need to let VF know about being @@ -4228,29 +4536,23 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) if (!vid) continue; - status = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI); + vlan = ICE_VLAN(ETH_P_8021Q, vid, 0); + status = vsi->inner_vlan_ops.add_vlan(vsi, &vlan); if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - /* Enable VLAN pruning when non-zero VLAN is added */ - if (!vlan_promisc && vid && - !ice_vsi_is_vlan_pruning_ena(vsi)) { - status = ice_cfg_vlan_pruning(vsi, true); - if (status) { + /* Enable VLAN filtering on first non-zero VLAN */ + if (!vlan_promisc && vid && !ice_is_dvm_ena(&pf->hw)) { + if (vsi->inner_vlan_ops.ena_rx_filtering(vsi)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; dev_err(dev, "Enable VLAN pruning on VLAN ID: %d failed error-%d\n", vid, status); goto error_param; } } else if (vlan_promisc) { - /* Enable Ucast/Mcast VLAN promiscuous mode */ - promisc_m = ICE_PROMISC_VLAN_TX | - ICE_PROMISC_VLAN_RX; - - status = ice_set_vsi_promisc(hw, vsi->idx, - promisc_m, vid); + status = ice_vf_ena_vlan_promisc(vsi, &vlan); if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; dev_err(dev, "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n", @@ -4271,6 +4573,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) num_vf_vlan = vsi->num_vlan; for (i = 0; i < vfl->num_elements && i < num_vf_vlan; i++) { u16 vid = vfl->vlan_id[i]; + struct ice_vlan vlan; /* we add VLAN 0 by default for each VF so we can enable * Tx VLAN anti-spoof without triggering MDD events so @@ -4279,28 +4582,19 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) if (!vid) continue; - /* Make sure ice_vsi_kill_vlan is successful before - * updating VLAN information - */ - status = ice_vsi_kill_vlan(vsi, vid); + vlan = ICE_VLAN(ETH_P_8021Q, vid, 0); + status = vsi->inner_vlan_ops.del_vlan(vsi, &vlan); if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - /* Disable VLAN pruning when only VLAN 0 is left */ - if (vsi->num_vlan == 1 && - ice_vsi_is_vlan_pruning_ena(vsi)) - ice_cfg_vlan_pruning(vsi, false); + /* Disable VLAN filtering when only VLAN 0 is left */ + if (!ice_vsi_has_non_zero_vlans(vsi)) + vsi->inner_vlan_ops.dis_rx_filtering(vsi); - /* Disable Unicast/Multicast VLAN promiscuous mode */ - if (vlan_promisc) { - promisc_m = ICE_PROMISC_VLAN_TX | - ICE_PROMISC_VLAN_RX; - - ice_clear_vsi_promisc(hw, vsi->idx, - promisc_m, vid); - } + if (vlan_promisc) + ice_vf_dis_vlan_promisc(vsi, &vlan); } } @@ -4360,7 +4654,7 @@ static int ice_vc_ena_vlan_stripping(struct ice_vf *vf) } vsi = ice_get_vf_vsi(vf); - if (ice_vsi_manage_vlan_stripping(vsi, true)) + if (vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q)) v_ret = VIRTCHNL_STATUS_ERR_PARAM; error_param: @@ -4395,7 +4689,7 @@ static int ice_vc_dis_vlan_stripping(struct ice_vf *vf) goto error_param; } - if (ice_vsi_manage_vlan_stripping(vsi, false)) + if (vsi->inner_vlan_ops.dis_stripping(vsi)) v_ret = VIRTCHNL_STATUS_ERR_PARAM; error_param: @@ -4407,11 +4701,8 @@ error_param: * ice_vf_init_vlan_stripping - enable/disable VLAN stripping on initialization * @vf: VF to enable/disable VLAN stripping for on initialization * - * If the VIRTCHNL_VF_OFFLOAD_VLAN flag is set enable VLAN stripping, else if - * the flag is cleared then we want to disable stripping. For example, the flag - * will be cleared when port VLANs are configured by the administrator before - * passing the VF to the guest or if the AVF driver doesn't support VLAN - * offloads. + * Set the default for VLAN stripping based on whether a port VLAN is configured + * and the current VLAN mode of the device. */ static int ice_vf_init_vlan_stripping(struct ice_vf *vf) { @@ -4420,14 +4711,965 @@ static int ice_vf_init_vlan_stripping(struct ice_vf *vf) if (!vsi) return -EINVAL; - /* don't modify stripping if port VLAN is configured */ - if (vsi->info.pvid) + /* don't modify stripping if port VLAN is configured in SVM since the + * port VLAN is based on the inner/single VLAN in SVM + */ + if (ice_vf_is_port_vlan_ena(vf) && !ice_is_dvm_ena(&vsi->back->hw)) return 0; if (ice_vf_vlan_offload_ena(vf->driver_caps)) - return ice_vsi_manage_vlan_stripping(vsi, true); + return vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q); else - return ice_vsi_manage_vlan_stripping(vsi, false); + return vsi->inner_vlan_ops.dis_stripping(vsi); +} + +static u16 ice_vc_get_max_vlan_fltrs(struct ice_vf *vf) +{ + if (vf->trusted) + return VLAN_N_VID; + else + return ICE_MAX_VLAN_PER_VF; +} + +/** + * ice_vf_outer_vlan_not_allowed - check outer VLAN can be used when the device is in DVM + * @vf: VF that being checked for + */ +static bool ice_vf_outer_vlan_not_allowed(struct ice_vf *vf) +{ + if (ice_vf_is_port_vlan_ena(vf)) + return true; + + return false; +} + +/** + * ice_vc_set_dvm_caps - set VLAN capabilities when the device is in DVM + * @vf: VF that capabilities are being set for + * @caps: VLAN capabilities to populate + * + * Determine VLAN capabilities support based on whether a port VLAN is + * configured. If a port VLAN is configured then the VF should use the inner + * filtering/offload capabilities since the port VLAN is using the outer VLAN + * capabilies. + */ +static void +ice_vc_set_dvm_caps(struct ice_vf *vf, struct virtchnl_vlan_caps *caps) +{ + struct virtchnl_vlan_supported_caps *supported_caps; + + if (ice_vf_outer_vlan_not_allowed(vf)) { + /* until support for inner VLAN filtering is added when a port + * VLAN is configured, only support software offloaded inner + * VLANs when a port VLAN is confgured in DVM + */ + supported_caps = &caps->filtering.filtering_support; + supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED; + + supported_caps = &caps->offloads.stripping_support; + supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 | + VIRTCHNL_VLAN_TOGGLE | + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1; + supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED; + + supported_caps = &caps->offloads.insertion_support; + supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 | + VIRTCHNL_VLAN_TOGGLE | + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1; + supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED; + + caps->offloads.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100; + caps->offloads.ethertype_match = + VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION; + } else { + supported_caps = &caps->filtering.filtering_support; + supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED; + supported_caps->outer = VIRTCHNL_VLAN_ETHERTYPE_8100 | + VIRTCHNL_VLAN_ETHERTYPE_88A8 | + VIRTCHNL_VLAN_ETHERTYPE_9100 | + VIRTCHNL_VLAN_ETHERTYPE_AND; + caps->filtering.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100 | + VIRTCHNL_VLAN_ETHERTYPE_88A8 | + VIRTCHNL_VLAN_ETHERTYPE_9100; + + supported_caps = &caps->offloads.stripping_support; + supported_caps->inner = VIRTCHNL_VLAN_TOGGLE | + VIRTCHNL_VLAN_ETHERTYPE_8100 | + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1; + supported_caps->outer = VIRTCHNL_VLAN_TOGGLE | + VIRTCHNL_VLAN_ETHERTYPE_8100 | + VIRTCHNL_VLAN_ETHERTYPE_88A8 | + VIRTCHNL_VLAN_ETHERTYPE_9100 | + VIRTCHNL_VLAN_ETHERTYPE_XOR | + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2; + + supported_caps = &caps->offloads.insertion_support; + supported_caps->inner = VIRTCHNL_VLAN_TOGGLE | + VIRTCHNL_VLAN_ETHERTYPE_8100 | + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1; + supported_caps->outer = VIRTCHNL_VLAN_TOGGLE | + VIRTCHNL_VLAN_ETHERTYPE_8100 | + VIRTCHNL_VLAN_ETHERTYPE_88A8 | + VIRTCHNL_VLAN_ETHERTYPE_9100 | + VIRTCHNL_VLAN_ETHERTYPE_XOR | + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2; + + caps->offloads.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100; + + caps->offloads.ethertype_match = + VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION; + } + + caps->filtering.max_filters = ice_vc_get_max_vlan_fltrs(vf); +} + +/** + * ice_vc_set_svm_caps - set VLAN capabilities when the device is in SVM + * @vf: VF that capabilities are being set for + * @caps: VLAN capabilities to populate + * + * Determine VLAN capabilities support based on whether a port VLAN is + * configured. If a port VLAN is configured then the VF does not have any VLAN + * filtering or offload capabilities since the port VLAN is using the inner VLAN + * capabilities in single VLAN mode (SVM). Otherwise allow the VF to use inner + * VLAN fitlering and offload capabilities. + */ +static void +ice_vc_set_svm_caps(struct ice_vf *vf, struct virtchnl_vlan_caps *caps) +{ + struct virtchnl_vlan_supported_caps *supported_caps; + + if (ice_vf_is_port_vlan_ena(vf)) { + supported_caps = &caps->filtering.filtering_support; + supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED; + supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED; + + supported_caps = &caps->offloads.stripping_support; + supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED; + supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED; + + supported_caps = &caps->offloads.insertion_support; + supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED; + supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED; + + caps->offloads.ethertype_init = VIRTCHNL_VLAN_UNSUPPORTED; + caps->offloads.ethertype_match = VIRTCHNL_VLAN_UNSUPPORTED; + caps->filtering.max_filters = 0; + } else { + supported_caps = &caps->filtering.filtering_support; + supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100; + supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED; + caps->filtering.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100; + + supported_caps = &caps->offloads.stripping_support; + supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 | + VIRTCHNL_VLAN_TOGGLE | + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1; + supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED; + + supported_caps = &caps->offloads.insertion_support; + supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 | + VIRTCHNL_VLAN_TOGGLE | + VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1; + supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED; + + caps->offloads.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100; + caps->offloads.ethertype_match = + VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION; + caps->filtering.max_filters = ice_vc_get_max_vlan_fltrs(vf); + } +} + +/** + * ice_vc_get_offload_vlan_v2_caps - determine VF's VLAN capabilities + * @vf: VF to determine VLAN capabilities for + * + * This will only be called if the VF and PF successfully negotiated + * VIRTCHNL_VF_OFFLOAD_VLAN_V2. + * + * Set VLAN capabilities based on the current VLAN mode and whether a port VLAN + * is configured or not. + */ +static int ice_vc_get_offload_vlan_v2_caps(struct ice_vf *vf) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_vlan_caps *caps = NULL; + int err, len = 0; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + caps = kzalloc(sizeof(*caps), GFP_KERNEL); + if (!caps) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + goto out; + } + len = sizeof(*caps); + + if (ice_is_dvm_ena(&vf->pf->hw)) + ice_vc_set_dvm_caps(vf, caps); + else + ice_vc_set_svm_caps(vf, caps); + + /* store negotiated caps to prevent invalid VF messages */ + memcpy(&vf->vlan_v2_caps, caps, sizeof(*caps)); + +out: + err = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS, + v_ret, (u8 *)caps, len); + kfree(caps); + return err; +} + +/** + * ice_vc_validate_vlan_tpid - validate VLAN TPID + * @filtering_caps: negotiated/supported VLAN filtering capabilities + * @tpid: VLAN TPID used for validation + * + * Convert the VLAN TPID to a VIRTCHNL_VLAN_ETHERTYPE_* and then compare against + * the negotiated/supported filtering caps to see if the VLAN TPID is valid. + */ +static bool ice_vc_validate_vlan_tpid(u16 filtering_caps, u16 tpid) +{ + enum virtchnl_vlan_support vlan_ethertype = VIRTCHNL_VLAN_UNSUPPORTED; + + switch (tpid) { + case ETH_P_8021Q: + vlan_ethertype = VIRTCHNL_VLAN_ETHERTYPE_8100; + break; + case ETH_P_8021AD: + vlan_ethertype = VIRTCHNL_VLAN_ETHERTYPE_88A8; + break; + case ETH_P_QINQ1: + vlan_ethertype = VIRTCHNL_VLAN_ETHERTYPE_9100; + break; + } + + if (!(filtering_caps & vlan_ethertype)) + return false; + + return true; +} + +/** + * ice_vc_is_valid_vlan - validate the virtchnl_vlan + * @vc_vlan: virtchnl_vlan to validate + * + * If the VLAN TCI and VLAN TPID are 0, then this filter is invalid, so return + * false. Otherwise return true. + */ +static bool ice_vc_is_valid_vlan(struct virtchnl_vlan *vc_vlan) +{ + if (!vc_vlan->tci || !vc_vlan->tpid) + return false; + + return true; +} + +/** + * ice_vc_validate_vlan_filter_list - validate the filter list from the VF + * @vfc: negotiated/supported VLAN filtering capabilities + * @vfl: VLAN filter list from VF to validate + * + * Validate all of the filters in the VLAN filter list from the VF. If any of + * the checks fail then return false. Otherwise return true. + */ +static bool +ice_vc_validate_vlan_filter_list(struct virtchnl_vlan_filtering_caps *vfc, + struct virtchnl_vlan_filter_list_v2 *vfl) +{ + u16 i; + + if (!vfl->num_elements) + return false; + + for (i = 0; i < vfl->num_elements; i++) { + struct virtchnl_vlan_supported_caps *filtering_support = + &vfc->filtering_support; + struct virtchnl_vlan_filter *vlan_fltr = &vfl->filters[i]; + struct virtchnl_vlan *outer = &vlan_fltr->outer; + struct virtchnl_vlan *inner = &vlan_fltr->inner; + + if ((ice_vc_is_valid_vlan(outer) && + filtering_support->outer == VIRTCHNL_VLAN_UNSUPPORTED) || + (ice_vc_is_valid_vlan(inner) && + filtering_support->inner == VIRTCHNL_VLAN_UNSUPPORTED)) + return false; + + if ((outer->tci_mask && + !(filtering_support->outer & VIRTCHNL_VLAN_FILTER_MASK)) || + (inner->tci_mask && + !(filtering_support->inner & VIRTCHNL_VLAN_FILTER_MASK))) + return false; + + if (((outer->tci & VLAN_PRIO_MASK) && + !(filtering_support->outer & VIRTCHNL_VLAN_PRIO)) || + ((inner->tci & VLAN_PRIO_MASK) && + !(filtering_support->inner & VIRTCHNL_VLAN_PRIO))) + return false; + + if ((ice_vc_is_valid_vlan(outer) && + !ice_vc_validate_vlan_tpid(filtering_support->outer, outer->tpid)) || + (ice_vc_is_valid_vlan(inner) && + !ice_vc_validate_vlan_tpid(filtering_support->inner, inner->tpid))) + return false; + } + + return true; +} + +/** + * ice_vc_to_vlan - transform from struct virtchnl_vlan to struct ice_vlan + * @vc_vlan: struct virtchnl_vlan to transform + */ +static struct ice_vlan ice_vc_to_vlan(struct virtchnl_vlan *vc_vlan) +{ + struct ice_vlan vlan = { 0 }; + + vlan.prio = (vc_vlan->tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + vlan.vid = vc_vlan->tci & VLAN_VID_MASK; + vlan.tpid = vc_vlan->tpid; + + return vlan; +} + +/** + * ice_vc_vlan_action - action to perform on the virthcnl_vlan + * @vsi: VF's VSI used to perform the action + * @vlan_action: function to perform the action with (i.e. add/del) + * @vlan: VLAN filter to perform the action with + */ +static int +ice_vc_vlan_action(struct ice_vsi *vsi, + int (*vlan_action)(struct ice_vsi *, struct ice_vlan *), + struct ice_vlan *vlan) +{ + int err; + + err = vlan_action(vsi, vlan); + if (err) + return err; + + return 0; +} + +/** + * ice_vc_del_vlans - delete VLAN(s) from the virtchnl filter list + * @vf: VF used to delete the VLAN(s) + * @vsi: VF's VSI used to delete the VLAN(s) + * @vfl: virthchnl filter list used to delete the filters + */ +static int +ice_vc_del_vlans(struct ice_vf *vf, struct ice_vsi *vsi, + struct virtchnl_vlan_filter_list_v2 *vfl) +{ + bool vlan_promisc = ice_is_vlan_promisc_allowed(vf); + int err; + u16 i; + + for (i = 0; i < vfl->num_elements; i++) { + struct virtchnl_vlan_filter *vlan_fltr = &vfl->filters[i]; + struct virtchnl_vlan *vc_vlan; + + vc_vlan = &vlan_fltr->outer; + if (ice_vc_is_valid_vlan(vc_vlan)) { + struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan); + + err = ice_vc_vlan_action(vsi, + vsi->outer_vlan_ops.del_vlan, + &vlan); + if (err) + return err; + + if (vlan_promisc) + ice_vf_dis_vlan_promisc(vsi, &vlan); + } + + vc_vlan = &vlan_fltr->inner; + if (ice_vc_is_valid_vlan(vc_vlan)) { + struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan); + + err = ice_vc_vlan_action(vsi, + vsi->inner_vlan_ops.del_vlan, + &vlan); + if (err) + return err; + + /* no support for VLAN promiscuous on inner VLAN unless + * we are in Single VLAN Mode (SVM) + */ + if (!ice_is_dvm_ena(&vsi->back->hw) && vlan_promisc) + ice_vf_dis_vlan_promisc(vsi, &vlan); + } + } + + return 0; +} + +/** + * ice_vc_remove_vlan_v2_msg - virtchnl handler for VIRTCHNL_OP_DEL_VLAN_V2 + * @vf: VF the message was received from + * @msg: message received from the VF + */ +static int ice_vc_remove_vlan_v2_msg(struct ice_vf *vf, u8 *msg) +{ + struct virtchnl_vlan_filter_list_v2 *vfl = + (struct virtchnl_vlan_filter_list_v2 *)msg; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct ice_vsi *vsi; + + if (!ice_vc_validate_vlan_filter_list(&vf->vlan_v2_caps.filtering, + vfl)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + if (!ice_vc_isvalid_vsi_id(vf, vfl->vport_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + if (ice_vc_del_vlans(vf, vsi, vfl)) + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + +out: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN_V2, v_ret, NULL, + 0); +} + +/** + * ice_vc_add_vlans - add VLAN(s) from the virtchnl filter list + * @vf: VF used to add the VLAN(s) + * @vsi: VF's VSI used to add the VLAN(s) + * @vfl: virthchnl filter list used to add the filters + */ +static int +ice_vc_add_vlans(struct ice_vf *vf, struct ice_vsi *vsi, + struct virtchnl_vlan_filter_list_v2 *vfl) +{ + bool vlan_promisc = ice_is_vlan_promisc_allowed(vf); + int err; + u16 i; + + for (i = 0; i < vfl->num_elements; i++) { + struct virtchnl_vlan_filter *vlan_fltr = &vfl->filters[i]; + struct virtchnl_vlan *vc_vlan; + + vc_vlan = &vlan_fltr->outer; + if (ice_vc_is_valid_vlan(vc_vlan)) { + struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan); + + err = ice_vc_vlan_action(vsi, + vsi->outer_vlan_ops.add_vlan, + &vlan); + if (err) + return err; + + if (vlan_promisc) { + err = ice_vf_ena_vlan_promisc(vsi, &vlan); + if (err) + return err; + } + } + + vc_vlan = &vlan_fltr->inner; + if (ice_vc_is_valid_vlan(vc_vlan)) { + struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan); + + err = ice_vc_vlan_action(vsi, + vsi->inner_vlan_ops.add_vlan, + &vlan); + if (err) + return err; + + /* no support for VLAN promiscuous on inner VLAN unless + * we are in Single VLAN Mode (SVM) + */ + if (!ice_is_dvm_ena(&vsi->back->hw) && vlan_promisc) { + err = ice_vf_ena_vlan_promisc(vsi, &vlan); + if (err) + return err; + } + } + } + + return 0; +} + +/** + * ice_vc_validate_add_vlan_filter_list - validate add filter list from the VF + * @vsi: VF VSI used to get number of existing VLAN filters + * @vfc: negotiated/supported VLAN filtering capabilities + * @vfl: VLAN filter list from VF to validate + * + * Validate all of the filters in the VLAN filter list from the VF during the + * VIRTCHNL_OP_ADD_VLAN_V2 opcode. If any of the checks fail then return false. + * Otherwise return true. + */ +static bool +ice_vc_validate_add_vlan_filter_list(struct ice_vsi *vsi, + struct virtchnl_vlan_filtering_caps *vfc, + struct virtchnl_vlan_filter_list_v2 *vfl) +{ + u16 num_requested_filters = vsi->num_vlan + vfl->num_elements; + + if (num_requested_filters > vfc->max_filters) + return false; + + return ice_vc_validate_vlan_filter_list(vfc, vfl); +} + +/** + * ice_vc_add_vlan_v2_msg - virtchnl handler for VIRTCHNL_OP_ADD_VLAN_V2 + * @vf: VF the message was received from + * @msg: message received from the VF + */ +static int ice_vc_add_vlan_v2_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_vlan_filter_list_v2 *vfl = + (struct virtchnl_vlan_filter_list_v2 *)msg; + struct ice_vsi *vsi; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + if (!ice_vc_isvalid_vsi_id(vf, vfl->vport_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + if (!ice_vc_validate_add_vlan_filter_list(vsi, + &vf->vlan_v2_caps.filtering, + vfl)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + if (ice_vc_add_vlans(vf, vsi, vfl)) + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + +out: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN_V2, v_ret, NULL, + 0); +} + +/** + * ice_vc_valid_vlan_setting - validate VLAN setting + * @negotiated_settings: negotiated VLAN settings during VF init + * @ethertype_setting: ethertype(s) requested for the VLAN setting + */ +static bool +ice_vc_valid_vlan_setting(u32 negotiated_settings, u32 ethertype_setting) +{ + if (ethertype_setting && !(negotiated_settings & ethertype_setting)) + return false; + + /* only allow a single VIRTCHNL_VLAN_ETHERTYPE if + * VIRTHCNL_VLAN_ETHERTYPE_AND is not negotiated/supported + */ + if (!(negotiated_settings & VIRTCHNL_VLAN_ETHERTYPE_AND) && + hweight32(ethertype_setting) > 1) + return false; + + /* ability to modify the VLAN setting was not negotiated */ + if (!(negotiated_settings & VIRTCHNL_VLAN_TOGGLE)) + return false; + + return true; +} + +/** + * ice_vc_valid_vlan_setting_msg - validate the VLAN setting message + * @caps: negotiated VLAN settings during VF init + * @msg: message to validate + * + * Used to validate any VLAN virtchnl message sent as a + * virtchnl_vlan_setting structure. Validates the message against the + * negotiated/supported caps during VF driver init. + */ +static bool +ice_vc_valid_vlan_setting_msg(struct virtchnl_vlan_supported_caps *caps, + struct virtchnl_vlan_setting *msg) +{ + if ((!msg->outer_ethertype_setting && + !msg->inner_ethertype_setting) || + (!caps->outer && !caps->inner)) + return false; + + if (msg->outer_ethertype_setting && + !ice_vc_valid_vlan_setting(caps->outer, + msg->outer_ethertype_setting)) + return false; + + if (msg->inner_ethertype_setting && + !ice_vc_valid_vlan_setting(caps->inner, + msg->inner_ethertype_setting)) + return false; + + return true; +} + +/** + * ice_vc_get_tpid - transform from VIRTCHNL_VLAN_ETHERTYPE_* to VLAN TPID + * @ethertype_setting: VIRTCHNL_VLAN_ETHERTYPE_* used to get VLAN TPID + * @tpid: VLAN TPID to populate + */ +static int ice_vc_get_tpid(u32 ethertype_setting, u16 *tpid) +{ + switch (ethertype_setting) { + case VIRTCHNL_VLAN_ETHERTYPE_8100: + *tpid = ETH_P_8021Q; + break; + case VIRTCHNL_VLAN_ETHERTYPE_88A8: + *tpid = ETH_P_8021AD; + break; + case VIRTCHNL_VLAN_ETHERTYPE_9100: + *tpid = ETH_P_QINQ1; + break; + default: + *tpid = 0; + return -EINVAL; + } + + return 0; +} + +/** + * ice_vc_ena_vlan_offload - enable VLAN offload based on the ethertype_setting + * @vsi: VF's VSI used to enable the VLAN offload + * @ena_offload: function used to enable the VLAN offload + * @ethertype_setting: VIRTCHNL_VLAN_ETHERTYPE_* to enable offloads for + */ +static int +ice_vc_ena_vlan_offload(struct ice_vsi *vsi, + int (*ena_offload)(struct ice_vsi *vsi, u16 tpid), + u32 ethertype_setting) +{ + u16 tpid; + int err; + + err = ice_vc_get_tpid(ethertype_setting, &tpid); + if (err) + return err; + + err = ena_offload(vsi, tpid); + if (err) + return err; + + return 0; +} + +#define ICE_L2TSEL_QRX_CONTEXT_REG_IDX 3 +#define ICE_L2TSEL_BIT_OFFSET 23 +enum ice_l2tsel { + ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND, + ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG1, +}; + +/** + * ice_vsi_update_l2tsel - update l2tsel field for all Rx rings on this VSI + * @vsi: VSI used to update l2tsel on + * @l2tsel: l2tsel setting requested + * + * Use the l2tsel setting to update all of the Rx queue context bits for l2tsel. + * This will modify which descriptor field the first offloaded VLAN will be + * stripped into. + */ +static void ice_vsi_update_l2tsel(struct ice_vsi *vsi, enum ice_l2tsel l2tsel) +{ + struct ice_hw *hw = &vsi->back->hw; + u32 l2tsel_bit; + int i; + + if (l2tsel == ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND) + l2tsel_bit = 0; + else + l2tsel_bit = BIT(ICE_L2TSEL_BIT_OFFSET); + + for (i = 0; i < vsi->alloc_rxq; i++) { + u16 pfq = vsi->rxq_map[i]; + u32 qrx_context_offset; + u32 regval; + + qrx_context_offset = + QRX_CONTEXT(ICE_L2TSEL_QRX_CONTEXT_REG_IDX, pfq); + + regval = rd32(hw, qrx_context_offset); + regval &= ~BIT(ICE_L2TSEL_BIT_OFFSET); + regval |= l2tsel_bit; + wr32(hw, qrx_context_offset, regval); + } +} + +/** + * ice_vc_ena_vlan_stripping_v2_msg + * @vf: VF the message was received from + * @msg: message received from the VF + * + * virthcnl handler for VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2 + */ +static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_vlan_supported_caps *stripping_support; + struct virtchnl_vlan_setting *strip_msg = + (struct virtchnl_vlan_setting *)msg; + u32 ethertype_setting; + struct ice_vsi *vsi; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + if (!ice_vc_isvalid_vsi_id(vf, strip_msg->vport_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + stripping_support = &vf->vlan_v2_caps.offloads.stripping_support; + if (!ice_vc_valid_vlan_setting_msg(stripping_support, strip_msg)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + ethertype_setting = strip_msg->outer_ethertype_setting; + if (ethertype_setting) { + if (ice_vc_ena_vlan_offload(vsi, + vsi->outer_vlan_ops.ena_stripping, + ethertype_setting)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } else { + enum ice_l2tsel l2tsel = + ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND; + + /* PF tells the VF that the outer VLAN tag is always + * extracted to VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 and + * inner is always extracted to + * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1. This is needed to + * support outer stripping so the first tag always ends + * up in L2TAG2_2ND and the second/inner tag, if + * enabled, is extracted in L2TAG1. + */ + ice_vsi_update_l2tsel(vsi, l2tsel); + } + } + + ethertype_setting = strip_msg->inner_ethertype_setting; + if (ethertype_setting && + ice_vc_ena_vlan_offload(vsi, vsi->inner_vlan_ops.ena_stripping, + ethertype_setting)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + +out: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2, v_ret, NULL, 0); +} + +/** + * ice_vc_dis_vlan_stripping_v2_msg + * @vf: VF the message was received from + * @msg: message received from the VF + * + * virthcnl handler for VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 + */ +static int ice_vc_dis_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_vlan_supported_caps *stripping_support; + struct virtchnl_vlan_setting *strip_msg = + (struct virtchnl_vlan_setting *)msg; + u32 ethertype_setting; + struct ice_vsi *vsi; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + if (!ice_vc_isvalid_vsi_id(vf, strip_msg->vport_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + stripping_support = &vf->vlan_v2_caps.offloads.stripping_support; + if (!ice_vc_valid_vlan_setting_msg(stripping_support, strip_msg)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + ethertype_setting = strip_msg->outer_ethertype_setting; + if (ethertype_setting) { + if (vsi->outer_vlan_ops.dis_stripping(vsi)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } else { + enum ice_l2tsel l2tsel = + ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG1; + + /* PF tells the VF that the outer VLAN tag is always + * extracted to VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 and + * inner is always extracted to + * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1. This is needed to + * support inner stripping while outer stripping is + * disabled so that the first and only tag is extracted + * in L2TAG1. + */ + ice_vsi_update_l2tsel(vsi, l2tsel); + } + } + + ethertype_setting = strip_msg->inner_ethertype_setting; + if (ethertype_setting && vsi->inner_vlan_ops.dis_stripping(vsi)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + +out: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2, v_ret, NULL, 0); +} + +/** + * ice_vc_ena_vlan_insertion_v2_msg + * @vf: VF the message was received from + * @msg: message received from the VF + * + * virthcnl handler for VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 + */ +static int ice_vc_ena_vlan_insertion_v2_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_vlan_supported_caps *insertion_support; + struct virtchnl_vlan_setting *insertion_msg = + (struct virtchnl_vlan_setting *)msg; + u32 ethertype_setting; + struct ice_vsi *vsi; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + if (!ice_vc_isvalid_vsi_id(vf, insertion_msg->vport_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + insertion_support = &vf->vlan_v2_caps.offloads.insertion_support; + if (!ice_vc_valid_vlan_setting_msg(insertion_support, insertion_msg)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + ethertype_setting = insertion_msg->outer_ethertype_setting; + if (ethertype_setting && + ice_vc_ena_vlan_offload(vsi, vsi->outer_vlan_ops.ena_insertion, + ethertype_setting)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + ethertype_setting = insertion_msg->inner_ethertype_setting; + if (ethertype_setting && + ice_vc_ena_vlan_offload(vsi, vsi->inner_vlan_ops.ena_insertion, + ethertype_setting)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + +out: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2, v_ret, NULL, 0); +} + +/** + * ice_vc_dis_vlan_insertion_v2_msg + * @vf: VF the message was received from + * @msg: message received from the VF + * + * virthcnl handler for VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 + */ +static int ice_vc_dis_vlan_insertion_v2_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_vlan_supported_caps *insertion_support; + struct virtchnl_vlan_setting *insertion_msg = + (struct virtchnl_vlan_setting *)msg; + u32 ethertype_setting; + struct ice_vsi *vsi; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + if (!ice_vc_isvalid_vsi_id(vf, insertion_msg->vport_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + insertion_support = &vf->vlan_v2_caps.offloads.insertion_support; + if (!ice_vc_valid_vlan_setting_msg(insertion_support, insertion_msg)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + ethertype_setting = insertion_msg->outer_ethertype_setting; + if (ethertype_setting && vsi->outer_vlan_ops.dis_insertion(vsi)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + + ethertype_setting = insertion_msg->inner_ethertype_setting; + if (ethertype_setting && vsi->inner_vlan_ops.dis_insertion(vsi)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto out; + } + +out: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2, v_ret, NULL, 0); } static struct ice_vc_vf_ops ice_vc_vf_dflt_ops = { @@ -4452,6 +5694,13 @@ static struct ice_vc_vf_ops ice_vc_vf_dflt_ops = { .handle_rss_cfg_msg = ice_vc_handle_rss_cfg, .add_fdir_fltr_msg = ice_vc_add_fdir_fltr, .del_fdir_fltr_msg = ice_vc_del_fdir_fltr, + .get_offload_vlan_v2_caps = ice_vc_get_offload_vlan_v2_caps, + .add_vlan_v2_msg = ice_vc_add_vlan_v2_msg, + .remove_vlan_v2_msg = ice_vc_remove_vlan_v2_msg, + .ena_vlan_stripping_v2_msg = ice_vc_ena_vlan_stripping_v2_msg, + .dis_vlan_stripping_v2_msg = ice_vc_dis_vlan_stripping_v2_msg, + .ena_vlan_insertion_v2_msg = ice_vc_ena_vlan_insertion_v2_msg, + .dis_vlan_insertion_v2_msg = ice_vc_dis_vlan_insertion_v2_msg, }; void ice_vc_set_dflt_vf_ops(struct ice_vc_vf_ops *ops) @@ -4620,12 +5869,13 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) int err = 0; dev = ice_pf_to_dev(pf); - if (ice_validate_vf_id(pf, vf_id)) { - err = -EINVAL; - goto error_handler; - } - vf = &pf->vf[vf_id]; + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) { + dev_err(dev, "Unable to locate VF for message from VF ID %d, opcode %d, len %d\n", + vf_id, v_opcode, msglen); + return; + } /* Check if VF is disabled. */ if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) { @@ -4648,6 +5898,7 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL, 0); + ice_put_vf(vf); return; } @@ -4657,6 +5908,7 @@ error_handler: NULL, 0); dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n", vf_id, v_opcode, msglen, err); + ice_put_vf(vf); return; } @@ -4666,6 +5918,7 @@ error_handler: if (!mutex_trylock(&vf->cfg_lock)) { dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n", vf->vf_id); + ice_put_vf(vf); return; } @@ -4676,7 +5929,7 @@ error_handler: case VIRTCHNL_OP_GET_VF_RESOURCES: err = ops->get_vf_res_msg(vf, msg); if (ice_vf_init_vlan_stripping(vf)) - dev_err(dev, "Failed to initialize VLAN stripping for VF %d\n", + dev_dbg(dev, "Failed to initialize VLAN stripping for VF %d\n", vf->vf_id); ice_vc_notify_vf_link_state(vf); break; @@ -4741,6 +5994,27 @@ error_handler: case VIRTCHNL_OP_DEL_RSS_CFG: err = ops->handle_rss_cfg_msg(vf, msg, false); break; + case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS: + err = ops->get_offload_vlan_v2_caps(vf); + break; + case VIRTCHNL_OP_ADD_VLAN_V2: + err = ops->add_vlan_v2_msg(vf, msg); + break; + case VIRTCHNL_OP_DEL_VLAN_V2: + err = ops->remove_vlan_v2_msg(vf, msg); + break; + case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2: + err = ops->ena_vlan_stripping_v2_msg(vf, msg); + break; + case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2: + err = ops->dis_vlan_stripping_v2_msg(vf, msg); + break; + case VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2: + err = ops->ena_vlan_insertion_v2_msg(vf, msg); + break; + case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2: + err = ops->dis_vlan_insertion_v2_msg(vf, msg); + break; case VIRTCHNL_OP_UNKNOWN: default: dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode, @@ -4759,6 +6033,7 @@ error_handler: } mutex_unlock(&vf->cfg_lock); + ice_put_vf(vf); } /** @@ -4774,21 +6049,24 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) { struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; + int ret; - if (ice_validate_vf_id(pf, vf_id)) + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - vf = &pf->vf[vf_id]; - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; ivi->vf = vf_id; ether_addr_copy(ivi->mac, vf->hw_lan_addr.addr); /* VF configuration for VLAN and applicable QoS */ - ivi->vlan = vf->port_vlan_info & VLAN_VID_MASK; - ivi->qos = (vf->port_vlan_info & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + ivi->vlan = ice_vf_get_port_vlan_id(vf); + ivi->qos = ice_vf_get_port_vlan_prio(vf); + if (ice_vf_is_port_vlan_ena(vf)) + ivi->vlan_proto = cpu_to_be16(ice_vf_get_port_vlan_tpid(vf)); ivi->trusted = vf->trusted; ivi->spoofchk = vf->spoofchk; @@ -4800,7 +6078,10 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE; ivi->max_tx_rate = vf->max_tx_rate; ivi->min_tx_rate = vf->min_tx_rate; - return 0; + +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -4850,28 +6131,31 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) struct ice_vf *vf; int ret; - if (ice_validate_vf_id(pf, vf_id)) - return -EINVAL; - if (is_multicast_ether_addr(mac)) { netdev_err(netdev, "%pM not a valid unicast address\n", mac); return -EINVAL; } - vf = &pf->vf[vf_id]; + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) + return -EINVAL; + /* nothing left to do, unicast MAC already set */ if (ether_addr_equal(vf->dev_lan_addr.addr, mac) && - ether_addr_equal(vf->hw_lan_addr.addr, mac)) - return 0; + ether_addr_equal(vf->hw_lan_addr.addr, mac)) { + ret = 0; + goto out_put_vf; + } ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; + goto out_put_vf; if (ice_unicast_mac_exists(pf, mac)) { netdev_err(netdev, "Unicast MAC %pM already exists on this PF. Preventing setting VF %u unicast MAC address to %pM\n", mac, vf_id, mac); - return -EINVAL; + ret = -EINVAL; + goto out_put_vf; } mutex_lock(&vf->cfg_lock); @@ -4895,7 +6179,10 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) ice_vc_reset_vf(vf); mutex_unlock(&vf->cfg_lock); - return 0; + +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -4917,17 +6204,19 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) return -EOPNOTSUPP; } - if (ice_validate_vf_id(pf, vf_id)) + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - vf = &pf->vf[vf_id]; ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; + goto out_put_vf; /* Check if already trusted */ - if (trusted == vf->trusted) - return 0; + if (trusted == vf->trusted) { + ret = 0; + goto out_put_vf; + } mutex_lock(&vf->cfg_lock); @@ -4938,7 +6227,9 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) mutex_unlock(&vf->cfg_lock); - return 0; +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -4955,13 +6246,13 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) struct ice_vf *vf; int ret; - if (ice_validate_vf_id(pf, vf_id)) + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - vf = &pf->vf[vf_id]; ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; + goto out_put_vf; switch (link_state) { case IFLA_VF_LINK_STATE_AUTO: @@ -4976,12 +6267,15 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) vf->link_up = false; break; default: - return -EINVAL; + ret = -EINVAL; + goto out_put_vf; } ice_vc_notify_vf_link_state(vf); - return 0; +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -4990,10 +6284,14 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) */ static int ice_calc_all_vfs_min_tx_rate(struct ice_pf *pf) { - int rate = 0, i; + struct ice_vf *vf; + unsigned int bkt; + int rate = 0; - ice_for_each_vf(pf, i) - rate += pf->vf[i].min_tx_rate; + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) + rate += vf->min_tx_rate; + rcu_read_unlock(); return rate; } @@ -5048,13 +6346,14 @@ ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, int ret; dev = ice_pf_to_dev(pf); - if (ice_validate_vf_id(pf, vf_id)) + + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - vf = &pf->vf[vf_id]; ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; + goto out_put_vf; vsi = ice_get_vf_vsi(vf); @@ -5064,23 +6363,27 @@ ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, if (max_tx_rate && min_tx_rate > max_tx_rate) { dev_err(dev, "Cannot set min Tx rate %d Mbps greater than max Tx rate %d Mbps\n", min_tx_rate, max_tx_rate); - return -EINVAL; + ret = -EINVAL; + goto out_put_vf; } if (min_tx_rate && ice_is_dcb_active(pf)) { dev_err(dev, "DCB on PF is currently enabled. VF min Tx rate limiting not allowed on this PF.\n"); - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + goto out_put_vf; } - if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) - return -EINVAL; + if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) { + ret = -EINVAL; + goto out_put_vf; + } if (vf->min_tx_rate != (unsigned int)min_tx_rate) { ret = ice_set_min_bw_limit(vsi, (u64)min_tx_rate * 1000); if (ret) { dev_err(dev, "Unable to set min-tx-rate for VF %d\n", vf->vf_id); - return ret; + goto out_put_vf; } vf->min_tx_rate = min_tx_rate; @@ -5091,13 +6394,15 @@ ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, if (ret) { dev_err(dev, "Unable to set max-tx-rate for VF %d\n", vf->vf_id); - return ret; + goto out_put_vf; } vf->max_tx_rate = max_tx_rate; } - return 0; +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -5115,17 +6420,19 @@ int ice_get_vf_stats(struct net_device *netdev, int vf_id, struct ice_vf *vf; int ret; - if (ice_validate_vf_id(pf, vf_id)) + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - vf = &pf->vf[vf_id]; ret = ice_check_vf_ready_for_cfg(vf); if (ret) - return ret; + goto out_put_vf; vsi = ice_get_vf_vsi(vf); - if (!vsi) - return -EINVAL; + if (!vsi) { + ret = -EINVAL; + goto out_put_vf; + } ice_update_eth_stats(vsi); stats = &vsi->eth_stats; @@ -5143,7 +6450,9 @@ int ice_get_vf_stats(struct net_device *netdev, int vf_id, vf_stats->rx_dropped = stats->rx_discards; vf_stats->tx_dropped = stats->tx_discards; - return 0; +out_put_vf: + ice_put_vf(vf); + return ret; } /** @@ -5174,21 +6483,21 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - int i; + struct ice_vf *vf; + unsigned int bkt; /* check that there are pending MDD events to print */ if (!test_and_clear_bit(ICE_MDD_VF_PRINT_PENDING, pf->state)) return; /* VF MDD event logs are rate limited to one second intervals */ - if (time_is_after_jiffies(pf->last_printed_mdd_jiffies + HZ * 1)) + if (time_is_after_jiffies(pf->vfs.last_printed_mdd_jiffies + HZ * 1)) return; - pf->last_printed_mdd_jiffies = jiffies; - - ice_for_each_vf(pf, i) { - struct ice_vf *vf = &pf->vf[i]; + pf->vfs.last_printed_mdd_jiffies = jiffies; + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { /* only print Rx MDD event message if there are new events */ if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) { vf->mdd_rx_events.last_printed = @@ -5202,10 +6511,11 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf) vf->mdd_tx_events.count; dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n", - vf->mdd_tx_events.count, hw->pf_id, i, + vf->mdd_tx_events.count, hw->pf_id, vf->vf_id, vf->dev_lan_addr.addr); } } + mutex_unlock(&pf->vfs.table_lock); } /** @@ -5257,13 +6567,12 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, struct ice_vf *vf; int status; - if (ice_validate_vf_id(pf, vf_id)) + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return false; - vf = &pf->vf[vf_id]; - /* Check if VF is disabled. */ if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) - return false; + goto out_put_vf; mbxdata.num_msg_proc = num_msg_proc; mbxdata.num_pending_arq = num_msg_pending; @@ -5274,7 +6583,7 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, /* check to see if we have a malicious VF */ status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, vf_id, &malvf); if (status) - return false; + goto out_put_vf; if (malvf) { bool report_vf = false; @@ -5282,7 +6591,7 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, /* if the VF is malicious and we haven't let the user * know about it, then let them know now */ - status = ice_mbx_report_malvf(&pf->hw, pf->malvfs, + status = ice_mbx_report_malvf(&pf->hw, pf->vfs.malvfs, ICE_MAX_VF_COUNT, vf_id, &report_vf); if (status) @@ -5296,12 +6605,9 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, &vf->dev_lan_addr.addr[0], pf_vsi->netdev->dev_addr); } - - return true; } - /* if there was an error in detection or the VF is not malicious then - * return false - */ - return false; +out_put_vf: + ice_put_vf(vf); + return malvf; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 8f27255cc0cc..7f16ed9c70d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -5,6 +5,7 @@ #define _ICE_VIRTCHNL_PF_H_ #include "ice.h" #include "ice_virtchnl_fdir.h" +#include "ice_vsi_vlan_ops.h" /* Restrict number of MAC Addr and VLAN that non-trusted VF can programmed */ #define ICE_MAX_VLAN_PER_VF 8 @@ -37,8 +38,50 @@ #define ICE_MAX_VF_RESET_TRIES 40 #define ICE_MAX_VF_RESET_SLEEP_MS 20 -#define ice_for_each_vf(pf, i) \ - for ((i) = 0; (i) < (pf)->num_alloc_vfs; (i)++) +/* VF Hash Table access functions + * + * These functions provide abstraction for interacting with the VF hash table. + * In general, direct access to the hash table should be avoided outside of + * these functions where possible. + * + * The VF entries in the hash table are protected by reference counting to + * track lifetime of accesses from the table. The ice_get_vf_by_id() function + * obtains a reference to the VF structure which must be dropped by using + * ice_put_vf(). + */ + +/** + * ice_for_each_vf - Iterate over each VF entry + * @pf: pointer to the PF private structure + * @bkt: bucket index used for iteration + * @vf: pointer to the VF entry currently being processed in the loop. + * + * The bkt variable is an unsigned integer iterator used to traverse the VF + * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is. + * Use vf->vf_id to get the id number if needed. + * + * The caller is expected to be under the table_lock mutex for the entire + * loop. Use this iterator if your loop is long or if it might sleep. + */ +#define ice_for_each_vf(pf, bkt, vf) \ + hash_for_each((pf)->vfs.table, (bkt), (vf), entry) + +/** + * ice_for_each_vf_rcu - Iterate over each VF entry protected by RCU + * @pf: pointer to the PF private structure + * @bkt: bucket index used for iteration + * @vf: pointer to the VF entry currently being processed in the loop. + * + * The bkt variable is an unsigned integer iterator used to traverse the VF + * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is. + * Use vf->vf_id to get the id number if needed. + * + * The caller is expected to be under rcu_read_lock() for the entire loop. + * Only use this iterator if your loop is short and you can guarantee it does + * not sleep. + */ +#define ice_for_each_vf_rcu(pf, bkt, vf) \ + hash_for_each_rcu((pf)->vfs.table, (bkt), (vf), entry) /* Specific VF states */ enum ice_vf_states { @@ -93,10 +136,31 @@ struct ice_vc_vf_ops { int (*handle_rss_cfg_msg)(struct ice_vf *vf, u8 *msg, bool add); int (*add_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg); int (*del_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg); + int (*get_offload_vlan_v2_caps)(struct ice_vf *vf); + int (*add_vlan_v2_msg)(struct ice_vf *vf, u8 *msg); + int (*remove_vlan_v2_msg)(struct ice_vf *vf, u8 *msg); + int (*ena_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg); + int (*dis_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg); + int (*ena_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg); + int (*dis_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg); +}; + +/* Virtchnl/SR-IOV config info */ +struct ice_vfs { + DECLARE_HASHTABLE(table, 8); /* table of VF entries */ + struct mutex table_lock; /* Lock for protecting the hash table */ + u16 num_supported; /* max supported VFs on this PF */ + u16 num_qps_per; /* number of queue pairs per VF */ + u16 num_msix_per; /* number of MSI-X vectors per VF */ + unsigned long last_printed_mdd_jiffies; /* MDD message rate limit */ + DECLARE_BITMAP(malvfs, ICE_MAX_VF_COUNT); /* malicious VF indicator */ }; /* VF information structure */ struct ice_vf { + struct hlist_node entry; + struct rcu_head rcu; + struct kref refcnt; struct ice_pf *pf; /* Used during virtchnl message handling and NDO ops against the VF @@ -118,7 +182,8 @@ struct ice_vf { struct ice_time_mac legacy_last_added_umac; DECLARE_BITMAP(txq_ena, ICE_MAX_RSS_QS_PER_VF); DECLARE_BITMAP(rxq_ena, ICE_MAX_RSS_QS_PER_VF); - u16 port_vlan_info; /* Port VLAN ID and QoS */ + struct ice_vlan port_vlan_info; /* Port VLAN ID, QoS, and TPID */ + struct virtchnl_vlan_caps vlan_v2_caps; u8 pf_set_mac:1; /* VF MAC address set by VMM admin */ u8 trusted:1; u8 spoofchk:1; @@ -150,6 +215,10 @@ struct ice_vf { }; #ifdef CONFIG_PCI_IOV +struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id); +void ice_put_vf(struct ice_vf *vf); +bool ice_has_vfs(struct ice_pf *pf); +u16 ice_get_num_vfs(struct ice_pf *pf); struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf); void ice_process_vflr_event(struct ice_pf *pf); int ice_sriov_configure(struct pci_dev *pdev, int num_vfs); @@ -207,7 +276,27 @@ int ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, enum virtchnl_status_code v_retval, u8 *msg, u16 msglen); bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id); +bool ice_vf_is_port_vlan_ena(struct ice_vf *vf); #else /* CONFIG_PCI_IOV */ +static inline struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id) +{ + return NULL; +} + +static inline void ice_put_vf(struct ice_vf *vf) +{ +} + +static inline bool ice_has_vfs(struct ice_pf *pf) +{ + return false; +} + +static inline u16 ice_get_num_vfs(struct ice_pf *pf) +{ + return 0; +} + static inline void ice_process_vflr_event(struct ice_pf *pf) { } static inline void ice_free_vfs(struct ice_pf *pf) { } static inline @@ -339,5 +428,10 @@ static inline bool ice_is_any_vf_in_promisc(struct ice_pf __always_unused *pf) { return false; } + +static inline bool ice_vf_is_port_vlan_ena(struct ice_vf __always_unused *vf) +{ + return false; +} #endif /* CONFIG_PCI_IOV */ #endif /* _ICE_VIRTCHNL_PF_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_vlan.h b/drivers/net/ethernet/intel/ice/ice_vlan.h new file mode 100644 index 000000000000..bc4550a03173 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_vlan.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#ifndef _ICE_VLAN_H_ +#define _ICE_VLAN_H_ + +#include <linux/types.h> +#include "ice_type.h" + +struct ice_vlan { + u16 tpid; + u16 vid; + u8 prio; +}; + +#define ICE_VLAN(tpid, vid, prio) ((struct ice_vlan){ tpid, vid, prio }) + +#endif /* _ICE_VLAN_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_vlan_mode.c b/drivers/net/ethernet/intel/ice/ice_vlan_mode.c new file mode 100644 index 000000000000..1b618de592b7 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_vlan_mode.c @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#include "ice_common.h" + +/** + * ice_pkg_get_supported_vlan_mode - determine if DDP supports Double VLAN mode + * @hw: pointer to the HW struct + * @dvm: output variable to determine if DDP supports DVM(true) or SVM(false) + */ +static int +ice_pkg_get_supported_vlan_mode(struct ice_hw *hw, bool *dvm) +{ + u16 meta_init_size = sizeof(struct ice_meta_init_section); + struct ice_meta_init_section *sect; + struct ice_buf_build *bld; + int status; + + /* if anything fails, we assume there is no DVM support */ + *dvm = false; + + bld = ice_pkg_buf_alloc_single_section(hw, + ICE_SID_RXPARSER_METADATA_INIT, + meta_init_size, (void **)§); + if (!bld) + return -ENOMEM; + + /* only need to read a single section */ + sect->count = cpu_to_le16(1); + sect->offset = cpu_to_le16(ICE_META_VLAN_MODE_ENTRY); + + status = ice_aq_upload_section(hw, + (struct ice_buf_hdr *)ice_pkg_buf(bld), + ICE_PKG_BUF_SIZE, NULL); + if (!status) { + DECLARE_BITMAP(entry, ICE_META_INIT_BITS); + u32 arr[ICE_META_INIT_DW_CNT]; + u16 i; + + /* convert to host bitmap format */ + for (i = 0; i < ICE_META_INIT_DW_CNT; i++) + arr[i] = le32_to_cpu(sect->entry.bm[i]); + + bitmap_from_arr32(entry, arr, (u16)ICE_META_INIT_BITS); + + /* check if DVM is supported */ + *dvm = test_bit(ICE_META_VLAN_MODE_BIT, entry); + } + + ice_pkg_buf_free(hw, bld); + + return status; +} + +/** + * ice_aq_get_vlan_mode - get the VLAN mode of the device + * @hw: pointer to the HW structure + * @get_params: structure FW fills in based on the current VLAN mode config + * + * Get VLAN Mode Parameters (0x020D) + */ +static int +ice_aq_get_vlan_mode(struct ice_hw *hw, + struct ice_aqc_get_vlan_mode *get_params) +{ + struct ice_aq_desc desc; + + if (!get_params) + return -EINVAL; + + ice_fill_dflt_direct_cmd_desc(&desc, + ice_aqc_opc_get_vlan_mode_parameters); + + return ice_aq_send_cmd(hw, &desc, get_params, sizeof(*get_params), + NULL); +} + +/** + * ice_aq_is_dvm_ena - query FW to check if double VLAN mode is enabled + * @hw: pointer to the HW structure + * + * Returns true if the hardware/firmware is configured in double VLAN mode, + * else return false signaling that the hardware/firmware is configured in + * single VLAN mode. + * + * Also, return false if this call fails for any reason (i.e. firmware doesn't + * support this AQ call). + */ +static bool ice_aq_is_dvm_ena(struct ice_hw *hw) +{ + struct ice_aqc_get_vlan_mode get_params = { 0 }; + int status; + + status = ice_aq_get_vlan_mode(hw, &get_params); + if (status) { + ice_debug(hw, ICE_DBG_AQ, "Failed to get VLAN mode, status %d\n", + status); + return false; + } + + return (get_params.vlan_mode & ICE_AQ_VLAN_MODE_DVM_ENA); +} + +/** + * ice_is_dvm_ena - check if double VLAN mode is enabled + * @hw: pointer to the HW structure + * + * The device is configured in single or double VLAN mode on initialization and + * this cannot be dynamically changed during runtime. Based on this there is no + * need to make an AQ call every time the driver needs to know the VLAN mode. + * Instead, use the cached VLAN mode. + */ +bool ice_is_dvm_ena(struct ice_hw *hw) +{ + return hw->dvm_ena; +} + +/** + * ice_cache_vlan_mode - cache VLAN mode after DDP is downloaded + * @hw: pointer to the HW structure + * + * This is only called after downloading the DDP and after the global + * configuration lock has been released because all ports on a device need to + * cache the VLAN mode. + */ +static void ice_cache_vlan_mode(struct ice_hw *hw) +{ + hw->dvm_ena = ice_aq_is_dvm_ena(hw) ? true : false; +} + +/** + * ice_pkg_supports_dvm - find out if DDP supports DVM + * @hw: pointer to the HW structure + */ +static bool ice_pkg_supports_dvm(struct ice_hw *hw) +{ + bool pkg_supports_dvm; + int status; + + status = ice_pkg_get_supported_vlan_mode(hw, &pkg_supports_dvm); + if (status) { + ice_debug(hw, ICE_DBG_PKG, "Failed to get supported VLAN mode, status %d\n", + status); + return false; + } + + return pkg_supports_dvm; +} + +/** + * ice_fw_supports_dvm - find out if FW supports DVM + * @hw: pointer to the HW structure + */ +static bool ice_fw_supports_dvm(struct ice_hw *hw) +{ + struct ice_aqc_get_vlan_mode get_vlan_mode = { 0 }; + int status; + + /* If firmware returns success, then it supports DVM, else it only + * supports SVM + */ + status = ice_aq_get_vlan_mode(hw, &get_vlan_mode); + if (status) { + ice_debug(hw, ICE_DBG_NVM, "Failed to get VLAN mode, status %d\n", + status); + return false; + } + + return true; +} + +/** + * ice_is_dvm_supported - check if Double VLAN Mode is supported + * @hw: pointer to the hardware structure + * + * Returns true if Double VLAN Mode (DVM) is supported and false if only Single + * VLAN Mode (SVM) is supported. In order for DVM to be supported the DDP and + * firmware must support it, otherwise only SVM is supported. This function + * should only be called while the global config lock is held and after the + * package has been successfully downloaded. + */ +static bool ice_is_dvm_supported(struct ice_hw *hw) +{ + if (!ice_pkg_supports_dvm(hw)) { + ice_debug(hw, ICE_DBG_PKG, "DDP doesn't support DVM\n"); + return false; + } + + if (!ice_fw_supports_dvm(hw)) { + ice_debug(hw, ICE_DBG_PKG, "FW doesn't support DVM\n"); + return false; + } + + return true; +} + +#define ICE_EXTERNAL_VLAN_ID_FV_IDX 11 +#define ICE_SW_LKUP_VLAN_LOC_LKUP_IDX 1 +#define ICE_SW_LKUP_VLAN_PKT_FLAGS_LKUP_IDX 2 +#define ICE_SW_LKUP_PROMISC_VLAN_LOC_LKUP_IDX 2 +#define ICE_PKT_FLAGS_0_TO_15_FV_IDX 1 +#define ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK 0xD000 +static struct ice_update_recipe_lkup_idx_params ice_dvm_dflt_recipes[] = { + { + /* Update recipe ICE_SW_LKUP_VLAN to filter based on the + * outer/single VLAN in DVM + */ + .rid = ICE_SW_LKUP_VLAN, + .fv_idx = ICE_EXTERNAL_VLAN_ID_FV_IDX, + .ignore_valid = true, + .mask = 0, + .mask_valid = false, /* use pre-existing mask */ + .lkup_idx = ICE_SW_LKUP_VLAN_LOC_LKUP_IDX, + }, + { + /* Update recipe ICE_SW_LKUP_VLAN to filter based on the VLAN + * packet flags to support VLAN filtering on multiple VLAN + * ethertypes (i.e. 0x8100 and 0x88a8) in DVM + */ + .rid = ICE_SW_LKUP_VLAN, + .fv_idx = ICE_PKT_FLAGS_0_TO_15_FV_IDX, + .ignore_valid = false, + .mask = ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK, + .mask_valid = true, + .lkup_idx = ICE_SW_LKUP_VLAN_PKT_FLAGS_LKUP_IDX, + }, + { + /* Update recipe ICE_SW_LKUP_PROMISC_VLAN to filter based on the + * outer/single VLAN in DVM + */ + .rid = ICE_SW_LKUP_PROMISC_VLAN, + .fv_idx = ICE_EXTERNAL_VLAN_ID_FV_IDX, + .ignore_valid = true, + .mask = 0, + .mask_valid = false, /* use pre-existing mask */ + .lkup_idx = ICE_SW_LKUP_PROMISC_VLAN_LOC_LKUP_IDX, + }, +}; + +/** + * ice_dvm_update_dflt_recipes - update default switch recipes in DVM + * @hw: hardware structure used to update the recipes + */ +static int ice_dvm_update_dflt_recipes(struct ice_hw *hw) +{ + unsigned long i; + + for (i = 0; i < ARRAY_SIZE(ice_dvm_dflt_recipes); i++) { + struct ice_update_recipe_lkup_idx_params *params; + int status; + + params = &ice_dvm_dflt_recipes[i]; + + status = ice_update_recipe_lkup_idx(hw, params); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to update RID %d lkup_idx %d fv_idx %d mask_valid %s mask 0x%04x\n", + params->rid, params->lkup_idx, params->fv_idx, + params->mask_valid ? "true" : "false", + params->mask); + return status; + } + } + + return 0; +} + +/** + * ice_aq_set_vlan_mode - set the VLAN mode of the device + * @hw: pointer to the HW structure + * @set_params: requested VLAN mode configuration + * + * Set VLAN Mode Parameters (0x020C) + */ +static int +ice_aq_set_vlan_mode(struct ice_hw *hw, + struct ice_aqc_set_vlan_mode *set_params) +{ + u8 rdma_packet, mng_vlan_prot_id; + struct ice_aq_desc desc; + + if (!set_params) + return -EINVAL; + + if (set_params->l2tag_prio_tagging > ICE_AQ_VLAN_PRIO_TAG_MAX) + return -EINVAL; + + rdma_packet = set_params->rdma_packet; + if (rdma_packet != ICE_AQ_SVM_VLAN_RDMA_PKT_FLAG_SETTING && + rdma_packet != ICE_AQ_DVM_VLAN_RDMA_PKT_FLAG_SETTING) + return -EINVAL; + + mng_vlan_prot_id = set_params->mng_vlan_prot_id; + if (mng_vlan_prot_id != ICE_AQ_VLAN_MNG_PROTOCOL_ID_OUTER && + mng_vlan_prot_id != ICE_AQ_VLAN_MNG_PROTOCOL_ID_INNER) + return -EINVAL; + + ice_fill_dflt_direct_cmd_desc(&desc, + ice_aqc_opc_set_vlan_mode_parameters); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + return ice_aq_send_cmd(hw, &desc, set_params, sizeof(*set_params), + NULL); +} + +/** + * ice_set_dvm - sets up software and hardware for double VLAN mode + * @hw: pointer to the hardware structure + */ +static int ice_set_dvm(struct ice_hw *hw) +{ + struct ice_aqc_set_vlan_mode params = { 0 }; + int status; + + params.l2tag_prio_tagging = ICE_AQ_VLAN_PRIO_TAG_OUTER_CTAG; + params.rdma_packet = ICE_AQ_DVM_VLAN_RDMA_PKT_FLAG_SETTING; + params.mng_vlan_prot_id = ICE_AQ_VLAN_MNG_PROTOCOL_ID_OUTER; + + status = ice_aq_set_vlan_mode(hw, ¶ms); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to set double VLAN mode parameters, status %d\n", + status); + return status; + } + + status = ice_dvm_update_dflt_recipes(hw); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to update default recipes for double VLAN mode, status %d\n", + status); + return status; + } + + status = ice_aq_set_port_params(hw->port_info, true, NULL); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to set port in double VLAN mode, status %d\n", + status); + return status; + } + + status = ice_set_dvm_boost_entries(hw); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to set boost TCAM entries for double VLAN mode, status %d\n", + status); + return status; + } + + return 0; +} + +/** + * ice_set_svm - set single VLAN mode + * @hw: pointer to the HW structure + */ +static int ice_set_svm(struct ice_hw *hw) +{ + struct ice_aqc_set_vlan_mode *set_params; + int status; + + status = ice_aq_set_port_params(hw->port_info, false, NULL); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to set port parameters for single VLAN mode\n"); + return status; + } + + set_params = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*set_params), + GFP_KERNEL); + if (!set_params) + return -ENOMEM; + + /* default configuration for SVM configurations */ + set_params->l2tag_prio_tagging = ICE_AQ_VLAN_PRIO_TAG_INNER_CTAG; + set_params->rdma_packet = ICE_AQ_SVM_VLAN_RDMA_PKT_FLAG_SETTING; + set_params->mng_vlan_prot_id = ICE_AQ_VLAN_MNG_PROTOCOL_ID_INNER; + + status = ice_aq_set_vlan_mode(hw, set_params); + if (status) + ice_debug(hw, ICE_DBG_INIT, "Failed to configure port in single VLAN mode\n"); + + devm_kfree(ice_hw_to_dev(hw), set_params); + return status; +} + +/** + * ice_set_vlan_mode + * @hw: pointer to the HW structure + */ +int ice_set_vlan_mode(struct ice_hw *hw) +{ + if (!ice_is_dvm_supported(hw)) + return 0; + + if (!ice_set_dvm(hw)) + return 0; + + return ice_set_svm(hw); +} + +/** + * ice_print_dvm_not_supported - print if DDP and/or FW doesn't support DVM + * @hw: pointer to the HW structure + * + * The purpose of this function is to print that QinQ is not supported due to + * incompatibilty from the DDP and/or FW. This will give a hint to the user to + * update one and/or both components if they expect QinQ functionality. + */ +static void ice_print_dvm_not_supported(struct ice_hw *hw) +{ + bool pkg_supports_dvm = ice_pkg_supports_dvm(hw); + bool fw_supports_dvm = ice_fw_supports_dvm(hw); + + if (!fw_supports_dvm && !pkg_supports_dvm) + dev_info(ice_hw_to_dev(hw), "QinQ functionality cannot be enabled on this device. Update your DDP package and NVM to versions that support QinQ.\n"); + else if (!pkg_supports_dvm) + dev_info(ice_hw_to_dev(hw), "QinQ functionality cannot be enabled on this device. Update your DDP package to a version that supports QinQ.\n"); + else if (!fw_supports_dvm) + dev_info(ice_hw_to_dev(hw), "QinQ functionality cannot be enabled on this device. Update your NVM to a version that supports QinQ.\n"); +} + +/** + * ice_post_pkg_dwnld_vlan_mode_cfg - configure VLAN mode after DDP download + * @hw: pointer to the HW structure + * + * This function is meant to configure any VLAN mode specific functionality + * after the global configuration lock has been released and the DDP has been + * downloaded. + * + * Since only one PF downloads the DDP and configures the VLAN mode there needs + * to be a way to configure the other PFs after the DDP has been downloaded and + * the global configuration lock has been released. All such code should go in + * this function. + */ +void ice_post_pkg_dwnld_vlan_mode_cfg(struct ice_hw *hw) +{ + ice_cache_vlan_mode(hw); + + if (ice_is_dvm_ena(hw)) + ice_change_proto_id_to_dvm(); + else + ice_print_dvm_not_supported(hw); +} diff --git a/drivers/net/ethernet/intel/ice/ice_vlan_mode.h b/drivers/net/ethernet/intel/ice/ice_vlan_mode.h new file mode 100644 index 000000000000..a0fb743d08e2 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_vlan_mode.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#ifndef _ICE_VLAN_MODE_H_ +#define _ICE_VLAN_MODE_H_ + +struct ice_hw; + +bool ice_is_dvm_ena(struct ice_hw *hw); +int ice_set_vlan_mode(struct ice_hw *hw); +void ice_post_pkg_dwnld_vlan_mode_cfg(struct ice_hw *hw); + +#endif /* _ICE_VLAN_MODE_H */ diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c new file mode 100644 index 000000000000..5b4a0abb4607 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c @@ -0,0 +1,707 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#include "ice_vsi_vlan_lib.h" +#include "ice_lib.h" +#include "ice_fltr.h" +#include "ice.h" + +static void print_invalid_tpid(struct ice_vsi *vsi, u16 tpid) +{ + dev_err(ice_pf_to_dev(vsi->back), "%s %d specified invalid VLAN tpid 0x%04x\n", + ice_vsi_type_str(vsi->type), vsi->idx, tpid); +} + +/** + * validate_vlan - check if the ice_vlan passed in is valid + * @vsi: VSI used for printing error message + * @vlan: ice_vlan structure to validate + * + * Return true if the VLAN TPID is valid or if the VLAN TPID is 0 and the VLAN + * VID is 0, which allows for non-zero VLAN filters with the specified VLAN TPID + * and untagged VLAN 0 filters to be added to the prune list respectively. + */ +static bool validate_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan) +{ + if (vlan->tpid != ETH_P_8021Q && vlan->tpid != ETH_P_8021AD && + vlan->tpid != ETH_P_QINQ1 && (vlan->tpid || vlan->vid)) { + print_invalid_tpid(vsi, vlan->tpid); + return false; + } + + return true; +} + +/** + * ice_vsi_add_vlan - default add VLAN implementation for all VSI types + * @vsi: VSI being configured + * @vlan: VLAN filter to add + */ +int ice_vsi_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan) +{ + int err; + + if (!validate_vlan(vsi, vlan)) + return -EINVAL; + + err = ice_fltr_add_vlan(vsi, vlan); + if (err && err != -EEXIST) { + dev_err(ice_pf_to_dev(vsi->back), "Failure Adding VLAN %d on VSI %i, status %d\n", + vlan->vid, vsi->vsi_num, err); + return err; + } + + vsi->num_vlan++; + return 0; +} + +/** + * ice_vsi_del_vlan - default del VLAN implementation for all VSI types + * @vsi: VSI being configured + * @vlan: VLAN filter to delete + */ +int ice_vsi_del_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan) +{ + struct ice_pf *pf = vsi->back; + struct device *dev; + int err; + + if (!validate_vlan(vsi, vlan)) + return -EINVAL; + + dev = ice_pf_to_dev(pf); + + err = ice_fltr_remove_vlan(vsi, vlan); + if (!err) + vsi->num_vlan--; + else if (err == -ENOENT || err == -EBUSY) + err = 0; + else + dev_err(dev, "Error removing VLAN %d on VSI %i error: %d\n", + vlan->vid, vsi->vsi_num, err); + + return err; +} + +/** + * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx + * @vsi: the VSI being changed + */ +static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) +{ + struct ice_hw *hw = &vsi->back->hw; + struct ice_vsi_ctx *ctxt; + int err; + + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); + if (!ctxt) + return -ENOMEM; + + /* Here we are configuring the VSI to let the driver add VLAN tags by + * setting inner_vlan_flags to ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL. The actual VLAN tag + * insertion happens in the Tx hot path, in ice_tx_map. + */ + ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL; + + /* Preserve existing VLAN strip setting */ + ctxt->info.inner_vlan_flags |= (vsi->info.inner_vlan_flags & + ICE_AQ_VSI_INNER_VLAN_EMODE_M); + + ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); + + err = ice_update_vsi(hw, vsi->idx, ctxt, NULL); + if (err) { + dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + goto out; + } + + vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags; +out: + kfree(ctxt); + return err; +} + +/** + * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx + * @vsi: the VSI being changed + * @ena: boolean value indicating if this is a enable or disable request + */ +static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) +{ + struct ice_hw *hw = &vsi->back->hw; + struct ice_vsi_ctx *ctxt; + int err; + + /* do not allow modifying VLAN stripping when a port VLAN is configured + * on this VSI + */ + if (vsi->info.port_based_inner_vlan) + return 0; + + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); + if (!ctxt) + return -ENOMEM; + + /* Here we are configuring what the VSI should do with the VLAN tag in + * the Rx packet. We can either leave the tag in the packet or put it in + * the Rx descriptor. + */ + if (ena) + /* Strip VLAN tag from Rx packet and put it in the desc */ + ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH; + else + /* Disable stripping. Leave tag in packet */ + ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING; + + /* Allow all packets untagged/tagged */ + ctxt->info.inner_vlan_flags |= ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL; + + ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); + + err = ice_update_vsi(hw, vsi->idx, ctxt, NULL); + if (err) { + dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %d aq_err %s\n", + ena, err, ice_aq_str(hw->adminq.sq_last_status)); + goto out; + } + + vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags; +out: + kfree(ctxt); + return err; +} + +int ice_vsi_ena_inner_stripping(struct ice_vsi *vsi, const u16 tpid) +{ + if (tpid != ETH_P_8021Q) { + print_invalid_tpid(vsi, tpid); + return -EINVAL; + } + + return ice_vsi_manage_vlan_stripping(vsi, true); +} + +int ice_vsi_dis_inner_stripping(struct ice_vsi *vsi) +{ + return ice_vsi_manage_vlan_stripping(vsi, false); +} + +int ice_vsi_ena_inner_insertion(struct ice_vsi *vsi, const u16 tpid) +{ + if (tpid != ETH_P_8021Q) { + print_invalid_tpid(vsi, tpid); + return -EINVAL; + } + + return ice_vsi_manage_vlan_insertion(vsi); +} + +int ice_vsi_dis_inner_insertion(struct ice_vsi *vsi) +{ + return ice_vsi_manage_vlan_insertion(vsi); +} + +/** + * __ice_vsi_set_inner_port_vlan - set port VLAN VSI context settings to enable a port VLAN + * @vsi: the VSI to update + * @pvid_info: VLAN ID and QoS used to set the PVID VSI context field + */ +static int __ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, u16 pvid_info) +{ + struct ice_hw *hw = &vsi->back->hw; + struct ice_aqc_vsi_props *info; + struct ice_vsi_ctx *ctxt; + int ret; + + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); + if (!ctxt) + return -ENOMEM; + + ctxt->info = vsi->info; + info = &ctxt->info; + info->inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ACCEPTUNTAGGED | + ICE_AQ_VSI_INNER_VLAN_INSERT_PVID | + ICE_AQ_VSI_INNER_VLAN_EMODE_STR; + info->sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; + + info->port_based_inner_vlan = cpu_to_le16(pvid_info); + info->valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID | + ICE_AQ_VSI_PROP_SW_VALID); + + ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL); + if (ret) { + dev_info(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %d aq_err %s\n", + ret, ice_aq_str(hw->adminq.sq_last_status)); + goto out; + } + + vsi->info.inner_vlan_flags = info->inner_vlan_flags; + vsi->info.sw_flags2 = info->sw_flags2; + vsi->info.port_based_inner_vlan = info->port_based_inner_vlan; +out: + kfree(ctxt); + return ret; +} + +int ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan) +{ + u16 port_vlan_info; + + if (vlan->tpid != ETH_P_8021Q) + return -EINVAL; + + if (vlan->prio > 7) + return -EINVAL; + + port_vlan_info = vlan->vid | (vlan->prio << VLAN_PRIO_SHIFT); + + return __ice_vsi_set_inner_port_vlan(vsi, port_vlan_info); +} + +/** + * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI + * @vsi: VSI to enable or disable VLAN pruning on + * @ena: set to true to enable VLAN pruning and false to disable it + * + * returns 0 if VSI is updated, negative otherwise + */ +static int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena) +{ + struct ice_vsi_ctx *ctxt; + struct ice_pf *pf; + int status; + + if (!vsi) + return -EINVAL; + + /* Don't enable VLAN pruning if the netdev is currently in promiscuous + * mode. VLAN pruning will be enabled when the interface exits + * promiscuous mode if any VLAN filters are active. + */ + if (vsi->netdev && vsi->netdev->flags & IFF_PROMISC && ena) + return 0; + + pf = vsi->back; + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); + if (!ctxt) + return -ENOMEM; + + ctxt->info = vsi->info; + + if (ena) + ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; + else + ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; + + ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID); + + status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL); + if (status) { + netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %d, aq_err = %s\n", + ena ? "En" : "Dis", vsi->idx, vsi->vsi_num, status, + ice_aq_str(pf->hw.adminq.sq_last_status)); + goto err_out; + } + + vsi->info.sw_flags2 = ctxt->info.sw_flags2; + + kfree(ctxt); + return 0; + +err_out: + kfree(ctxt); + return status; +} + +int ice_vsi_ena_rx_vlan_filtering(struct ice_vsi *vsi) +{ + return ice_cfg_vlan_pruning(vsi, true); +} + +int ice_vsi_dis_rx_vlan_filtering(struct ice_vsi *vsi) +{ + return ice_cfg_vlan_pruning(vsi, false); +} + +static int ice_cfg_vlan_antispoof(struct ice_vsi *vsi, bool enable) +{ + struct ice_vsi_ctx *ctx; + int err; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->info.sec_flags = vsi->info.sec_flags; + ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); + + if (enable) + ctx->info.sec_flags |= ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S; + else + ctx->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); + + err = ice_update_vsi(&vsi->back->hw, vsi->idx, ctx, NULL); + if (err) + dev_err(ice_pf_to_dev(vsi->back), "Failed to configure Tx VLAN anti-spoof %s for VSI %d, error %d\n", + enable ? "ON" : "OFF", vsi->vsi_num, err); + else + vsi->info.sec_flags = ctx->info.sec_flags; + + kfree(ctx); + + return err; +} + +int ice_vsi_ena_tx_vlan_filtering(struct ice_vsi *vsi) +{ + return ice_cfg_vlan_antispoof(vsi, true); +} + +int ice_vsi_dis_tx_vlan_filtering(struct ice_vsi *vsi) +{ + return ice_cfg_vlan_antispoof(vsi, false); +} + +/** + * tpid_to_vsi_outer_vlan_type - convert from TPID to VSI context based tag_type + * @tpid: tpid used to translate into VSI context based tag_type + * @tag_type: output variable to hold the VSI context based tag type + */ +static int tpid_to_vsi_outer_vlan_type(u16 tpid, u8 *tag_type) +{ + switch (tpid) { + case ETH_P_8021Q: + *tag_type = ICE_AQ_VSI_OUTER_TAG_VLAN_8100; + break; + case ETH_P_8021AD: + *tag_type = ICE_AQ_VSI_OUTER_TAG_STAG; + break; + case ETH_P_QINQ1: + *tag_type = ICE_AQ_VSI_OUTER_TAG_VLAN_9100; + break; + default: + *tag_type = 0; + return -EINVAL; + } + + return 0; +} + +/** + * ice_vsi_ena_outer_stripping - enable outer VLAN stripping + * @vsi: VSI to configure + * @tpid: TPID to enable outer VLAN stripping for + * + * Enable outer VLAN stripping via VSI context. This function should only be + * used if DVM is supported. Also, this function should never be called directly + * as it should be part of ice_vsi_vlan_ops if it's needed. + * + * Since the VSI context only supports a single TPID for insertion and + * stripping, setting the TPID for stripping will affect the TPID for insertion. + * Callers need to be aware of this limitation. + * + * Only modify outer VLAN stripping settings and the VLAN TPID. Outer VLAN + * insertion settings are unmodified. + * + * This enables hardware to strip a VLAN tag with the specified TPID to be + * stripped from the packet and placed in the receive descriptor. + */ +int ice_vsi_ena_outer_stripping(struct ice_vsi *vsi, u16 tpid) +{ + struct ice_hw *hw = &vsi->back->hw; + struct ice_vsi_ctx *ctxt; + u8 tag_type; + int err; + + /* do not allow modifying VLAN stripping when a port VLAN is configured + * on this VSI + */ + if (vsi->info.port_based_outer_vlan) + return 0; + + if (tpid_to_vsi_outer_vlan_type(tpid, &tag_type)) + return -EINVAL; + + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); + if (!ctxt) + return -ENOMEM; + + ctxt->info.valid_sections = + cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID); + /* clear current outer VLAN strip settings */ + ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags & + ~(ICE_AQ_VSI_OUTER_VLAN_EMODE_M | ICE_AQ_VSI_OUTER_TAG_TYPE_M); + ctxt->info.outer_vlan_flags |= + ((ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW_BOTH << + ICE_AQ_VSI_OUTER_VLAN_EMODE_S) | + ((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) & + ICE_AQ_VSI_OUTER_TAG_TYPE_M)); + + err = ice_update_vsi(hw, vsi->idx, ctxt, NULL); + if (err) + dev_err(ice_pf_to_dev(vsi->back), "update VSI for enabling outer VLAN stripping failed, err %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + else + vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags; + + kfree(ctxt); + return err; +} + +/** + * ice_vsi_dis_outer_stripping - disable outer VLAN stripping + * @vsi: VSI to configure + * + * Disable outer VLAN stripping via VSI context. This function should only be + * used if DVM is supported. Also, this function should never be called directly + * as it should be part of ice_vsi_vlan_ops if it's needed. + * + * Only modify the outer VLAN stripping settings. The VLAN TPID and outer VLAN + * insertion settings are unmodified. + * + * This tells the hardware to not strip any VLAN tagged packets, thus leaving + * them in the packet. This enables software offloaded VLAN stripping and + * disables hardware offloaded VLAN stripping. + */ +int ice_vsi_dis_outer_stripping(struct ice_vsi *vsi) +{ + struct ice_hw *hw = &vsi->back->hw; + struct ice_vsi_ctx *ctxt; + int err; + + if (vsi->info.port_based_outer_vlan) + return 0; + + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); + if (!ctxt) + return -ENOMEM; + + ctxt->info.valid_sections = + cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID); + /* clear current outer VLAN strip settings */ + ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags & + ~ICE_AQ_VSI_OUTER_VLAN_EMODE_M; + ctxt->info.outer_vlan_flags |= ICE_AQ_VSI_OUTER_VLAN_EMODE_NOTHING << + ICE_AQ_VSI_OUTER_VLAN_EMODE_S; + + err = ice_update_vsi(hw, vsi->idx, ctxt, NULL); + if (err) + dev_err(ice_pf_to_dev(vsi->back), "update VSI for disabling outer VLAN stripping failed, err %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + else + vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags; + + kfree(ctxt); + return err; +} + +/** + * ice_vsi_ena_outer_insertion - enable outer VLAN insertion + * @vsi: VSI to configure + * @tpid: TPID to enable outer VLAN insertion for + * + * Enable outer VLAN insertion via VSI context. This function should only be + * used if DVM is supported. Also, this function should never be called directly + * as it should be part of ice_vsi_vlan_ops if it's needed. + * + * Since the VSI context only supports a single TPID for insertion and + * stripping, setting the TPID for insertion will affect the TPID for stripping. + * Callers need to be aware of this limitation. + * + * Only modify outer VLAN insertion settings and the VLAN TPID. Outer VLAN + * stripping settings are unmodified. + * + * This allows a VLAN tag with the specified TPID to be inserted in the transmit + * descriptor. + */ +int ice_vsi_ena_outer_insertion(struct ice_vsi *vsi, u16 tpid) +{ + struct ice_hw *hw = &vsi->back->hw; + struct ice_vsi_ctx *ctxt; + u8 tag_type; + int err; + + if (vsi->info.port_based_outer_vlan) + return 0; + + if (tpid_to_vsi_outer_vlan_type(tpid, &tag_type)) + return -EINVAL; + + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); + if (!ctxt) + return -ENOMEM; + + ctxt->info.valid_sections = + cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID); + /* clear current outer VLAN insertion settings */ + ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags & + ~(ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT | + ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC | + ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M | + ICE_AQ_VSI_OUTER_TAG_TYPE_M); + ctxt->info.outer_vlan_flags |= + ((ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL << + ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) & + ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M) | + ((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) & + ICE_AQ_VSI_OUTER_TAG_TYPE_M); + + err = ice_update_vsi(hw, vsi->idx, ctxt, NULL); + if (err) + dev_err(ice_pf_to_dev(vsi->back), "update VSI for enabling outer VLAN insertion failed, err %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + else + vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags; + + kfree(ctxt); + return err; +} + +/** + * ice_vsi_dis_outer_insertion - disable outer VLAN insertion + * @vsi: VSI to configure + * + * Disable outer VLAN insertion via VSI context. This function should only be + * used if DVM is supported. Also, this function should never be called directly + * as it should be part of ice_vsi_vlan_ops if it's needed. + * + * Only modify the outer VLAN insertion settings. The VLAN TPID and outer VLAN + * settings are unmodified. + * + * This tells the hardware to not allow any VLAN tagged packets in the transmit + * descriptor. This enables software offloaded VLAN insertion and disables + * hardware offloaded VLAN insertion. + */ +int ice_vsi_dis_outer_insertion(struct ice_vsi *vsi) +{ + struct ice_hw *hw = &vsi->back->hw; + struct ice_vsi_ctx *ctxt; + int err; + + if (vsi->info.port_based_outer_vlan) + return 0; + + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); + if (!ctxt) + return -ENOMEM; + + ctxt->info.valid_sections = + cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID); + /* clear current outer VLAN insertion settings */ + ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags & + ~(ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT | + ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M); + ctxt->info.outer_vlan_flags |= + ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC | + ((ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL << + ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) & + ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M); + + err = ice_update_vsi(hw, vsi->idx, ctxt, NULL); + if (err) + dev_err(ice_pf_to_dev(vsi->back), "update VSI for disabling outer VLAN insertion failed, err %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + else + vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags; + + kfree(ctxt); + return err; +} + +/** + * __ice_vsi_set_outer_port_vlan - set the outer port VLAN and related settings + * @vsi: VSI to configure + * @vlan_info: packed u16 that contains the VLAN prio and ID + * @tpid: TPID of the port VLAN + * + * Set the port VLAN prio, ID, and TPID. + * + * Enable VLAN pruning so the VSI doesn't receive any traffic that doesn't match + * a VLAN prune rule. The caller should take care to add a VLAN prune rule that + * matches the port VLAN ID and TPID. + * + * Tell hardware to strip outer VLAN tagged packets on receive and don't put + * them in the receive descriptor. VSI(s) in port VLANs should not be aware of + * the port VLAN ID or TPID they are assigned to. + * + * Tell hardware to prevent outer VLAN tag insertion on transmit and only allow + * untagged outer packets from the transmit descriptor. + * + * Also, tell the hardware to insert the port VLAN on transmit. + */ +static int +__ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, u16 vlan_info, u16 tpid) +{ + struct ice_hw *hw = &vsi->back->hw; + struct ice_vsi_ctx *ctxt; + u8 tag_type; + int err; + + if (tpid_to_vsi_outer_vlan_type(tpid, &tag_type)) + return -EINVAL; + + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); + if (!ctxt) + return -ENOMEM; + + ctxt->info = vsi->info; + + ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; + + ctxt->info.port_based_outer_vlan = cpu_to_le16(vlan_info); + ctxt->info.outer_vlan_flags = + (ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW << + ICE_AQ_VSI_OUTER_VLAN_EMODE_S) | + ((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) & + ICE_AQ_VSI_OUTER_TAG_TYPE_M) | + ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC | + (ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ACCEPTUNTAGGED << + ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) | + ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT; + + ctxt->info.valid_sections = + cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID | + ICE_AQ_VSI_PROP_SW_VALID); + + err = ice_update_vsi(hw, vsi->idx, ctxt, NULL); + if (err) { + dev_err(ice_pf_to_dev(vsi->back), "update VSI for setting outer port based VLAN failed, err %d aq_err %s\n", + err, ice_aq_str(hw->adminq.sq_last_status)); + } else { + vsi->info.port_based_outer_vlan = ctxt->info.port_based_outer_vlan; + vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags; + vsi->info.sw_flags2 = ctxt->info.sw_flags2; + } + + kfree(ctxt); + return err; +} + +/** + * ice_vsi_set_outer_port_vlan - public version of __ice_vsi_set_outer_port_vlan + * @vsi: VSI to configure + * @vlan: ice_vlan structure used to set the port VLAN + * + * Set the outer port VLAN via VSI context. This function should only be + * used if DVM is supported. Also, this function should never be called directly + * as it should be part of ice_vsi_vlan_ops if it's needed. + * + * This function does not support clearing the port VLAN as there is currently + * no use case for this. + * + * Use the ice_vlan structure passed in to set this VSI in a port VLAN. + */ +int ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan) +{ + u16 port_vlan_info; + + if (vlan->prio > (VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT)) + return -EINVAL; + + port_vlan_info = vlan->vid | (vlan->prio << VLAN_PRIO_SHIFT); + + return __ice_vsi_set_outer_port_vlan(vsi, port_vlan_info, vlan->tpid); +} diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h new file mode 100644 index 000000000000..f459909490ec --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#ifndef _ICE_VSI_VLAN_LIB_H_ +#define _ICE_VSI_VLAN_LIB_H_ + +#include <linux/types.h> +#include "ice_vlan.h" + +struct ice_vsi; + +int ice_vsi_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan); +int ice_vsi_del_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan); + +int ice_vsi_ena_inner_stripping(struct ice_vsi *vsi, u16 tpid); +int ice_vsi_dis_inner_stripping(struct ice_vsi *vsi); +int ice_vsi_ena_inner_insertion(struct ice_vsi *vsi, u16 tpid); +int ice_vsi_dis_inner_insertion(struct ice_vsi *vsi); +int ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan); + +int ice_vsi_ena_rx_vlan_filtering(struct ice_vsi *vsi); +int ice_vsi_dis_rx_vlan_filtering(struct ice_vsi *vsi); +int ice_vsi_ena_tx_vlan_filtering(struct ice_vsi *vsi); +int ice_vsi_dis_tx_vlan_filtering(struct ice_vsi *vsi); + +int ice_vsi_ena_outer_stripping(struct ice_vsi *vsi, u16 tpid); +int ice_vsi_dis_outer_stripping(struct ice_vsi *vsi); +int ice_vsi_ena_outer_insertion(struct ice_vsi *vsi, u16 tpid); +int ice_vsi_dis_outer_insertion(struct ice_vsi *vsi); +int ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan); + +#endif /* _ICE_VSI_VLAN_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c new file mode 100644 index 000000000000..4a6c850d83ac --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#include "ice_pf_vsi_vlan_ops.h" +#include "ice_vf_vsi_vlan_ops.h" +#include "ice_lib.h" +#include "ice.h" + +static int +op_unsupported_vlan_arg(struct ice_vsi * __always_unused vsi, + struct ice_vlan * __always_unused vlan) +{ + return -EOPNOTSUPP; +} + +static int +op_unsupported_tpid_arg(struct ice_vsi *__always_unused vsi, + u16 __always_unused tpid) +{ + return -EOPNOTSUPP; +} + +static int op_unsupported(struct ice_vsi *__always_unused vsi) +{ + return -EOPNOTSUPP; +} + +/* If any new ops are added to the VSI VLAN ops interface then an unsupported + * implementation should be set here. + */ +static struct ice_vsi_vlan_ops ops_unsupported = { + .add_vlan = op_unsupported_vlan_arg, + .del_vlan = op_unsupported_vlan_arg, + .ena_stripping = op_unsupported_tpid_arg, + .dis_stripping = op_unsupported, + .ena_insertion = op_unsupported_tpid_arg, + .dis_insertion = op_unsupported, + .ena_rx_filtering = op_unsupported, + .dis_rx_filtering = op_unsupported, + .ena_tx_filtering = op_unsupported, + .dis_tx_filtering = op_unsupported, + .set_port_vlan = op_unsupported_vlan_arg, +}; + +/** + * ice_vsi_init_unsupported_vlan_ops - init all VSI VLAN ops to unsupported + * @vsi: VSI to initialize VSI VLAN ops to unsupported for + * + * By default all inner and outer VSI VLAN ops return -EOPNOTSUPP. This was done + * as oppsed to leaving the ops null to prevent unexpected crashes. Instead if + * an unsupported VSI VLAN op is called it will just return -EOPNOTSUPP. + * + */ +static void ice_vsi_init_unsupported_vlan_ops(struct ice_vsi *vsi) +{ + vsi->outer_vlan_ops = ops_unsupported; + vsi->inner_vlan_ops = ops_unsupported; +} + +/** + * ice_vsi_init_vlan_ops - initialize type specific VSI VLAN ops + * @vsi: VSI to initialize ops for + * + * If any VSI types are added and/or require different ops than the PF or VF VSI + * then they will have to add a case here to handle that. Also, VSI type + * specific files should be added in the same manner that was done for PF VSI. + */ +void ice_vsi_init_vlan_ops(struct ice_vsi *vsi) +{ + /* Initialize all VSI types to have unsupported VSI VLAN ops */ + ice_vsi_init_unsupported_vlan_ops(vsi); + + switch (vsi->type) { + case ICE_VSI_PF: + case ICE_VSI_SWITCHDEV_CTRL: + ice_pf_vsi_init_vlan_ops(vsi); + break; + case ICE_VSI_VF: + ice_vf_vsi_init_vlan_ops(vsi); + break; + default: + dev_dbg(ice_pf_to_dev(vsi->back), "%s does not support VLAN operations\n", + ice_vsi_type_str(vsi->type)); + break; + } +} + +/** + * ice_get_compat_vsi_vlan_ops - Get VSI VLAN ops based on VLAN mode + * @vsi: VSI used to get the VSI VLAN ops + * + * This function is meant to be used when the caller doesn't know which VLAN ops + * to use (i.e. inner or outer). This allows backward compatibility for VLANs + * since most of the Outer VSI VLAN functins are not supported when + * the device is configured in Single VLAN Mode (SVM). + */ +struct ice_vsi_vlan_ops *ice_get_compat_vsi_vlan_ops(struct ice_vsi *vsi) +{ + if (ice_is_dvm_ena(&vsi->back->hw)) + return &vsi->outer_vlan_ops; + else + return &vsi->inner_vlan_ops; +} diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h new file mode 100644 index 000000000000..5b47568f6256 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#ifndef _ICE_VSI_VLAN_OPS_H_ +#define _ICE_VSI_VLAN_OPS_H_ + +#include "ice_type.h" +#include "ice_vsi_vlan_lib.h" + +struct ice_vsi; + +struct ice_vsi_vlan_ops { + int (*add_vlan)(struct ice_vsi *vsi, struct ice_vlan *vlan); + int (*del_vlan)(struct ice_vsi *vsi, struct ice_vlan *vlan); + int (*ena_stripping)(struct ice_vsi *vsi, const u16 tpid); + int (*dis_stripping)(struct ice_vsi *vsi); + int (*ena_insertion)(struct ice_vsi *vsi, const u16 tpid); + int (*dis_insertion)(struct ice_vsi *vsi); + int (*ena_rx_filtering)(struct ice_vsi *vsi); + int (*dis_rx_filtering)(struct ice_vsi *vsi); + int (*ena_tx_filtering)(struct ice_vsi *vsi); + int (*dis_tx_filtering)(struct ice_vsi *vsi); + int (*set_port_vlan)(struct ice_vsi *vsi, struct ice_vlan *vlan); +}; + +void ice_vsi_init_vlan_ops(struct ice_vsi *vsi); +struct ice_vsi_vlan_ops *ice_get_compat_vsi_vlan_ops(struct ice_vsi *vsi); + +#endif /* _ICE_VSI_VLAN_OPS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 2388837d6d6c..88853a6ed931 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -327,6 +327,13 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) bool if_running, pool_present = !!pool; int ret = 0, pool_failure = 0; + if (!is_power_of_2(vsi->rx_rings[qid]->count) || + !is_power_of_2(vsi->tx_rings[qid]->count)) { + netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n"); + pool_failure = -EINVAL; + goto failure; + } + if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi); if (if_running) { @@ -349,6 +356,7 @@ xsk_pool_if_up: netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret); } +failure: if (pool_failure) { netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n", pool_present ? "en" : "dis", pool_failure); @@ -359,33 +367,28 @@ xsk_pool_if_up: } /** - * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers - * @rx_ring: Rx ring + * ice_fill_rx_descs - pick buffers from XSK buffer pool and use it + * @pool: XSK Buffer pool to pull the buffers from + * @xdp: SW ring of xdp_buff that will hold the buffers + * @rx_desc: Pointer to Rx descriptors that will be filled * @count: The number of buffers to allocate * * This function allocates a number of Rx buffers from the fill ring * or the internal recycle mechanism and places them on the Rx ring. * - * Returns true if all allocations were successful, false if any fail. + * Note that ring wrap should be handled by caller of this function. + * + * Returns the amount of allocated Rx descriptors */ -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, + union ice_32b_rx_flex_desc *rx_desc, u16 count) { - union ice_32b_rx_flex_desc *rx_desc; - u16 ntu = rx_ring->next_to_use; - struct xdp_buff **xdp; - u32 nb_buffs, i; dma_addr_t dma; + u16 buffs; + int i; - rx_desc = ICE_RX_DESC(rx_ring, ntu); - xdp = ice_xdp_buf(rx_ring, ntu); - - nb_buffs = min_t(u16, count, rx_ring->count - ntu); - nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs); - if (!nb_buffs) - return false; - - i = nb_buffs; - while (i--) { + buffs = xsk_buff_alloc_batch(pool, xdp, count); + for (i = 0; i < buffs; i++) { dma = xsk_buff_xdp_get_dma(*xdp); rx_desc->read.pkt_addr = cpu_to_le64(dma); rx_desc->wb.status_error0 = 0; @@ -394,13 +397,77 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) xdp++; } + return buffs; +} + +/** + * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * Place the @count of descriptors onto Rx ring. Handle the ring wrap + * for case where space from next_to_use up to the end of ring is less + * than @count. Finally do a tail bump. + * + * Returns true if all allocations were successful, false if any fail. + */ +static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +{ + union ice_32b_rx_flex_desc *rx_desc; + u32 nb_buffs_extra = 0, nb_buffs; + u16 ntu = rx_ring->next_to_use; + u16 total_count = count; + struct xdp_buff **xdp; + + rx_desc = ICE_RX_DESC(rx_ring, ntu); + xdp = ice_xdp_buf(rx_ring, ntu); + + if (ntu + count >= rx_ring->count) { + nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, + rx_desc, + rx_ring->count - ntu); + rx_desc = ICE_RX_DESC(rx_ring, 0); + xdp = ice_xdp_buf(rx_ring, 0); + ntu = 0; + count -= nb_buffs_extra; + ice_release_rx_desc(rx_ring, 0); + } + + nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count); + ntu += nb_buffs; if (ntu == rx_ring->count) ntu = 0; - ice_release_rx_desc(rx_ring, ntu); + if (rx_ring->next_to_use != ntu) + ice_release_rx_desc(rx_ring, ntu); + + return total_count == (nb_buffs_extra + nb_buffs); +} + +/** + * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * Wrapper for internal allocation routine; figure out how many tail + * bumps should take place based on the given threshold + * + * Returns true if all calls to internal alloc routine succeeded + */ +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +{ + u16 rx_thresh = ICE_RING_QUARTER(rx_ring); + u16 batched, leftover, i, tail_bumps; + + batched = ALIGN_DOWN(count, rx_thresh); + tail_bumps = batched / rx_thresh; + leftover = count & (rx_thresh - 1); - return count == nb_buffs; + for (i = 0; i < tail_bumps; i++) + if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh)) + return false; + return __ice_alloc_rx_bufs_zc(rx_ring, leftover); } /** @@ -428,20 +495,24 @@ static void ice_bump_ntc(struct ice_rx_ring *rx_ring) static struct sk_buff * ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) { - unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start; + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard, + net_prefetch(xdp->data_meta); + + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, xdp->data - xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } xsk_buff_free(xdp); return skb; @@ -528,7 +599,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean); stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S); - if (!ice_test_staterr(rx_desc, stat_err_bits)) + if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits)) break; /* This memory barrier is needed to keep us from reading @@ -583,9 +654,7 @@ construct_skb: total_rx_bytes += skb->len; total_rx_packets++; - stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S); - if (ice_test_staterr(rx_desc, stat_err_bits)) - vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1); + vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc); rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) & ICE_RX_FLEX_DESC_PTYPE_M; @@ -612,134 +681,221 @@ construct_skb: } /** - * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries + * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer * @xdp_ring: XDP Tx ring - * @budget: max number of frames to xmit + * @tx_buf: Tx buffer to clean + */ +static void +ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) +{ + xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); + xdp_ring->xdp_tx_active--; + dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); +} + +/** + * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring + * @xdp_ring: XDP ring to clean + * @napi_budget: amount of descriptors that NAPI allows us to clean * - * Returns true if cleanup/transmission is done. + * Returns count of cleaned descriptors */ -static bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, int budget) +static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget) { - struct ice_tx_desc *tx_desc = NULL; - bool work_done = true; - struct xdp_desc desc; - dma_addr_t dma; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + int budget = napi_budget / tx_thresh; + u16 next_dd = xdp_ring->next_dd; + u16 ntc, cleared_dds = 0; - while (likely(budget-- > 0)) { + do { + struct ice_tx_desc *next_dd_desc; + u16 desc_cnt = xdp_ring->count; struct ice_tx_buf *tx_buf; + u32 xsk_frames; + u16 i; - if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) { - xdp_ring->tx_stats.tx_busy++; - work_done = false; + next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd); + if (!(next_dd_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) break; - } - tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use]; - - if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc)) - break; - - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, - desc.len); - - tx_buf->bytecount = desc.len; + cleared_dds++; + xsk_frames = 0; + if (likely(!xdp_ring->xdp_tx_active)) { + xsk_frames = tx_thresh; + goto skip; + } - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use); - tx_desc->buf_addr = cpu_to_le64(dma); - tx_desc->cmd_type_offset_bsz = - ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, desc.len, 0); + ntc = xdp_ring->next_to_clean; - xdp_ring->next_to_use++; - if (xdp_ring->next_to_use == xdp_ring->count) - xdp_ring->next_to_use = 0; - } + for (i = 0; i < tx_thresh; i++) { + tx_buf = &xdp_ring->tx_buf[ntc]; - if (tx_desc) { - ice_xdp_ring_update_tail(xdp_ring); - xsk_tx_release(xdp_ring->xsk_pool); - } + if (tx_buf->raw_buf) { + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); + tx_buf->raw_buf = NULL; + } else { + xsk_frames++; + } - return budget > 0 && work_done; + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } +skip: + xdp_ring->next_to_clean += tx_thresh; + if (xdp_ring->next_to_clean >= desc_cnt) + xdp_ring->next_to_clean -= desc_cnt; + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); + next_dd_desc->cmd_type_offset_bsz = 0; + next_dd = next_dd + tx_thresh; + if (next_dd >= desc_cnt) + next_dd = tx_thresh - 1; + } while (budget--); + + xdp_ring->next_dd = next_dd; + + return cleared_dds * tx_thresh; } /** - * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer - * @xdp_ring: XDP Tx ring - * @tx_buf: Tx buffer to clean + * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor + * @xdp_ring: XDP ring to produce the HW Tx descriptor on + * @desc: AF_XDP descriptor to pull the DMA address and length from + * @total_bytes: bytes accumulator that will be used for stats update */ -static void -ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) +static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, + unsigned int *total_bytes) { - xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); - dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); - dma_unmap_len_set(tx_buf, len, 0); + struct ice_tx_desc *tx_desc; + dma_addr_t dma; + + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len); + + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, + 0, desc->len, 0); + + *total_bytes += desc->len; } /** - * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries - * @xdp_ring: XDP Tx ring - * @budget: NAPI budget - * - * Returns true if cleanup/tranmission is done. + * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from + * @total_bytes: bytes accumulator that will be used for stats update */ -bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget) +static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, + unsigned int *total_bytes) { - int total_packets = 0, total_bytes = 0; - s16 ntc = xdp_ring->next_to_clean; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + u16 ntu = xdp_ring->next_to_use; struct ice_tx_desc *tx_desc; - struct ice_tx_buf *tx_buf; - u32 xsk_frames = 0; - bool xmit_done; + u32 i; - tx_desc = ICE_TX_DESC(xdp_ring, ntc); - tx_buf = &xdp_ring->tx_buf[ntc]; - ntc -= xdp_ring->count; + loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { + dma_addr_t dma; - do { - if (!(tx_desc->cmd_type_offset_bsz & - cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) - break; + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len); - total_bytes += tx_buf->bytecount; - total_packets++; + tx_desc = ICE_TX_DESC(xdp_ring, ntu++); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, + 0, descs[i].len, 0); - if (tx_buf->raw_buf) { - ice_clean_xdp_tx_buf(xdp_ring, tx_buf); - tx_buf->raw_buf = NULL; - } else { - xsk_frames++; - } + *total_bytes += descs[i].len; + } - tx_desc->cmd_type_offset_bsz = 0; - tx_buf++; - tx_desc++; - ntc++; + xdp_ring->next_to_use = ntu; - if (unlikely(!ntc)) { - ntc -= xdp_ring->count; - tx_buf = xdp_ring->tx_buf; - tx_desc = ICE_TX_DESC(xdp_ring, 0); - } + if (xdp_ring->next_to_use > xdp_ring->next_rs) { + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs += tx_thresh; + } +} - prefetch(tx_desc); +/** + * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from + * @nb_pkts: count of packets to be send + * @total_bytes: bytes accumulator that will be used for stats update + */ +static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, + u32 nb_pkts, unsigned int *total_bytes) +{ + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + u32 batched, leftover, i; + + batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH); + leftover = nb_pkts & (PKTS_PER_BATCH - 1); + for (i = 0; i < batched; i += PKTS_PER_BATCH) + ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); + for (; i < batched + leftover; i++) + ice_xmit_pkt(xdp_ring, &descs[i], total_bytes); + + if (xdp_ring->next_to_use > xdp_ring->next_rs) { + struct ice_tx_desc *tx_desc; + + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs += tx_thresh; + } +} - } while (likely(--budget)); +/** + * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @budget: number of free descriptors on HW Tx ring that can be used + * @napi_budget: amount of descriptors that NAPI allows us to clean + * + * Returns true if there is no more work that needs to be done, false otherwise + */ +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget) +{ + struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + u32 nb_pkts, nb_processed = 0; + unsigned int total_bytes = 0; + + if (budget < tx_thresh) + budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget); + + nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); + if (!nb_pkts) + return true; + + if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { + struct ice_tx_desc *tx_desc; + + nb_processed = xdp_ring->count - xdp_ring->next_to_use; + ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes); + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs = tx_thresh - 1; + xdp_ring->next_to_use = 0; + } - ntc += xdp_ring->count; - xdp_ring->next_to_clean = ntc; + ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed, + &total_bytes); - if (xsk_frames) - xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); + ice_xdp_ring_update_tail(xdp_ring); + ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes); if (xsk_uses_need_wakeup(xdp_ring->xsk_pool)) xsk_set_tx_need_wakeup(xdp_ring->xsk_pool); - ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes); - xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK); - - return budget > 0 && xmit_done; + return nb_pkts < budget; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 4c7bd8e9dfc4..123bb98ebfbe 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -6,19 +6,37 @@ #include "ice_txrx.h" #include "ice.h" +#define PKTS_PER_BATCH 8 + +#ifdef __clang__ +#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for +#elif __GNUC__ >= 8 +#define loop_unrolled_for _Pragma("GCC unroll 8") for +#else +#define loop_unrolled_for for +#endif + struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid); int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget); -bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget); int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count); bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget); #else +static inline bool +ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring, + u32 __always_unused budget, + int __always_unused napi_budget) +{ + return false; +} + static inline int ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, struct xsk_buff_pool __always_unused *pool, @@ -35,13 +53,6 @@ ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, } static inline bool -ice_clean_tx_irq_zc(struct ice_tx_ring __always_unused *xdp_ring, - int __always_unused budget) -{ - return false; -} - -static inline bool ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring, u16 __always_unused count) { diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 51a2dcaf553d..2a5782063f4c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -965,10 +965,6 @@ static int igb_set_ringparam(struct net_device *netdev, memcpy(&temp_ring[i], adapter->rx_ring[i], sizeof(struct igb_ring)); - /* Clear copied XDP RX-queue info */ - memset(&temp_ring[i].xdp_rxq, 0, - sizeof(temp_ring[i].xdp_rxq)); - temp_ring[i].count = new_rx_count; err = igb_setup_rx_resources(&temp_ring[i]); if (err) { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 38ba92022cd4..34b33b21e0dc 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3164,8 +3164,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) s32 ret_val; static int global_quad_port_a; /* global quad port a indication */ const struct e1000_info *ei = igb_info_tbl[ent->driver_data]; - int err, pci_using_dac; u8 part_str[E1000_PBANUM_LENGTH]; + int err; /* Catch broken hardware that put the wrong VF device ID in * the PCIe SR-IOV capability. @@ -3180,17 +3180,11 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - pci_using_dac = 0; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } err = pci_request_mem_regions(pdev, igb_driver_name); @@ -3306,8 +3300,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (hw->mac.type >= e1000_i350) netdev->hw_features |= NETIF_F_NTUPLE; - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HIGHDMA; netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; netdev->mpls_features |= NETIF_F_HW_CSUM; @@ -4352,7 +4345,18 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) { struct igb_adapter *adapter = netdev_priv(rx_ring->netdev); struct device *dev = rx_ring->dev; - int size; + int size, res; + + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index, 0); + if (res < 0) { + dev_err(dev, "Failed to register xdp_rxq index %u\n", + rx_ring->queue_index); + return res; + } size = sizeof(struct igb_rx_buffer) * rx_ring->count; @@ -4375,14 +4379,10 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) rx_ring->xdp_prog = adapter->xdp_prog; - /* XDP RX-queue info */ - if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, - rx_ring->queue_index, 0) < 0) - goto err; - return 0; err: + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index b78407289741..43ced78c3a2e 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2684,25 +2684,18 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct igbvf_adapter *adapter; struct e1000_hw *hw; const struct igbvf_info *ei = igbvf_info_tbl[ent->driver_data]; - static int cards_found; - int err, pci_using_dac; + int err; err = pci_enable_device_mem(pdev); if (err) return err; - pci_using_dac = 0; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } err = pci_request_regions(pdev, igbvf_driver_name); @@ -2783,10 +2776,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_GSO_PARTIAL | IGBVF_GSO_PARTIAL_FEATURES; - netdev->features = netdev->hw_features; - - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + netdev->features = netdev->hw_features | NETIF_F_HIGHDMA; netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; netdev->mpls_features |= NETIF_F_HW_CSUM; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 2f17f36e94fd..74b2c590ed5d 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -505,6 +505,9 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) u8 index = rx_ring->queue_index; int size, desc_len, res; + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, rx_ring->q_vector->napi.napi_id); if (res < 0) { @@ -2446,19 +2449,20 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, struct xdp_buff *xdp) { + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; - unsigned int totalsize = metasize + datasize; struct sk_buff *skb; - skb = __napi_alloc_skb(&ring->q_vector->napi, - xdp->data_end - xdp->data_hard_start, + net_prefetch(xdp->data_meta); + + skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); - memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize); + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + if (metasize) { skb_metadata_set(skb, metasize); __skb_pull(skb, metasize); @@ -6251,23 +6255,17 @@ static int igc_probe(struct pci_dev *pdev, struct net_device *netdev; struct igc_hw *hw; const struct igc_info *ei = igc_info_tbl[ent->driver_data]; - int err, pci_using_dac; + int err; err = pci_enable_device_mem(pdev); if (err) return err; - pci_using_dac = 0; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } err = pci_request_mem_regions(pdev, igc_driver_name); @@ -6367,8 +6365,7 @@ static int igc_probe(struct pci_dev *pdev, netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= netdev->features; - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HIGHDMA; netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; netdev->mpls_features |= NETIF_F_HW_CSUM; diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 99d481904ce6..affdefcca7e3 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -361,7 +361,6 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct net_device *netdev = NULL; struct ixgb_adapter *adapter; static int cards_found = 0; - int pci_using_dac; u8 addr[ETH_ALEN]; int i; int err; @@ -370,16 +369,10 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - pci_using_dac = 0; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - pr_err("No usable DMA configuration, aborting\n"); - goto err_dma_mask; - } + if (err) { + pr_err("No usable DMA configuration, aborting\n"); + goto err_dma_mask; } err = pci_request_regions(pdev, ixgb_driver_name); @@ -444,10 +437,8 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_RXCSUM; - if (pci_using_dac) { - netdev->features |= NETIF_F_HIGHDMA; - netdev->vlan_features |= NETIF_F_HIGHDMA; - } + netdev->features |= NETIF_F_HIGHDMA; + netdev->vlan_features |= NETIF_F_HIGHDMA; /* MTU range: 68 - 16114 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 4a69823e6abd..921a4d977d65 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -177,11 +177,14 @@ struct vf_data_storage { u16 pf_vlan; /* When set, guest VLAN config not allowed. */ u16 pf_qos; u16 tx_rate; + int link_enable; + int link_state; u8 spoofchk_enabled; bool rss_query_enabled; u8 trusted; int xcast_mode; unsigned int vf_api; + u8 primary_abort_count; }; enum ixgbevf_xcast_modes { @@ -556,6 +559,8 @@ struct ixgbe_mac_addr { #define IXGBE_TRY_LINK_TIMEOUT (4 * HZ) #define IXGBE_SFP_POLL_JIFFIES (2 * HZ) /* SFP poll every 2 seconds */ +#define IXGBE_PRIMARY_ABORT_LIMIT 5 + /* board specific private data structure */ struct ixgbe_adapter { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; @@ -614,6 +619,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_RX_LEGACY BIT(16) #define IXGBE_FLAG2_IPSEC_ENABLED BIT(17) #define IXGBE_FLAG2_VF_IPSEC_ENABLED BIT(18) +#define IXGBE_FLAG2_AUTO_DISABLE_VF BIT(19) /* Tx fast path data */ int num_tx_queues; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index e90b5047e695..4c26c4b92f07 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -30,7 +30,7 @@ static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, u16 words, u16 *data); static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw, u16 offset); -static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw); +static s32 ixgbe_disable_pcie_primary(struct ixgbe_hw *hw); /* Base table for registers values that change by MAC */ const u32 ixgbe_mvals_8259X[IXGBE_MVALS_IDX_LIMIT] = { @@ -746,10 +746,10 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw) usleep_range(1000, 2000); /* - * Prevent the PCI-E bus from from hanging by disabling PCI-E master + * Prevent the PCI-E bus from hanging by disabling PCI-E primary * access and verify no pending requests */ - return ixgbe_disable_pcie_master(hw); + return ixgbe_disable_pcie_primary(hw); } /** @@ -2506,15 +2506,15 @@ static u32 ixgbe_pcie_timeout_poll(struct ixgbe_hw *hw) } /** - * ixgbe_disable_pcie_master - Disable PCI-express master access + * ixgbe_disable_pcie_primary - Disable PCI-express primary access * @hw: pointer to hardware structure * - * Disables PCI-Express master access and verifies there are no pending - * requests. IXGBE_ERR_MASTER_REQUESTS_PENDING is returned if master disable - * bit hasn't caused the master requests to be disabled, else 0 - * is returned signifying master requests disabled. + * Disables PCI-Express primary access and verifies there are no pending + * requests. IXGBE_ERR_PRIMARY_REQUESTS_PENDING is returned if primary disable + * bit hasn't caused the primary requests to be disabled, else 0 + * is returned signifying primary requests disabled. **/ -static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) +static s32 ixgbe_disable_pcie_primary(struct ixgbe_hw *hw) { u32 i, poll; u16 value; @@ -2523,23 +2523,23 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS); /* Poll for bit to read as set */ - for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { + for (i = 0; i < IXGBE_PCI_PRIMARY_DISABLE_TIMEOUT; i++) { if (IXGBE_READ_REG(hw, IXGBE_CTRL) & IXGBE_CTRL_GIO_DIS) break; usleep_range(100, 120); } - if (i >= IXGBE_PCI_MASTER_DISABLE_TIMEOUT) { + if (i >= IXGBE_PCI_PRIMARY_DISABLE_TIMEOUT) { hw_dbg(hw, "GIO disable did not set - requesting resets\n"); goto gio_disable_fail; } - /* Exit if master requests are blocked */ + /* Exit if primary requests are blocked */ if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO) || ixgbe_removed(hw->hw_addr)) return 0; - /* Poll for master request bit to clear */ - for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { + /* Poll for primary request bit to clear */ + for (i = 0; i < IXGBE_PCI_PRIMARY_DISABLE_TIMEOUT; i++) { udelay(100); if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO)) return 0; @@ -2547,13 +2547,13 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) /* * Two consecutive resets are required via CTRL.RST per datasheet - * 5.2.5.3.2 Master Disable. We set a flag to inform the reset routine - * of this need. The first reset prevents new master requests from + * 5.2.5.3.2 Primary Disable. We set a flag to inform the reset routine + * of this need. The first reset prevents new primary requests from * being issued by our device. We then must wait 1usec or more for any * remaining completions from the PCIe bus to trickle in, and then reset * again to clear out any effects they may have had on our device. */ - hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n"); + hw_dbg(hw, "GIO Primary Disable bit didn't clear - requesting resets\n"); gio_disable_fail: hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; @@ -2575,7 +2575,7 @@ gio_disable_fail: } hw_dbg(hw, "PCIe transaction pending bit also did not clear.\n"); - return IXGBE_ERR_MASTER_REQUESTS_PENDING; + return IXGBE_ERR_PRIMARY_REQUESTS_PENDING; } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index f70967c32116..628d0eb0599f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -138,6 +138,8 @@ static const char ixgbe_priv_flags_strings[][ETH_GSTRING_LEN] = { "legacy-rx", #define IXGBE_PRIV_FLAGS_VF_IPSEC_EN BIT(1) "vf-ipsec", +#define IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF BIT(2) + "mdd-disable-vf", }; #define IXGBE_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbe_priv_flags_strings) @@ -3510,6 +3512,9 @@ static u32 ixgbe_get_priv_flags(struct net_device *netdev) if (adapter->flags2 & IXGBE_FLAG2_VF_IPSEC_ENABLED) priv_flags |= IXGBE_PRIV_FLAGS_VF_IPSEC_EN; + if (adapter->flags2 & IXGBE_FLAG2_AUTO_DISABLE_VF) + priv_flags |= IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF; + return priv_flags; } @@ -3517,6 +3522,7 @@ static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags) { struct ixgbe_adapter *adapter = netdev_priv(netdev); unsigned int flags2 = adapter->flags2; + unsigned int i; flags2 &= ~IXGBE_FLAG2_RX_LEGACY; if (priv_flags & IXGBE_PRIV_FLAGS_LEGACY_RX) @@ -3526,6 +3532,21 @@ static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags) if (priv_flags & IXGBE_PRIV_FLAGS_VF_IPSEC_EN) flags2 |= IXGBE_FLAG2_VF_IPSEC_ENABLED; + flags2 &= ~IXGBE_FLAG2_AUTO_DISABLE_VF; + if (priv_flags & IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF) { + if (adapter->hw.mac.type == ixgbe_mac_82599EB) { + /* Reset primary abort counter */ + for (i = 0; i < adapter->num_vfs; i++) + adapter->vfinfo[i].primary_abort_count = 0; + + flags2 |= IXGBE_FLAG2_AUTO_DISABLE_VF; + } else { + e_info(probe, + "Cannot set private flags: Operation not supported\n"); + return -EOPNOTSUPP; + } + } + if (flags2 != adapter->flags2) { adapter->flags2 = flags2; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 89b467006291..c4a4954aa317 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5687,6 +5687,9 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter) ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); + + /* update setting rx tx for all active vfs */ + ixgbe_set_all_vfs(adapter); } void ixgbe_reinit_locked(struct ixgbe_adapter *adapter) @@ -5948,8 +5951,8 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) case IXGBE_ERR_SFP_NOT_PRESENT: case IXGBE_ERR_SFP_NOT_SUPPORTED: break; - case IXGBE_ERR_MASTER_REQUESTS_PENDING: - e_dev_err("master disable timed out\n"); + case IXGBE_ERR_PRIMARY_REQUESTS_PENDING: + e_dev_err("primary disable timed out\n"); break; case IXGBE_ERR_EEPROM_VERSION: /* We are running on a pre-production device, log a warning */ @@ -6144,11 +6147,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter) for (i = 0 ; i < adapter->num_vfs; i++) adapter->vfinfo[i].clear_to_send = false; - /* ping all the active vfs to let them know we are going down */ - ixgbe_ping_all_vfs(adapter); - - /* Disable all VFTE/VFRE TX/RX */ - ixgbe_disable_tx_rx(adapter); + /* update setting rx tx for all active vfs */ + ixgbe_set_all_vfs(adapter); } /* disable transmits in the hardware now that interrupts are off */ @@ -7613,6 +7613,27 @@ static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter) } #ifdef CONFIG_PCI_IOV +static void ixgbe_bad_vf_abort(struct ixgbe_adapter *adapter, u32 vf) +{ + struct ixgbe_hw *hw = &adapter->hw; + + if (adapter->hw.mac.type == ixgbe_mac_82599EB && + adapter->flags2 & IXGBE_FLAG2_AUTO_DISABLE_VF) { + adapter->vfinfo[vf].primary_abort_count++; + if (adapter->vfinfo[vf].primary_abort_count == + IXGBE_PRIMARY_ABORT_LIMIT) { + ixgbe_set_vf_link_state(adapter, vf, + IFLA_VF_LINK_STATE_DISABLE); + adapter->vfinfo[vf].primary_abort_count = 0; + + e_info(drv, + "Malicious Driver Detection event detected on PF %d VF %d MAC: %pM mdd-disable-vf=on", + hw->bus.func, vf, + adapter->vfinfo[vf].vf_mac_addresses); + } + } +} + static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -7644,8 +7665,10 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) continue; pci_read_config_word(vfdev, PCI_STATUS, &status_reg); if (status_reg != IXGBE_FAILED_READ_CFG_WORD && - status_reg & PCI_STATUS_REC_MASTER_ABORT) + status_reg & PCI_STATUS_REC_MASTER_ABORT) { + ixgbe_bad_vf_abort(adapter, vf); pcie_flr(vfdev); + } } } @@ -10284,6 +10307,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_set_vf_vlan = ixgbe_ndo_set_vf_vlan, .ndo_set_vf_rate = ixgbe_ndo_set_vf_bw, .ndo_set_vf_spoofchk = ixgbe_ndo_set_vf_spoofchk, + .ndo_set_vf_link_state = ixgbe_ndo_set_vf_link_state, .ndo_set_vf_rss_query_en = ixgbe_ndo_set_vf_rss_query_en, .ndo_set_vf_trust = ixgbe_ndo_set_vf_trust, .ndo_get_vf_config = ixgbe_ndo_get_vf_config, @@ -10632,9 +10656,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct ixgbe_adapter *adapter = NULL; struct ixgbe_hw *hw; const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data]; - int i, err, pci_using_dac, expected_gts; unsigned int indices = MAX_TX_QUEUES; u8 part_str[IXGBE_PBANUM_LENGTH]; + int i, err, expected_gts; bool disable_dev = false; #ifdef IXGBE_FCOE u16 device_caps; @@ -10654,16 +10678,11 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } - pci_using_dac = 0; + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } err = pci_request_mem_regions(pdev, ixgbe_driver_name); @@ -10750,6 +10769,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_sw_init; + if (adapter->hw.mac.type == ixgbe_mac_82599EB) + adapter->flags2 |= IXGBE_FLAG2_AUTO_DISABLE_VF; + switch (adapter->hw.mac.type) { case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: @@ -10861,8 +10883,7 @@ skip_sriov: netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HIGHDMA; netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; netdev->hw_enc_features |= netdev->vlan_features; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h index a148534d7256..8f4316b19278 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h @@ -85,6 +85,8 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_IPSEC_ADD 0x0d #define IXGBE_VF_IPSEC_DEL 0x0e +#define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 214a38de3f41..7f11c0a8e7a9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -96,6 +96,7 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter, for (i = 0; i < num_vfs; i++) { /* enable spoof checking for all VFs */ adapter->vfinfo[i].spoofchk_enabled = true; + adapter->vfinfo[i].link_enable = true; /* We support VF RSS querying only for 82599 and x540 * devices at the moment. These devices share RSS @@ -820,6 +821,57 @@ static inline void ixgbe_write_qde(struct ixgbe_adapter *adapter, u32 vf, } } +/** + * ixgbe_set_vf_rx_tx - Set VF rx tx + * @adapter: Pointer to adapter struct + * @vf: VF identifier + * + * Set or reset correct transmit and receive for vf + **/ +static void ixgbe_set_vf_rx_tx(struct ixgbe_adapter *adapter, int vf) +{ + u32 reg_cur_tx, reg_cur_rx, reg_req_tx, reg_req_rx; + struct ixgbe_hw *hw = &adapter->hw; + u32 reg_offset, vf_shift; + + vf_shift = vf % 32; + reg_offset = vf / 32; + + reg_cur_tx = IXGBE_READ_REG(hw, IXGBE_VFTE(reg_offset)); + reg_cur_rx = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset)); + + if (adapter->vfinfo[vf].link_enable) { + reg_req_tx = reg_cur_tx | 1 << vf_shift; + reg_req_rx = reg_cur_rx | 1 << vf_shift; + } else { + reg_req_tx = reg_cur_tx & ~(1 << vf_shift); + reg_req_rx = reg_cur_rx & ~(1 << vf_shift); + } + + /* The 82599 cannot support a mix of jumbo and non-jumbo PF/VFs. + * For more info take a look at ixgbe_set_vf_lpe + */ + if (adapter->hw.mac.type == ixgbe_mac_82599EB) { + struct net_device *dev = adapter->netdev; + int pf_max_frame = dev->mtu + ETH_HLEN; + +#if IS_ENABLED(CONFIG_FCOE) + if (dev->features & NETIF_F_FCOE_MTU) + pf_max_frame = max_t(int, pf_max_frame, + IXGBE_FCOE_JUMBO_FRAME_SIZE); +#endif /* CONFIG_FCOE */ + + if (pf_max_frame > ETH_FRAME_LEN) + reg_req_rx = reg_cur_rx & ~(1 << vf_shift); + } + + /* Enable/Disable particular VF */ + if (reg_cur_tx != reg_req_tx) + IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg_req_tx); + if (reg_cur_rx != reg_req_rx) + IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), reg_req_rx); +} + static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) { struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; @@ -845,11 +897,6 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) vf_shift = vf % 32; reg_offset = vf / 32; - /* enable transmit for vf */ - reg = IXGBE_READ_REG(hw, IXGBE_VFTE(reg_offset)); - reg |= BIT(vf_shift); - IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg); - /* force drop enable for all VF Rx queues */ reg = IXGBE_QDE_ENABLE; if (adapter->vfinfo[vf].pf_vlan) @@ -857,27 +904,7 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) ixgbe_write_qde(adapter, vf, reg); - /* enable receive for vf */ - reg = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset)); - reg |= BIT(vf_shift); - /* - * The 82599 cannot support a mix of jumbo and non-jumbo PF/VFs. - * For more info take a look at ixgbe_set_vf_lpe - */ - if (adapter->hw.mac.type == ixgbe_mac_82599EB) { - struct net_device *dev = adapter->netdev; - int pf_max_frame = dev->mtu + ETH_HLEN; - -#ifdef CONFIG_FCOE - if (dev->features & NETIF_F_FCOE_MTU) - pf_max_frame = max_t(int, pf_max_frame, - IXGBE_FCOE_JUMBO_FRAME_SIZE); - -#endif /* CONFIG_FCOE */ - if (pf_max_frame > ETH_FRAME_LEN) - reg &= ~BIT(vf_shift); - } - IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), reg); + ixgbe_set_vf_rx_tx(adapter, vf); /* enable VF mailbox for further messages */ adapter->vfinfo[vf].clear_to_send = true; @@ -1202,6 +1229,26 @@ out: return 0; } +static int ixgbe_get_vf_link_state(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + u32 *link_state = &msgbuf[1]; + + /* verify the PF is supporting the correct API */ + switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_12: + case ixgbe_mbox_api_13: + case ixgbe_mbox_api_14: + break; + default: + return -EOPNOTSUPP; + } + + *link_state = adapter->vfinfo[vf].link_enable; + + return 0; +} + static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) { u32 mbx_size = IXGBE_VFMAILBOX_SIZE; @@ -1267,6 +1314,9 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) case IXGBE_VF_UPDATE_XCAST_MODE: retval = ixgbe_update_vf_xcast_mode(adapter, msgbuf, vf); break; + case IXGBE_VF_GET_LINK_STATE: + retval = ixgbe_get_vf_link_state(adapter, msgbuf, vf); + break; case IXGBE_VF_IPSEC_ADD: retval = ixgbe_ipsec_vf_add_sa(adapter, msgbuf, vf); break; @@ -1322,18 +1372,6 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter) } } -void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - - /* disable transmit and receive for all vfs */ - IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), 0); - IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), 0); - - IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), 0); - IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), 0); -} - static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf) { struct ixgbe_hw *hw = &adapter->hw; @@ -1359,6 +1397,21 @@ void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter) } } +/** + * ixgbe_set_all_vfs - update vfs queues + * @adapter: Pointer to adapter struct + * + * Update setting transmit and receive queues for all vfs + **/ +void ixgbe_set_all_vfs(struct ixgbe_adapter *adapter) +{ + int i; + + for (i = 0 ; i < adapter->num_vfs; i++) + ixgbe_set_vf_link_state(adapter, i, + adapter->vfinfo[i].link_state); +} + int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) { struct ixgbe_adapter *adapter = netdev_priv(netdev); @@ -1656,6 +1709,84 @@ int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting) return 0; } +/** + * ixgbe_set_vf_link_state - Set link state + * @adapter: Pointer to adapter struct + * @vf: VF identifier + * @state: required link state + * + * Set a link force state on/off a single vf + **/ +void ixgbe_set_vf_link_state(struct ixgbe_adapter *adapter, int vf, int state) +{ + adapter->vfinfo[vf].link_state = state; + + switch (state) { + case IFLA_VF_LINK_STATE_AUTO: + if (test_bit(__IXGBE_DOWN, &adapter->state)) + adapter->vfinfo[vf].link_enable = false; + else + adapter->vfinfo[vf].link_enable = true; + break; + case IFLA_VF_LINK_STATE_ENABLE: + adapter->vfinfo[vf].link_enable = true; + break; + case IFLA_VF_LINK_STATE_DISABLE: + adapter->vfinfo[vf].link_enable = false; + break; + } + + ixgbe_set_vf_rx_tx(adapter, vf); + + /* restart the VF */ + adapter->vfinfo[vf].clear_to_send = false; + ixgbe_ping_vf(adapter, vf); +} + +/** + * ixgbe_ndo_set_vf_link_state - Set link state + * @netdev: network interface device structure + * @vf: VF identifier + * @state: required link state + * + * Set the link state of a specified VF, regardless of physical link state + **/ +int ixgbe_ndo_set_vf_link_state(struct net_device *netdev, int vf, int state) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int ret = 0; + + if (vf < 0 || vf >= adapter->num_vfs) { + dev_err(&adapter->pdev->dev, + "NDO set VF link - invalid VF identifier %d\n", vf); + return -EINVAL; + } + + switch (state) { + case IFLA_VF_LINK_STATE_ENABLE: + dev_info(&adapter->pdev->dev, + "NDO set VF %d link state %d - not supported\n", + vf, state); + break; + case IFLA_VF_LINK_STATE_DISABLE: + dev_info(&adapter->pdev->dev, + "NDO set VF %d link state disable\n", vf); + ixgbe_set_vf_link_state(adapter, vf, state); + break; + case IFLA_VF_LINK_STATE_AUTO: + dev_info(&adapter->pdev->dev, + "NDO set VF %d link state auto\n", vf); + ixgbe_set_vf_link_state(adapter, vf, state); + break; + default: + dev_err(&adapter->pdev->dev, + "NDO set VF %d - invalid link state %d\n", vf, state); + ret = -EINVAL; + } + + return ret; +} + int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf, bool setting) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index 3ec21923c89c..0690ecb8dfa3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -17,8 +17,8 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter); #endif void ixgbe_msg_task(struct ixgbe_adapter *adapter); int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask); -void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter); void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); +void ixgbe_set_all_vfs(struct ixgbe_adapter *adapter); int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac); int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, u8 qos, __be16 vlan_proto); @@ -31,7 +31,9 @@ int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf, int ixgbe_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting); int ixgbe_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi); +int ixgbe_ndo_set_vf_link_state(struct net_device *netdev, int vf, int state); void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter); +void ixgbe_set_vf_link_state(struct ixgbe_adapter *adapter, int vf, int state); int ixgbe_disable_sriov(struct ixgbe_adapter *adapter); #ifdef CONFIG_PCI_IOV void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 2647937f7f4d..6da9880d766a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1247,7 +1247,7 @@ struct ixgbe_nvm_version { #define IXGBE_PSRTYPE_RQPL_SHIFT 29 /* CTRL Bit Masks */ -#define IXGBE_CTRL_GIO_DIS 0x00000004 /* Global IO Master Disable bit */ +#define IXGBE_CTRL_GIO_DIS 0x00000004 /* Global IO Primary Disable bit */ #define IXGBE_CTRL_LNK_RST 0x00000008 /* Link Reset. Resets everything. */ #define IXGBE_CTRL_RST 0x04000000 /* Reset (SW) */ #define IXGBE_CTRL_RST_MASK (IXGBE_CTRL_LNK_RST | IXGBE_CTRL_RST) @@ -1811,7 +1811,7 @@ enum { /* STATUS Bit Masks */ #define IXGBE_STATUS_LAN_ID 0x0000000C /* LAN ID */ #define IXGBE_STATUS_LAN_ID_SHIFT 2 /* LAN ID Shift*/ -#define IXGBE_STATUS_GIO 0x00080000 /* GIO Master Enable Status */ +#define IXGBE_STATUS_GIO 0x00080000 /* GIO Primary Enable Status */ #define IXGBE_STATUS_LAN_ID_0 0x00000000 /* LAN ID 0 */ #define IXGBE_STATUS_LAN_ID_1 0x00000004 /* LAN ID 1 */ @@ -2193,8 +2193,8 @@ enum { #define IXGBE_PCIDEVCTRL2_4_8s 0xd #define IXGBE_PCIDEVCTRL2_17_34s 0xe -/* Number of 100 microseconds we wait for PCI Express master disable */ -#define IXGBE_PCI_MASTER_DISABLE_TIMEOUT 800 +/* Number of 100 microseconds we wait for PCI Express primary disable */ +#define IXGBE_PCI_PRIMARY_DISABLE_TIMEOUT 800 /* RAH */ #define IXGBE_RAH_VIND_MASK 0x003C0000 @@ -3671,7 +3671,7 @@ struct ixgbe_info { #define IXGBE_ERR_ADAPTER_STOPPED -9 #define IXGBE_ERR_INVALID_MAC_ADDR -10 #define IXGBE_ERR_DEVICE_NOT_SUPPORTED -11 -#define IXGBE_ERR_MASTER_REQUESTS_PENDING -12 +#define IXGBE_ERR_PRIMARY_REQUESTS_PENDING -12 #define IXGBE_ERR_INVALID_LINK_SETTINGS -13 #define IXGBE_ERR_AUTONEG_NOT_COMPLETE -14 #define IXGBE_ERR_RESET_FAILED -15 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 6a5e9cf6b5da..dd7ff66d422f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -207,26 +207,28 @@ bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count) } static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *bi) + const struct xdp_buff *xdp) { - unsigned int metasize = bi->xdp->data - bi->xdp->data_meta; - unsigned int datasize = bi->xdp->data_end - bi->xdp->data; + unsigned int totalsize = xdp->data_end - xdp->data_meta; + unsigned int metasize = xdp->data - xdp->data_meta; struct sk_buff *skb; + net_prefetch(xdp->data_meta); + /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - bi->xdp->data_end - bi->xdp->data_hard_start, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, bi->xdp->data - bi->xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), bi->xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } - xsk_buff_free(bi->xdp); - bi->xdp = NULL; return skb; } @@ -317,12 +319,15 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, } /* XDP_PASS path */ - skb = ixgbe_construct_skb_zc(rx_ring, bi); + skb = ixgbe_construct_skb_zc(rx_ring, bi->xdp); if (!skb) { rx_ring->rx_stats.alloc_rx_buff_failed++; break; } + xsk_buff_free(bi->xdp); + bi->xdp = NULL; + cleaned_count++; ixgbe_inc_ntc(rx_ring); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index e257390a4f6a..149c733fcc2b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -387,6 +387,8 @@ struct ixgbevf_adapter { u32 *rss_key; u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE]; u32 flags; + bool link_state; + #define IXGBEVF_FLAGS_LEGACY_RX BIT(1) #ifdef CONFIG_XFRM diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 0f293acd17e8..55b87bc3a938 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2298,7 +2298,9 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; struct ixgbe_hw *hw = &adapter->hw; + bool state; ixgbevf_configure_msix(adapter); @@ -2311,6 +2313,11 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) spin_unlock_bh(&adapter->mbx_lock); + state = adapter->link_state; + hw->mac.ops.get_link_state(hw, &adapter->link_state); + if (state && state != adapter->link_state) + dev_info(&pdev->dev, "VF is administratively disabled\n"); + smp_mb__before_atomic(); clear_bit(__IXGBEVF_DOWN, &adapter->state); ixgbevf_napi_enable_all(adapter); @@ -2753,7 +2760,7 @@ static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx, ring->reg_idx = reg_idx; /* assign ring to adapter */ - adapter->tx_ring[txr_idx] = ring; + adapter->tx_ring[txr_idx] = ring; /* update count and index */ txr_count--; @@ -3081,6 +3088,8 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter) adapter->tx_ring_count = IXGBEVF_DEFAULT_TXD; adapter->rx_ring_count = IXGBEVF_DEFAULT_RXD; + adapter->link_state = true; + set_bit(__IXGBEVF_DOWN, &adapter->state); return 0; @@ -3313,7 +3322,7 @@ static void ixgbevf_watchdog_subtask(struct ixgbevf_adapter *adapter) ixgbevf_watchdog_update_link(adapter); - if (adapter->link_up) + if (adapter->link_up && adapter->link_state) ixgbevf_watchdog_link_is_up(adapter); else ixgbevf_watchdog_link_is_down(adapter); @@ -4512,22 +4521,17 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct ixgbevf_adapter *adapter = NULL; struct ixgbe_hw *hw = NULL; const struct ixgbevf_info *ii = ixgbevf_info_tbl[ent->driver_data]; - int err, pci_using_dac; bool disable_dev = false; + int err; err = pci_enable_device(pdev); if (err) return err; - if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { - pci_using_dac = 1; - } else { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, "No usable DMA configuration, aborting\n"); - goto err_dma; - } - pci_using_dac = 0; + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, "No usable DMA configuration, aborting\n"); + goto err_dma; } err = pci_request_regions(pdev, ixgbevf_driver_name); @@ -4607,10 +4611,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_GSO_PARTIAL | IXGBEVF_GSO_PARTIAL_FEATURES; - netdev->features = netdev->hw_features; - - if (pci_using_dac) - netdev->features |= NETIF_F_HIGHDMA; + netdev->features = netdev->hw_features | NETIF_F_HIGHDMA; netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; netdev->mpls_features |= NETIF_F_SG | diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h index 7346ccf014a5..835bbcc5cc8e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.h +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h @@ -100,6 +100,8 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_IPSEC_ADD 0x0d #define IXGBE_VF_IPSEC_DEL 0x0e +#define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 61d8970c6d1d..68fc32e36e88 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -585,6 +585,46 @@ static s32 ixgbevf_hv_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) } /** + * ixgbevf_get_link_state_vf - Get VF link state from PF + * @hw: pointer to the HW structure + * @link_state: link state storage + * + * Returns state of the operation error or success. + */ +static s32 ixgbevf_get_link_state_vf(struct ixgbe_hw *hw, bool *link_state) +{ + u32 msgbuf[2]; + s32 ret_val; + s32 err; + + msgbuf[0] = IXGBE_VF_GET_LINK_STATE; + msgbuf[1] = 0x0; + + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + + if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) { + ret_val = IXGBE_ERR_MBX; + } else { + ret_val = 0; + *link_state = msgbuf[1]; + } + + return ret_val; +} + +/** + * ixgbevf_hv_get_link_state_vf - * Hyper-V variant - just a stub. + * @hw: unused + * @link_state: unused + * + * Hyper-V variant; there is no mailbox communication. + */ +static s32 ixgbevf_hv_get_link_state_vf(struct ixgbe_hw *hw, bool *link_state) +{ + return -EOPNOTSUPP; +} + +/** * ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address * @hw: pointer to the HW structure * @vlan: 12 bit VLAN ID @@ -968,6 +1008,7 @@ static const struct ixgbe_mac_operations ixgbevf_mac_ops = { .set_rar = ixgbevf_set_rar_vf, .update_mc_addr_list = ixgbevf_update_mc_addr_list_vf, .update_xcast_mode = ixgbevf_update_xcast_mode, + .get_link_state = ixgbevf_get_link_state_vf, .set_uc_addr = ixgbevf_set_uc_addr_vf, .set_vfta = ixgbevf_set_vfta_vf, .set_rlpml = ixgbevf_set_rlpml_vf, @@ -985,6 +1026,7 @@ static const struct ixgbe_mac_operations ixgbevf_hv_mac_ops = { .set_rar = ixgbevf_hv_set_rar_vf, .update_mc_addr_list = ixgbevf_hv_update_mc_addr_list_vf, .update_xcast_mode = ixgbevf_hv_update_xcast_mode, + .get_link_state = ixgbevf_hv_get_link_state_vf, .set_uc_addr = ixgbevf_hv_set_uc_addr_vf, .set_vfta = ixgbevf_hv_set_vfta_vf, .set_rlpml = ixgbevf_hv_set_rlpml_vf, diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index 54158dac8707..b4eef5b6c172 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -39,6 +39,7 @@ struct ixgbe_mac_operations { s32 (*init_rx_addrs)(struct ixgbe_hw *); s32 (*update_mc_addr_list)(struct ixgbe_hw *, struct net_device *); s32 (*update_xcast_mode)(struct ixgbe_hw *, int); + s32 (*get_link_state)(struct ixgbe_hw *hw, bool *link_state); s32 (*enable_mc)(struct ixgbe_hw *); s32 (*disable_mc)(struct ixgbe_hw *); s32 (*clear_vfta)(struct ixgbe_hw *); diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 439674fc9765..b6c5122da995 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -28,6 +28,7 @@ #include <linux/udp.h> #include <linux/if_vlan.h> #include <linux/slab.h> +#include <linux/jiffies.h> #include <net/ip6_checksum.h> #include "jme.h" @@ -2179,7 +2180,7 @@ jme_stop_queue_if_full(struct jme_adapter *jme) } if (unlikely(txbi->start_xmit && - (jiffies - txbi->start_xmit) >= TX_TIMEOUT && + time_is_before_eq_jiffies(txbi->start_xmit + TX_TIMEOUT) && txbi->skb)) { netif_stop_queue(jme->dev); netif_info(jme, tx_queued, jme->dev, diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 143ca8be5eb5..c31cbbae0eca 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -1661,7 +1661,7 @@ mv643xx_eth_set_ringparam(struct net_device *dev, struct ethtool_ringparam *er, if (er->rx_mini_pending || er->rx_jumbo_pending) return -EINVAL; - mp->rx_ring_size = er->rx_pending < 4096 ? er->rx_pending : 4096; + mp->rx_ring_size = min(er->rx_pending, 4096U); mp->tx_ring_size = clamp_t(unsigned int, er->tx_pending, MV643XX_MAX_SKB_DESCS * 2, 4096); if (mp->tx_ring_size != er->tx_pending) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 83c8908f0cc7..f1335a1ed695 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1884,8 +1884,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp, bytes_compl += buf->skb->len; pkts_compl++; dev_kfree_skb_any(buf->skb); - } else if (buf->type == MVNETA_TYPE_XDP_TX || - buf->type == MVNETA_TYPE_XDP_NDO) { + } else if ((buf->type == MVNETA_TYPE_XDP_TX || + buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) { if (napi && buf->type == MVNETA_TYPE_XDP_TX) xdp_return_frame_rx_napi(buf->xdpf); else @@ -2060,61 +2060,104 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq) static void mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, - struct xdp_buff *xdp, struct skb_shared_info *sinfo, - int sync_len) + struct xdp_buff *xdp, int sync_len) { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); int i; + if (likely(!xdp_buff_has_frags(xdp))) + goto out; + for (i = 0; i < sinfo->nr_frags; i++) page_pool_put_full_page(rxq->page_pool, skb_frag_page(&sinfo->frags[i]), true); + +out: page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data), sync_len, true); } static int mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq, - struct xdp_frame *xdpf, bool dma_map) + struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map) { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); + struct device *dev = pp->dev->dev.parent; struct mvneta_tx_desc *tx_desc; - struct mvneta_tx_buf *buf; - dma_addr_t dma_addr; + int i, num_frames = 1; + struct page *page; - if (txq->count >= txq->tx_stop_threshold) + if (unlikely(xdp_frame_has_frags(xdpf))) + num_frames += sinfo->nr_frags; + + if (txq->count + num_frames >= txq->size) return MVNETA_XDP_DROPPED; - tx_desc = mvneta_txq_next_desc_get(txq); + for (i = 0; i < num_frames; i++) { + struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; + skb_frag_t *frag = NULL; + int len = xdpf->len; + dma_addr_t dma_addr; - buf = &txq->buf[txq->txq_put_index]; - if (dma_map) { - /* ndo_xdp_xmit */ - dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data, - xdpf->len, DMA_TO_DEVICE); - if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) { - mvneta_txq_desc_put(txq); - return MVNETA_XDP_DROPPED; + if (unlikely(i)) { /* paged area */ + frag = &sinfo->frags[i - 1]; + len = skb_frag_size(frag); } - buf->type = MVNETA_TYPE_XDP_NDO; - } else { - struct page *page = virt_to_page(xdpf->data); - dma_addr = page_pool_get_dma_addr(page) + - sizeof(*xdpf) + xdpf->headroom; - dma_sync_single_for_device(pp->dev->dev.parent, dma_addr, - xdpf->len, DMA_BIDIRECTIONAL); - buf->type = MVNETA_TYPE_XDP_TX; - } - buf->xdpf = xdpf; + tx_desc = mvneta_txq_next_desc_get(txq); + if (dma_map) { + /* ndo_xdp_xmit */ + void *data; + + data = unlikely(frag) ? skb_frag_address(frag) + : xdpf->data; + dma_addr = dma_map_single(dev, data, len, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + mvneta_txq_desc_put(txq); + goto unmap; + } + + buf->type = MVNETA_TYPE_XDP_NDO; + } else { + page = unlikely(frag) ? skb_frag_page(frag) + : virt_to_page(xdpf->data); + dma_addr = page_pool_get_dma_addr(page); + if (unlikely(frag)) + dma_addr += skb_frag_off(frag); + else + dma_addr += sizeof(*xdpf) + xdpf->headroom; + dma_sync_single_for_device(dev, dma_addr, len, + DMA_BIDIRECTIONAL); + buf->type = MVNETA_TYPE_XDP_TX; + } + buf->xdpf = unlikely(i) ? NULL : xdpf; - tx_desc->command = MVNETA_TXD_FLZ_DESC; - tx_desc->buf_phys_addr = dma_addr; - tx_desc->data_size = xdpf->len; + tx_desc->command = unlikely(i) ? 0 : MVNETA_TXD_F_DESC; + tx_desc->buf_phys_addr = dma_addr; + tx_desc->data_size = len; + *nxmit_byte += len; - mvneta_txq_inc_put(txq); - txq->pending++; - txq->count++; + mvneta_txq_inc_put(txq); + } + /*last descriptor */ + tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD; + + txq->pending += num_frames; + txq->count += num_frames; return MVNETA_XDP_TX; + +unmap: + for (i--; i >= 0; i--) { + mvneta_txq_desc_put(txq); + tx_desc = txq->descs + txq->next_desc_to_proc; + dma_unmap_single(dev, tx_desc->buf_phys_addr, + tx_desc->data_size, + DMA_TO_DEVICE); + } + + return MVNETA_XDP_DROPPED; } static int @@ -2123,8 +2166,8 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp) struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); struct mvneta_tx_queue *txq; struct netdev_queue *nq; + int cpu, nxmit_byte = 0; struct xdp_frame *xdpf; - int cpu; u32 ret; xdpf = xdp_convert_buff_to_frame(xdp); @@ -2136,10 +2179,10 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp) nq = netdev_get_tx_queue(pp->dev, txq->id); __netif_tx_lock(nq, cpu); - ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false); + ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false); if (ret == MVNETA_XDP_TX) { u64_stats_update_begin(&stats->syncp); - stats->es.ps.tx_bytes += xdpf->len; + stats->es.ps.tx_bytes += nxmit_byte; stats->es.ps.tx_packets++; stats->es.ps.xdp_tx++; u64_stats_update_end(&stats->syncp); @@ -2178,11 +2221,11 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame, __netif_tx_lock(nq, cpu); for (i = 0; i < num_frame; i++) { - ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true); + ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte, + true); if (ret != MVNETA_XDP_TX) break; - nxmit_byte += frames[i]->len; nxmit++; } @@ -2205,7 +2248,6 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, struct bpf_prog *prog, struct xdp_buff *xdp, u32 frame_sz, struct mvneta_stats *stats) { - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); unsigned int len, data_len, sync; u32 ret, act; @@ -2226,7 +2268,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, err = xdp_do_redirect(pp->dev, xdp, prog); if (unlikely(err)) { - mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync); + mvneta_xdp_put_buff(pp, rxq, xdp, sync); ret = MVNETA_XDP_DROPPED; } else { ret = MVNETA_XDP_REDIR; @@ -2237,7 +2279,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, case XDP_TX: ret = mvneta_xdp_xmit_back(pp, xdp); if (ret != MVNETA_XDP_TX) - mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync); + mvneta_xdp_put_buff(pp, rxq, xdp, sync); break; default: bpf_warn_invalid_xdp_action(pp->dev, prog, act); @@ -2246,7 +2288,7 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, trace_xdp_exception(pp->dev, prog, act); fallthrough; case XDP_DROP: - mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync); + mvneta_xdp_put_buff(pp, rxq, xdp, sync); ret = MVNETA_XDP_DROPPED; stats->xdp_drop++; break; @@ -2269,7 +2311,6 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp, int data_len = -MVNETA_MH_SIZE, len; struct net_device *dev = pp->dev; enum dma_data_direction dma_dir; - struct skb_shared_info *sinfo; if (*size > MVNETA_MAX_RX_BUF_SIZE) { len = MVNETA_MAX_RX_BUF_SIZE; @@ -2289,11 +2330,9 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp, /* Prefetch header */ prefetch(data); + xdp_buff_clear_frags_flag(xdp); xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE, data_len, false); - - sinfo = xdp_get_shared_info_from_buff(xdp); - sinfo->nr_frags = 0; } static void @@ -2301,9 +2340,9 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, struct mvneta_rx_desc *rx_desc, struct mvneta_rx_queue *rxq, struct xdp_buff *xdp, int *size, - struct skb_shared_info *xdp_sinfo, struct page *page) { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); struct net_device *dev = pp->dev; enum dma_data_direction dma_dir; int data_len, len; @@ -2321,25 +2360,25 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, len, dma_dir); rx_desc->buf_phys_addr = 0; - if (data_len > 0 && xdp_sinfo->nr_frags < MAX_SKB_FRAGS) { - skb_frag_t *frag = &xdp_sinfo->frags[xdp_sinfo->nr_frags++]; + if (!xdp_buff_has_frags(xdp)) + sinfo->nr_frags = 0; + + if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) { + skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++]; skb_frag_off_set(frag, pp->rx_offset_correction); skb_frag_size_set(frag, data_len); __skb_frag_set_page(frag, page); + + if (!xdp_buff_has_frags(xdp)) { + sinfo->xdp_frags_size = *size; + xdp_buff_set_frags_flag(xdp); + } + if (page_is_pfmemalloc(page)) + xdp_buff_set_frag_pfmemalloc(xdp); } else { page_pool_put_full_page(rxq->page_pool, page, true); } - - /* last fragment */ - if (len == *size) { - struct skb_shared_info *sinfo; - - sinfo = xdp_get_shared_info_from_buff(xdp); - sinfo->nr_frags = xdp_sinfo->nr_frags; - memcpy(sinfo->frags, xdp_sinfo->frags, - sinfo->nr_frags * sizeof(skb_frag_t)); - } *size -= len; } @@ -2348,8 +2387,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, struct xdp_buff *xdp, u32 desc_status) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); - int i, num_frags = sinfo->nr_frags; struct sk_buff *skb; + u8 num_frags; + + if (unlikely(xdp_buff_has_frags(xdp))) + num_frags = sinfo->nr_frags; skb = build_skb(xdp->data_hard_start, PAGE_SIZE); if (!skb) @@ -2361,13 +2403,11 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, skb_put(skb, xdp->data_end - xdp->data); skb->ip_summed = mvneta_rx_csum(pp, desc_status); - for (i = 0; i < num_frags; i++) { - skb_frag_t *frag = &sinfo->frags[i]; - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - skb_frag_page(frag), skb_frag_off(frag), - skb_frag_size(frag), PAGE_SIZE); - } + if (unlikely(xdp_buff_has_frags(xdp))) + xdp_update_skb_shared_info(skb, num_frags, + sinfo->xdp_frags_size, + num_frags * xdp->frame_sz, + xdp_buff_is_frag_pfmemalloc(xdp)); return skb; } @@ -2379,7 +2419,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi, { int rx_proc = 0, rx_todo, refill, size = 0; struct net_device *dev = pp->dev; - struct skb_shared_info sinfo; struct mvneta_stats ps = {}; struct bpf_prog *xdp_prog; u32 desc_status, frame_sz; @@ -2388,8 +2427,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi, xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq); xdp_buf.data_hard_start = NULL; - sinfo.nr_frags = 0; - /* Get number of received packets */ rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq); @@ -2431,7 +2468,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi, } mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf, - &size, &sinfo, page); + &size, page); } /* Middle or Last descriptor */ if (!(rx_status & MVNETA_RXD_LAST_DESC)) @@ -2439,7 +2476,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi, continue; if (size) { - mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1); + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); goto next; } @@ -2451,7 +2488,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi, if (IS_ERR(skb)) { struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); - mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1); + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); u64_stats_update_begin(&stats->syncp); stats->es.skb_alloc_error++; @@ -2468,11 +2505,10 @@ static int mvneta_rx_swbm(struct napi_struct *napi, napi_gro_receive(napi, skb); next: xdp_buf.data_hard_start = NULL; - sinfo.nr_frags = 0; } if (xdp_buf.data_hard_start) - mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1); + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); if (ps.xdp_redirect) xdp_do_flush_map(); @@ -3260,7 +3296,8 @@ static int mvneta_create_page_pool(struct mvneta_port *pp, return err; } - err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0); + err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0, + PAGE_SIZE); if (err < 0) goto err_free_pp; @@ -3740,6 +3777,7 @@ static void mvneta_percpu_disable(void *arg) static int mvneta_change_mtu(struct net_device *dev, int mtu) { struct mvneta_port *pp = netdev_priv(dev); + struct bpf_prog *prog = pp->xdp_prog; int ret; if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) { @@ -3748,8 +3786,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8); } - if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) { - netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu); + if (prog && !prog->aux->xdp_has_frags && + mtu > MVNETA_MAX_RX_BUF_SIZE) { + netdev_info(dev, "Illegal MTU %d for XDP prog without frags\n", + mtu); + return -EINVAL; } @@ -3969,6 +4010,15 @@ static const struct phylink_pcs_ops mvneta_phylink_pcs_ops = { .pcs_an_restart = mvneta_pcs_an_restart, }; +static struct phylink_pcs *mvneta_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct mvneta_port *pp = netdev_priv(ndev); + + return &pp->phylink_pcs; +} + static int mvneta_mac_prepare(struct phylink_config *config, unsigned int mode, phy_interface_t interface) { @@ -4169,13 +4219,14 @@ static void mvneta_mac_link_up(struct phylink_config *config, mvneta_port_up(pp); if (phy && pp->eee_enabled) { - pp->eee_active = phy_init_eee(phy, 0) >= 0; + pp->eee_active = phy_init_eee(phy, false) >= 0; mvneta_set_eee(pp, pp->eee_active && pp->tx_lpi_enabled); } } static const struct phylink_mac_ops mvneta_phylink_ops = { .validate = phylink_generic_validate, + .mac_select_pcs = mvneta_mac_select_pcs, .mac_prepare = mvneta_mac_prepare, .mac_config = mvneta_mac_config, .mac_finish = mvneta_mac_finish, @@ -4490,8 +4541,9 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog, struct mvneta_port *pp = netdev_priv(dev); struct bpf_prog *old_prog; - if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) { - NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP"); + if (prog && !prog->aux->xdp_has_frags && + dev->mtu > MVNETA_MAX_RX_BUF_SIZE) { + NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags"); return -EOPNOTSUPP; } @@ -5321,26 +5373,62 @@ static int mvneta_probe(struct platform_device *pdev) if (!dev) return -ENOMEM; - dev->irq = irq_of_parse_and_map(dn, 0); - if (dev->irq == 0) - return -EINVAL; + dev->tx_queue_len = MVNETA_MAX_TXD; + dev->watchdog_timeo = 5 * HZ; + dev->netdev_ops = &mvneta_netdev_ops; + dev->ethtool_ops = &mvneta_eth_tool_ops; + + pp = netdev_priv(dev); + spin_lock_init(&pp->lock); + pp->dn = dn; + + pp->rxq_def = rxq_def; + pp->indir[0] = rxq_def; err = of_get_phy_mode(dn, &phy_mode); if (err) { dev_err(&pdev->dev, "incorrect phy-mode\n"); - goto err_free_irq; + return err; } + pp->phy_interface = phy_mode; + comphy = devm_of_phy_get(&pdev->dev, dn, NULL); - if (comphy == ERR_PTR(-EPROBE_DEFER)) { - err = -EPROBE_DEFER; - goto err_free_irq; - } else if (IS_ERR(comphy)) { + if (comphy == ERR_PTR(-EPROBE_DEFER)) + return -EPROBE_DEFER; + + if (IS_ERR(comphy)) comphy = NULL; + + pp->comphy = comphy; + + pp->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(pp->base)) + return PTR_ERR(pp->base); + + /* Get special SoC configurations */ + if (of_device_is_compatible(dn, "marvell,armada-3700-neta")) + pp->neta_armada3700 = true; + + dev->irq = irq_of_parse_and_map(dn, 0); + if (dev->irq == 0) + return -EINVAL; + + pp->clk = devm_clk_get(&pdev->dev, "core"); + if (IS_ERR(pp->clk)) + pp->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(pp->clk)) { + err = PTR_ERR(pp->clk); + goto err_free_irq; } - pp = netdev_priv(dev); - spin_lock_init(&pp->lock); + clk_prepare_enable(pp->clk); + + pp->clk_bus = devm_clk_get(&pdev->dev, "bus"); + if (!IS_ERR(pp->clk_bus)) + clk_prepare_enable(pp->clk_bus); + + pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops; pp->phylink_config.dev = &dev->dev; pp->phylink_config.type = PHYLINK_NETDEV; @@ -5377,55 +5465,16 @@ static int mvneta_probe(struct platform_device *pdev) phy_mode, &mvneta_phylink_ops); if (IS_ERR(phylink)) { err = PTR_ERR(phylink); - goto err_free_irq; - } - - dev->tx_queue_len = MVNETA_MAX_TXD; - dev->watchdog_timeo = 5 * HZ; - dev->netdev_ops = &mvneta_netdev_ops; - - dev->ethtool_ops = &mvneta_eth_tool_ops; - - pp->phylink = phylink; - pp->comphy = comphy; - pp->phy_interface = phy_mode; - pp->dn = dn; - - pp->rxq_def = rxq_def; - pp->indir[0] = rxq_def; - - /* Get special SoC configurations */ - if (of_device_is_compatible(dn, "marvell,armada-3700-neta")) - pp->neta_armada3700 = true; - - pp->clk = devm_clk_get(&pdev->dev, "core"); - if (IS_ERR(pp->clk)) - pp->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(pp->clk)) { - err = PTR_ERR(pp->clk); - goto err_free_phylink; - } - - clk_prepare_enable(pp->clk); - - pp->clk_bus = devm_clk_get(&pdev->dev, "bus"); - if (!IS_ERR(pp->clk_bus)) - clk_prepare_enable(pp->clk_bus); - - pp->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(pp->base)) { - err = PTR_ERR(pp->base); goto err_clk; } - pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops; - phylink_set_pcs(phylink, &pp->phylink_pcs); + pp->phylink = phylink; /* Alloc per-cpu port structure */ pp->ports = alloc_percpu(struct mvneta_pcpu_port); if (!pp->ports) { err = -ENOMEM; - goto err_clk; + goto err_free_phylink; } /* Alloc per-cpu stats */ @@ -5569,12 +5618,12 @@ err_netdev: free_percpu(pp->stats); err_free_ports: free_percpu(pp->ports); -err_clk: - clk_disable_unprepare(pp->clk_bus); - clk_disable_unprepare(pp->clk); err_free_phylink: if (pp->phylink) phylink_destroy(pp->phylink); +err_clk: + clk_disable_unprepare(pp->clk_bus); + clk_disable_unprepare(pp->clk); err_free_irq: irq_dispose_mapping(dev->irq); return err; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 3631d612aaca..25491edc35ce 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -578,31 +578,78 @@ void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable) } } +static int cgx_lmac_get_pause_frm_status(void *cgxd, int lmac_id, + u8 *tx_pause, u8 *rx_pause) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (is_dev_rpm(cgx)) + return 0; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + *rx_pause = !!(cfg & CGX_SMUX_RX_FRM_CTL_CTL_BCK); + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); + *tx_pause = !!(cfg & CGX_SMUX_TX_CTL_L2P_BP_CONV); + return 0; +} + /* Enable or disable forwarding received pause frames to Tx block */ void cgx_lmac_enadis_rx_pause_fwding(void *cgxd, int lmac_id, bool enable) { struct cgx *cgx = cgxd; + u8 rx_pause, tx_pause; + bool is_pfc_enabled; + struct lmac *lmac; u64 cfg; if (!cgx) return; - if (enable) { - cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); - cfg |= CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + lmac = lmac_pdata(lmac_id, cgx); + if (!lmac) + return; - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - cfg |= CGX_SMUX_RX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + /* Pause frames are not enabled just return */ + if (!bitmap_weight(lmac->rx_fc_pfvf_bmap.bmap, lmac->rx_fc_pfvf_bmap.max)) + return; + + cgx_lmac_get_pause_frm_status(cgx, lmac_id, &rx_pause, &tx_pause); + is_pfc_enabled = rx_pause ? false : true; + + if (enable) { + if (!is_pfc_enabled) { + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg |= CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + cfg |= CGX_SMUX_RX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + } else { + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL); + cfg |= CGXX_SMUX_CBFC_CTL_BCK_EN; + cgx_write(cgx, lmac_id, CGXX_SMUX_CBFC_CTL, cfg); + } } else { - cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); - cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + if (!is_pfc_enabled) { + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + } else { + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL); + cfg &= ~CGXX_SMUX_CBFC_CTL_BCK_EN; + cgx_write(cgx, lmac_id, CGXX_SMUX_CBFC_CTL, cfg); + } } } @@ -722,26 +769,6 @@ int cgx_lmac_tx_enable(void *cgxd, int lmac_id, bool enable) return !!(last & DATA_PKT_TX_EN); } -static int cgx_lmac_get_pause_frm_status(void *cgxd, int lmac_id, - u8 *tx_pause, u8 *rx_pause) -{ - struct cgx *cgx = cgxd; - u64 cfg; - - if (is_dev_rpm(cgx)) - return 0; - - if (!is_lmac_valid(cgx, lmac_id)) - return -ENODEV; - - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - *rx_pause = !!(cfg & CGX_SMUX_RX_FRM_CTL_CTL_BCK); - - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); - *tx_pause = !!(cfg & CGX_SMUX_TX_CTL_L2P_BP_CONV); - return 0; -} - static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause) { @@ -782,21 +809,8 @@ static void cgx_lmac_pause_frm_config(void *cgxd, int lmac_id, bool enable) if (!is_lmac_valid(cgx, lmac_id)) return; - if (enable) { - /* Enable receive pause frames */ - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - cfg |= CGX_SMUX_RX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); - - cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); - cfg |= CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); - - /* Enable pause frames transmission */ - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); - cfg |= CGX_SMUX_TX_CTL_L2P_BP_CONV; - cgx_write(cgx, lmac_id, CGXX_SMUX_TX_CTL, cfg); + if (enable) { /* Set pause time and interval */ cgx_write(cgx, lmac_id, CGXX_SMUX_TX_PAUSE_PKT_TIME, DEFAULT_PAUSE_TIME); @@ -813,21 +827,120 @@ static void cgx_lmac_pause_frm_config(void *cgxd, int lmac_id, bool enable) cfg &= ~0xFFFFULL; cgx_write(cgx, lmac_id, CGXX_GMP_GMI_TX_PAUSE_PKT_INTERVAL, cfg | (DEFAULT_PAUSE_TIME / 2)); - } else { - /* ALL pause frames received are completely ignored */ - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); - cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + } - cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); - cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; - cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + /* ALL pause frames received are completely ignored */ + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); + cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL, cfg); + + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); + cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK; + cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg); + + /* Disable pause frames transmission */ + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); + cfg &= ~CGX_SMUX_TX_CTL_L2P_BP_CONV; + cgx_write(cgx, lmac_id, CGXX_SMUX_TX_CTL, cfg); + + cfg = cgx_read(cgx, 0, CGXX_CMR_RX_OVR_BP); + cfg |= CGX_CMR_RX_OVR_BP_EN(lmac_id); + cfg &= ~CGX_CMR_RX_OVR_BP_BP(lmac_id); + cgx_write(cgx, 0, CGXX_CMR_RX_OVR_BP, cfg); +} + +int verify_lmac_fc_cfg(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, + int pfvf_idx) +{ + struct cgx *cgx = cgxd; + struct lmac *lmac; + + lmac = lmac_pdata(lmac_id, cgx); + if (!lmac) + return -ENODEV; + + if (!rx_pause) + clear_bit(pfvf_idx, lmac->rx_fc_pfvf_bmap.bmap); + else + set_bit(pfvf_idx, lmac->rx_fc_pfvf_bmap.bmap); + + if (!tx_pause) + clear_bit(pfvf_idx, lmac->tx_fc_pfvf_bmap.bmap); + else + set_bit(pfvf_idx, lmac->tx_fc_pfvf_bmap.bmap); + + /* check if other pfvfs are using flow control */ + if (!rx_pause && bitmap_weight(lmac->rx_fc_pfvf_bmap.bmap, lmac->rx_fc_pfvf_bmap.max)) { + dev_warn(&cgx->pdev->dev, + "Receive Flow control disable not permitted as its used by other PFVFs\n"); + return -EPERM; + } + + if (!tx_pause && bitmap_weight(lmac->tx_fc_pfvf_bmap.bmap, lmac->tx_fc_pfvf_bmap.max)) { + dev_warn(&cgx->pdev->dev, + "Transmit Flow control disable not permitted as its used by other PFVFs\n"); + return -EPERM; + } + + return 0; +} + +int cgx_lmac_pfc_config(void *cgxd, int lmac_id, u8 tx_pause, + u8 rx_pause, u16 pfc_en) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + /* Return as no traffic classes are requested */ + if (tx_pause && !pfc_en) + return 0; + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL); - /* Disable pause frames transmission */ - cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_TX_CTL); - cfg &= ~CGX_SMUX_TX_CTL_L2P_BP_CONV; - cgx_write(cgx, lmac_id, CGXX_SMUX_TX_CTL, cfg); + if (rx_pause) { + cfg |= (CGXX_SMUX_CBFC_CTL_RX_EN | + CGXX_SMUX_CBFC_CTL_BCK_EN | + CGXX_SMUX_CBFC_CTL_DRP_EN); + } else { + cfg &= ~(CGXX_SMUX_CBFC_CTL_RX_EN | + CGXX_SMUX_CBFC_CTL_BCK_EN | + CGXX_SMUX_CBFC_CTL_DRP_EN); } + + if (tx_pause) + cfg |= CGXX_SMUX_CBFC_CTL_TX_EN; + else + cfg &= ~CGXX_SMUX_CBFC_CTL_TX_EN; + + cfg = FIELD_SET(CGX_PFC_CLASS_MASK, pfc_en, cfg); + + cgx_write(cgx, lmac_id, CGXX_SMUX_CBFC_CTL, cfg); + + /* Write source MAC address which will be filled into PFC packet */ + cfg = cgx_lmac_addr_get(cgx->cgx_id, lmac_id); + cgx_write(cgx, lmac_id, CGXX_SMUX_SMAC, cfg); + + return 0; +} + +int cgx_lmac_get_pfc_frm_cfg(void *cgxd, int lmac_id, u8 *tx_pause, + u8 *rx_pause) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_CBFC_CTL); + + *rx_pause = !!(cfg & CGXX_SMUX_CBFC_CTL_RX_EN); + *tx_pause = !!(cfg & CGXX_SMUX_CBFC_CTL_TX_EN); + + return 0; } void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable) @@ -1489,6 +1602,16 @@ static int cgx_lmac_init(struct cgx *cgx) /* Reserve first entry for default MAC address */ set_bit(0, lmac->mac_to_index_bmap.bmap); + lmac->rx_fc_pfvf_bmap.max = 128; + err = rvu_alloc_bitmap(&lmac->rx_fc_pfvf_bmap); + if (err) + goto err_dmac_bmap_free; + + lmac->tx_fc_pfvf_bmap.max = 128; + err = rvu_alloc_bitmap(&lmac->tx_fc_pfvf_bmap); + if (err) + goto err_rx_fc_bmap_free; + init_waitqueue_head(&lmac->wq_cmd_cmplt); mutex_init(&lmac->cmd_lock); spin_lock_init(&lmac->event_cb_lock); @@ -1505,6 +1628,10 @@ static int cgx_lmac_init(struct cgx *cgx) return cgx_lmac_verify_fwi_version(cgx); err_bitmap_free: + rvu_free_bitmap(&lmac->tx_fc_pfvf_bmap); +err_rx_fc_bmap_free: + rvu_free_bitmap(&lmac->rx_fc_pfvf_bmap); +err_dmac_bmap_free: rvu_free_bitmap(&lmac->mac_to_index_bmap); err_name_free: kfree(lmac->name); @@ -1572,6 +1699,8 @@ static struct mac_ops cgx_mac_ops = { .mac_enadis_ptp_config = cgx_lmac_ptp_config, .mac_rx_tx_enable = cgx_lmac_rx_tx_enable, .mac_tx_enable = cgx_lmac_tx_enable, + .pfc_config = cgx_lmac_pfc_config, + .mac_get_pfc_frm_cfg = cgx_lmac_get_pfc_frm_cfg, }; static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index ab1e4abdea38..bd2f33a26eee 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -76,6 +76,13 @@ #define CGXX_SMUX_TX_CTL 0x20178 #define CGXX_SMUX_TX_PAUSE_PKT_TIME 0x20110 #define CGXX_SMUX_TX_PAUSE_PKT_INTERVAL 0x20120 +#define CGXX_SMUX_SMAC 0x20108 +#define CGXX_SMUX_CBFC_CTL 0x20218 +#define CGXX_SMUX_CBFC_CTL_RX_EN BIT_ULL(0) +#define CGXX_SMUX_CBFC_CTL_TX_EN BIT_ULL(1) +#define CGXX_SMUX_CBFC_CTL_DRP_EN BIT_ULL(2) +#define CGXX_SMUX_CBFC_CTL_BCK_EN BIT_ULL(3) +#define CGX_PFC_CLASS_MASK GENMASK_ULL(47, 32) #define CGXX_GMP_GMI_TX_PAUSE_PKT_TIME 0x38230 #define CGXX_GMP_GMI_TX_PAUSE_PKT_INTERVAL 0x38248 #define CGX_SMUX_TX_CTL_L2P_BP_CONV BIT_ULL(7) @@ -172,4 +179,10 @@ u64 cgx_lmac_read(int cgx_id, int lmac_id, u64 offset); int cgx_lmac_addr_update(u8 cgx_id, u8 lmac_id, u8 *mac_addr, u8 index); u64 cgx_read_dmac_ctrl(void *cgxd, int lmac_id); u64 cgx_read_dmac_entry(void *cgxd, int index); +int cgx_lmac_pfc_config(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, + u16 pfc_en); +int cgx_lmac_get_pfc_frm_cfg(void *cgxd, int lmac_id, u8 *tx_pause, + u8 *rx_pause); +int verify_lmac_fc_cfg(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, + int pfvf_idx); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index b33e7d1d0851..f30581bf0688 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -17,6 +17,8 @@ * @resp: command response * @link_info: link related information * @mac_to_index_bmap: Mac address to CGX table index mapping + * @rx_fc_pfvf_bmap: Receive flow control enabled netdev mapping + * @tx_fc_pfvf_bmap: Transmit flow control enabled netdev mapping * @event_cb: callback for linkchange events * @event_cb_lock: lock for serializing callback with unregister * @cgx: parent cgx port @@ -33,6 +35,8 @@ struct lmac { u64 resp; struct cgx_link_user_info link_info; struct rsrc_bmap mac_to_index_bmap; + struct rsrc_bmap rx_fc_pfvf_bmap; + struct rsrc_bmap tx_fc_pfvf_bmap; struct cgx_event_cb event_cb; /* lock for serializing callback with unregister */ spinlock_t event_cb_lock; @@ -110,6 +114,12 @@ struct mac_ops { int (*mac_rx_tx_enable)(void *cgxd, int lmac_id, bool enable); int (*mac_tx_enable)(void *cgxd, int lmac_id, bool enable); + int (*pfc_config)(void *cgxd, int lmac_id, + u8 tx_pause, u8 rx_pause, u16 pfc_en); + + int (*mac_get_pfc_frm_cfg)(void *cgxd, int lmac_id, + u8 *tx_pause, u8 *rx_pause); + }; struct cgx { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 58e2aeebc14f..550cb11197bf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -172,6 +172,8 @@ M(RPM_STATS, 0x21C, rpm_stats, msg_req, rpm_stats_rsp) \ M(CGX_MAC_ADDR_RESET, 0x21D, cgx_mac_addr_reset, msg_req, msg_rsp) \ M(CGX_MAC_ADDR_UPDATE, 0x21E, cgx_mac_addr_update, cgx_mac_addr_update_req, \ msg_rsp) \ +M(CGX_PRIO_FLOW_CTRL_CFG, 0x21F, cgx_prio_flow_ctrl_cfg, cgx_pfc_cfg, \ + cgx_pfc_rsp) \ /* NPA mbox IDs (range 0x400 - 0x5FF) */ \ M(NPA_LF_ALLOC, 0x400, npa_lf_alloc, \ npa_lf_alloc_req, npa_lf_alloc_rsp) \ @@ -609,6 +611,21 @@ struct rpm_stats_rsp { u64 tx_stats[RPM_TX_STATS_COUNT]; }; +struct cgx_pfc_cfg { + struct mbox_msghdr hdr; + u8 rx_pause; + u8 tx_pause; + u16 pfc_en; /* bitmap indicating pfc enabled traffic classes */ +}; + +struct cgx_pfc_rsp { + struct mbox_msghdr hdr; + u8 rx_pause; + u8 tx_pause; +}; + + /* NPA mbox message formats */ + struct npc_set_pkind { struct mbox_msghdr hdr; #define OTX2_PRIV_FLAGS_DEFAULT BIT_ULL(0) @@ -1603,6 +1620,8 @@ enum cgx_af_status { LMAC_AF_ERR_INVALID_PARAM = -1101, LMAC_AF_ERR_PF_NOT_MAPPED = -1102, LMAC_AF_ERR_PERM_DENIED = -1103, + LMAC_AF_ERR_PFC_ENADIS_PERM_DENIED = -1104, + LMAC_AF_ERR_8023PAUSE_ENADIS_PERM_DENIED = -1105, }; #endif /* MBOX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c index e682b7bfde64..67a6821d2dff 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c @@ -25,6 +25,9 @@ #define PCI_SUBSYS_DEVID_OCTX2_95XXO_PTP 0xB600 #define PCI_DEVID_OCTEONTX2_RST 0xA085 #define PCI_DEVID_CN10K_PTP 0xA09E +#define PCI_SUBSYS_DEVID_CN10K_A_PTP 0xB900 +#define PCI_SUBSYS_DEVID_CNF10K_A_PTP 0xBA00 +#define PCI_SUBSYS_DEVID_CNF10K_B_PTP 0xBC00 #define PCI_PTP_BAR_NO 0 @@ -46,10 +49,105 @@ #define PTP_CLOCK_HI 0xF10ULL #define PTP_CLOCK_COMP 0xF18ULL #define PTP_TIMESTAMP 0xF20ULL +#define PTP_CLOCK_SEC 0xFD0ULL + +#define CYCLE_MULT 1000 static struct ptp *first_ptp_block; static const struct pci_device_id ptp_id_table[]; +static bool cn10k_ptp_errata(struct ptp *ptp) +{ + if (ptp->pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A_PTP || + ptp->pdev->subsystem_device == PCI_SUBSYS_DEVID_CNF10K_A_PTP) + return true; + return false; +} + +static bool is_ptp_tsfmt_sec_nsec(struct ptp *ptp) +{ + if (ptp->pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A_PTP || + ptp->pdev->subsystem_device == PCI_SUBSYS_DEVID_CNF10K_A_PTP) + return true; + return false; +} + +static u64 read_ptp_tstmp_sec_nsec(struct ptp *ptp) +{ + u64 sec, sec1, nsec; + unsigned long flags; + + spin_lock_irqsave(&ptp->ptp_lock, flags); + sec = readq(ptp->reg_base + PTP_CLOCK_SEC) & 0xFFFFFFFFUL; + nsec = readq(ptp->reg_base + PTP_CLOCK_HI); + sec1 = readq(ptp->reg_base + PTP_CLOCK_SEC) & 0xFFFFFFFFUL; + /* check nsec rollover */ + if (sec1 > sec) { + nsec = readq(ptp->reg_base + PTP_CLOCK_HI); + sec = sec1; + } + spin_unlock_irqrestore(&ptp->ptp_lock, flags); + + return sec * NSEC_PER_SEC + nsec; +} + +static u64 read_ptp_tstmp_nsec(struct ptp *ptp) +{ + return readq(ptp->reg_base + PTP_CLOCK_HI); +} + +static u64 ptp_calc_adjusted_comp(u64 ptp_clock_freq) +{ + u64 comp, adj = 0, cycles_per_sec, ns_drift = 0; + u32 ptp_clock_nsec, cycle_time; + int cycle; + + /* Errata: + * Issue #1: At the time of 1 sec rollover of the nano-second counter, + * the nano-second counter is set to 0. However, it should be set to + * (existing counter_value - 10^9). + * + * Issue #2: The nano-second counter rolls over at 0x3B9A_C9FF. + * It should roll over at 0x3B9A_CA00. + */ + + /* calculate ptp_clock_comp value */ + comp = ((u64)1000000000ULL << 32) / ptp_clock_freq; + /* use CYCLE_MULT to avoid accuracy loss due to integer arithmetic */ + cycle_time = NSEC_PER_SEC * CYCLE_MULT / ptp_clock_freq; + /* cycles per sec */ + cycles_per_sec = ptp_clock_freq; + + /* check whether ptp nanosecond counter rolls over early */ + cycle = cycles_per_sec - 1; + ptp_clock_nsec = (cycle * comp) >> 32; + while (ptp_clock_nsec < NSEC_PER_SEC) { + if (ptp_clock_nsec == 0x3B9AC9FF) + goto calc_adj_comp; + cycle++; + ptp_clock_nsec = (cycle * comp) >> 32; + } + /* compute nanoseconds lost per second when nsec counter rolls over */ + ns_drift = ptp_clock_nsec - NSEC_PER_SEC; + /* calculate ptp_clock_comp adjustment */ + if (ns_drift > 0) { + adj = comp * ns_drift; + adj = adj / 1000000000ULL; + } + /* speed up the ptp clock to account for nanoseconds lost */ + comp += adj; + return comp; + +calc_adj_comp: + /* slow down the ptp clock to not rollover early */ + adj = comp * cycle_time; + adj = adj / 1000000000ULL; + adj = adj / CYCLE_MULT; + comp -= adj; + + return comp; +} + struct ptp *ptp_get(void) { struct ptp *ptp = first_ptp_block; @@ -77,8 +175,8 @@ void ptp_put(struct ptp *ptp) static int ptp_adjfine(struct ptp *ptp, long scaled_ppm) { bool neg_adj = false; - u64 comp; - u64 adj; + u32 freq, freq_adj; + u64 comp, adj; s64 ppb; if (scaled_ppm < 0) { @@ -100,15 +198,22 @@ static int ptp_adjfine(struct ptp *ptp, long scaled_ppm) * where tbase is the basic compensation value calculated * initialy in the probe function. */ - comp = ((u64)1000000000ull << 32) / ptp->clock_rate; /* convert scaled_ppm to ppb */ ppb = 1 + scaled_ppm; ppb *= 125; ppb >>= 13; - adj = comp * ppb; - adj = div_u64(adj, 1000000000ull); - comp = neg_adj ? comp - adj : comp + adj; + if (cn10k_ptp_errata(ptp)) { + /* calculate the new frequency based on ppb */ + freq_adj = (ptp->clock_rate * ppb) / 1000000000ULL; + freq = neg_adj ? ptp->clock_rate + freq_adj : ptp->clock_rate - freq_adj; + comp = ptp_calc_adjusted_comp(freq); + } else { + comp = ((u64)1000000000ull << 32) / ptp->clock_rate; + adj = comp * ppb; + adj = div_u64(adj, 1000000000ull); + comp = neg_adj ? comp - adj : comp + adj; + } writeq(comp, ptp->reg_base + PTP_CLOCK_COMP); return 0; @@ -117,7 +222,7 @@ static int ptp_adjfine(struct ptp *ptp, long scaled_ppm) static int ptp_get_clock(struct ptp *ptp, u64 *clk) { /* Return the current PTP clock */ - *clk = readq(ptp->reg_base + PTP_CLOCK_HI); + *clk = ptp->read_ptp_tstmp(ptp); return 0; } @@ -166,7 +271,11 @@ void ptp_start(struct ptp *ptp, u64 sclk, u32 ext_clk_freq, u32 extts) writeq(0x1dcd650000000000, ptp->reg_base + PTP_PPS_HI_INCR); writeq(0x1dcd650000000000, ptp->reg_base + PTP_PPS_LO_INCR); - clock_comp = ((u64)1000000000ull << 32) / ptp->clock_rate; + if (cn10k_ptp_errata(ptp)) + clock_comp = ptp_calc_adjusted_comp(ptp->clock_rate); + else + clock_comp = ((u64)1000000000ull << 32) / ptp->clock_rate; + /* Initial compensation value to start the nanosecs counter */ writeq(clock_comp, ptp->reg_base + PTP_CLOCK_COMP); } @@ -214,6 +323,12 @@ static int ptp_probe(struct pci_dev *pdev, if (!first_ptp_block) first_ptp_block = ptp; + spin_lock_init(&ptp->ptp_lock); + if (is_ptp_tsfmt_sec_nsec(ptp)) + ptp->read_ptp_tstmp = &read_ptp_tstmp_sec_nsec; + else + ptp->read_ptp_tstmp = &read_ptp_tstmp_nsec; + return 0; error_free: diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.h b/drivers/net/ethernet/marvell/octeontx2/af/ptp.h index 1b81a0493cd3..95a955159f40 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.h @@ -15,6 +15,8 @@ struct ptp { struct pci_dev *pdev; void __iomem *reg_base; + u64 (*read_ptp_tstmp)(struct ptp *ptp); + spinlock_t ptp_lock; /* lock */ u32 clock_rate; }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index 9ea2f6ac38ec..47e83d7a5804 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -32,6 +32,8 @@ static struct mac_ops rpm_mac_ops = { .mac_enadis_ptp_config = rpm_lmac_ptp_config, .mac_rx_tx_enable = rpm_lmac_rx_tx_enable, .mac_tx_enable = rpm_lmac_tx_enable, + .pfc_config = rpm_lmac_pfc_config, + .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, }; struct mac_ops *rpm_get_mac_ops(void) @@ -96,11 +98,20 @@ int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable) void rpm_lmac_enadis_rx_pause_fwding(void *rpmd, int lmac_id, bool enable) { rpm_t *rpm = rpmd; + struct lmac *lmac; u64 cfg; if (!rpm) return; + lmac = lmac_pdata(lmac_id, rpm); + if (!lmac) + return; + + /* Pause frames are not enabled just return */ + if (!bitmap_weight(lmac->rx_fc_pfvf_bmap.bmap, lmac->rx_fc_pfvf_bmap.max)) + return; + if (enable) { cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; @@ -122,13 +133,94 @@ int rpm_lmac_get_pause_frm_status(void *rpmd, int lmac_id, return -ENODEV; cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - *rx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE); + if (!(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE)) { + *rx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE); + *tx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE); + } - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - *tx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE); return 0; } +static void rpm_cfg_pfc_quanta_thresh(rpm_t *rpm, int lmac_id, + unsigned long pfc_en, + bool enable) +{ + u64 quanta_offset = 0, quanta_thresh = 0, cfg; + int i, shift; + + /* Set pause time and interval */ + for_each_set_bit(i, &pfc_en, 16) { + switch (i) { + case 0: + case 1: + quanta_offset = RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL01_QUANTA_THRESH; + break; + case 2: + case 3: + quanta_offset = RPMX_MTI_MAC100X_CL23_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL23_QUANTA_THRESH; + break; + case 4: + case 5: + quanta_offset = RPMX_MTI_MAC100X_CL45_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL45_QUANTA_THRESH; + break; + case 6: + case 7: + quanta_offset = RPMX_MTI_MAC100X_CL67_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL67_QUANTA_THRESH; + break; + case 8: + case 9: + quanta_offset = RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL89_QUANTA_THRESH; + break; + case 10: + case 11: + quanta_offset = RPMX_MTI_MAC100X_CL1011_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL1011_QUANTA_THRESH; + break; + case 12: + case 13: + quanta_offset = RPMX_MTI_MAC100X_CL1213_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL1213_QUANTA_THRESH; + break; + case 14: + case 15: + quanta_offset = RPMX_MTI_MAC100X_CL1415_PAUSE_QUANTA; + quanta_thresh = RPMX_MTI_MAC100X_CL1415_QUANTA_THRESH; + break; + } + + if (!quanta_offset || !quanta_thresh) + continue; + + shift = (i % 2) ? 1 : 0; + cfg = rpm_read(rpm, lmac_id, quanta_offset); + if (enable) { + cfg |= ((u64)RPM_DEFAULT_PAUSE_TIME << shift * 16); + } else { + if (!shift) + cfg &= ~GENMASK_ULL(15, 0); + else + cfg &= ~GENMASK_ULL(31, 16); + } + rpm_write(rpm, lmac_id, quanta_offset, cfg); + + cfg = rpm_read(rpm, lmac_id, quanta_thresh); + if (enable) { + cfg |= ((u64)(RPM_DEFAULT_PAUSE_TIME / 2) << shift * 16); + } else { + if (!shift) + cfg &= ~GENMASK_ULL(15, 0); + else + cfg &= ~GENMASK_ULL(31, 16); + } + rpm_write(rpm, lmac_id, quanta_thresh, cfg); + } +} + int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause) { @@ -152,8 +244,12 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause, cfg = rpm_read(rpm, 0, RPMX_CMR_RX_OVR_BP); if (tx_pause) { + /* Configure CL0 Pause Quanta & threshold for 802.3X frames */ + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 1, true); cfg &= ~RPMX_CMR_RX_OVR_BP_EN(lmac_id); } else { + /* Disable all Pause Quanta & threshold values */ + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false); cfg |= RPMX_CMR_RX_OVR_BP_EN(lmac_id); cfg &= ~RPMX_CMR_RX_OVR_BP_BP(lmac_id); } @@ -166,56 +262,20 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable) rpm_t *rpm = rpmd; u64 cfg; - if (enable) { - /* Enable 802.3 pause frame mode */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - /* Enable receive pause frames */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - /* Enable forward pause to TX block */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - /* Enable pause frames transmission */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - - /* Set pause time and interval */ - cfg = rpm_read(rpm, lmac_id, - RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA); - cfg &= ~0xFFFFULL; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA, - cfg | RPM_DEFAULT_PAUSE_TIME); - /* Set pause interval as the hardware default is too short */ - cfg = rpm_read(rpm, lmac_id, - RPMX_MTI_MAC100X_CL01_QUANTA_THRESH); - cfg &= ~0xFFFFULL; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_CL01_QUANTA_THRESH, - cfg | (RPM_DEFAULT_PAUSE_TIME / 2)); - - } else { - /* ALL pause frames received are completely ignored */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + /* ALL pause frames received are completely ignored */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - /* Disable forward pause to TX block */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + /* Disable forward pause to TX block */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - /* Disable pause frames transmission */ - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); - cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; - rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - } + /* Disable pause frames transmission */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); } int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat) @@ -323,3 +383,65 @@ void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable) cfg &= ~RPMX_RX_TS_PREPEND; rpm_write(rpm, lmac_id, RPMX_CMRX_CFG, cfg); } + +int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 pfc_en) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + /* reset PFC class quanta and threshold */ + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false); + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + + if (rx_pause) { + cfg &= ~(RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE | + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE | + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD); + } else { + cfg |= (RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE | + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE | + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD); + } + + if (tx_pause) { + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, pfc_en, true); + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + } else { + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xfff, false); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + } + + if (!rx_pause && !tx_pause) + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE; + else + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE; + + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + + cfg = rpm_read(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL); + cfg = FIELD_SET(RPM_PFC_CLASS_MASK, pfc_en, cfg); + rpm_write(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL, cfg); + + return 0; +} + +int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause, u8 *rx_pause) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + if (cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE) { + *rx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE); + *tx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE); + } + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index ff580311edd0..9ab8d49dd180 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -33,7 +33,21 @@ #define RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE BIT_ULL(8) #define RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE BIT_ULL(19) #define RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA 0x80A8 +#define RPMX_MTI_MAC100X_CL23_PAUSE_QUANTA 0x80B0 +#define RPMX_MTI_MAC100X_CL45_PAUSE_QUANTA 0x80B8 +#define RPMX_MTI_MAC100X_CL67_PAUSE_QUANTA 0x80C0 #define RPMX_MTI_MAC100X_CL01_QUANTA_THRESH 0x80C8 +#define RPMX_MTI_MAC100X_CL23_QUANTA_THRESH 0x80D0 +#define RPMX_MTI_MAC100X_CL45_QUANTA_THRESH 0x80D8 +#define RPMX_MTI_MAC100X_CL67_QUANTA_THRESH 0x80E0 +#define RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA 0x8108 +#define RPMX_MTI_MAC100X_CL1011_PAUSE_QUANTA 0x8110 +#define RPMX_MTI_MAC100X_CL1213_PAUSE_QUANTA 0x8118 +#define RPMX_MTI_MAC100X_CL1415_PAUSE_QUANTA 0x8120 +#define RPMX_MTI_MAC100X_CL89_QUANTA_THRESH 0x8128 +#define RPMX_MTI_MAC100X_CL1011_QUANTA_THRESH 0x8130 +#define RPMX_MTI_MAC100X_CL1213_QUANTA_THRESH 0x8138 +#define RPMX_MTI_MAC100X_CL1415_QUANTA_THRESH 0x8140 #define RPM_DEFAULT_PAUSE_TIME 0xFFFF #define RPMX_CMR_RX_OVR_BP 0x4120 #define RPMX_CMR_RX_OVR_BP_EN(x) BIT_ULL((x) + 8) @@ -45,6 +59,18 @@ #define RPM_LMAC_FWI 0xa #define RPM_TX_EN BIT_ULL(0) #define RPM_RX_EN BIT_ULL(1) +#define RPMX_CMRX_PRT_CBFC_CTL 0x5B08 +#define RPMX_CMRX_PRT_CBFC_CTL_LOGL_EN_RX_SHIFT 33 +#define RPMX_CMRX_PRT_CBFC_CTL_PHYS_BP_SHIFT 16 +#define RPMX_CMRX_PRT_CBFC_CTL_LOGL_EN_TX_SHIFT 0 +#define RPM_PFC_CLASS_MASK GENMASK_ULL(48, 33) +#define RPMX_MTI_MAC100X_CL89_QUANTA_THRESH 0x8128 +#define RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_PAD_EN BIT_ULL(11) +#define RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE BIT_ULL(8) +#define RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD BIT_ULL(7) +#define RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA 0x80A8 +#define RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA 0x8108 +#define RPM_DEFAULT_PAUSE_TIME 0xFFFF /* Function Declarations */ int rpm_get_nr_lmacs(void *rpmd); @@ -61,4 +87,8 @@ int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat); void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable); int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable); int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable); +int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, + u16 pfc_en); +int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause, + u8 *rx_pause); #endif /* RPM_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 5ed94cfb47d2..513b43ecd5be 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -807,6 +807,9 @@ u32 rvu_cgx_get_fifolen(struct rvu *rvu); void *rvu_first_cgx_pdata(struct rvu *rvu); int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id); int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable); +int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause, + u16 pfc_en); +int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause); int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, int type); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 8a7ac5a8b821..9ffe99830e34 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -863,6 +863,45 @@ int rvu_mbox_handler_cgx_intlbk_disable(struct rvu *rvu, struct msg_req *req, return 0; } +int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause) +{ + int pf = rvu_get_pf(pcifunc); + u8 rx_pfc = 0, tx_pfc = 0; + struct mac_ops *mac_ops; + u8 cgx_id, lmac_id; + void *cgxd; + + if (!is_mac_feature_supported(rvu, pf, RVU_LMAC_FEAT_FC)) + return 0; + + /* This msg is expected only from PF/VFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if (!is_pf_cgxmapped(rvu, pf)) + return LMAC_AF_ERR_PF_NOT_MAPPED; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); + + mac_ops->mac_get_pfc_frm_cfg(cgxd, lmac_id, &tx_pfc, &rx_pfc); + if (tx_pfc || rx_pfc) { + dev_warn(rvu->dev, + "Can not configure 802.3X flow control as PFC frames are enabled"); + return LMAC_AF_ERR_8023PAUSE_ENADIS_PERM_DENIED; + } + + mutex_lock(&rvu->rsrc_lock); + if (verify_lmac_fc_cfg(cgxd, lmac_id, tx_pause, rx_pause, + pcifunc & RVU_PFVF_FUNC_MASK)) { + mutex_unlock(&rvu->rsrc_lock); + return LMAC_AF_ERR_PERM_DENIED; + } + mutex_unlock(&rvu->rsrc_lock); + + return mac_ops->mac_enadis_pause_frm(cgxd, lmac_id, tx_pause, rx_pause); +} + int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu, struct cgx_pause_frm_cfg *req, struct cgx_pause_frm_cfg *rsp) @@ -870,11 +909,9 @@ int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu, int pf = rvu_get_pf(req->hdr.pcifunc); struct mac_ops *mac_ops; u8 cgx_id, lmac_id; + int err = 0; void *cgxd; - if (!is_mac_feature_supported(rvu, pf, RVU_LMAC_FEAT_FC)) - return 0; - /* This msg is expected only from PF/VFs that are mapped to CGX LMACs, * if received from other PF/VF simply ACK, nothing to do. */ @@ -886,13 +923,11 @@ int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu, mac_ops = get_mac_ops(cgxd); if (req->set) - mac_ops->mac_enadis_pause_frm(cgxd, lmac_id, - req->tx_pause, req->rx_pause); + err = rvu_cgx_cfg_pause_frm(rvu, req->hdr.pcifunc, req->tx_pause, req->rx_pause); else - mac_ops->mac_get_pause_frm_status(cgxd, lmac_id, - &rsp->tx_pause, - &rsp->rx_pause); - return 0; + mac_ops->mac_get_pause_frm_status(cgxd, lmac_id, &rsp->tx_pause, &rsp->rx_pause); + + return err; } int rvu_mbox_handler_cgx_get_phy_fec_stats(struct rvu *rvu, struct msg_req *req, @@ -1079,3 +1114,67 @@ int rvu_mbox_handler_cgx_mac_addr_update(struct rvu *rvu, rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); return cgx_lmac_addr_update(cgx_id, lmac_id, req->mac_addr, req->index); } + +int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, + u8 rx_pause, u16 pfc_en) +{ + int pf = rvu_get_pf(pcifunc); + u8 rx_8023 = 0, tx_8023 = 0; + struct mac_ops *mac_ops; + u8 cgx_id, lmac_id; + void *cgxd; + + /* This msg is expected only from PF/VFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if (!is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); + + mac_ops->mac_get_pause_frm_status(cgxd, lmac_id, &tx_8023, &rx_8023); + if (tx_8023 || rx_8023) { + dev_warn(rvu->dev, + "Can not configure PFC as 802.3X pause frames are enabled"); + return LMAC_AF_ERR_PFC_ENADIS_PERM_DENIED; + } + + mutex_lock(&rvu->rsrc_lock); + if (verify_lmac_fc_cfg(cgxd, lmac_id, tx_pause, rx_pause, + pcifunc & RVU_PFVF_FUNC_MASK)) { + mutex_unlock(&rvu->rsrc_lock); + return LMAC_AF_ERR_PERM_DENIED; + } + mutex_unlock(&rvu->rsrc_lock); + + return mac_ops->pfc_config(cgxd, lmac_id, tx_pause, rx_pause, pfc_en); +} + +int rvu_mbox_handler_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, + struct cgx_pfc_cfg *req, + struct cgx_pfc_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + struct mac_ops *mac_ops; + u8 cgx_id, lmac_id; + void *cgxd; + int err; + + /* This msg is expected only from PF/VFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if (!is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); + + err = rvu_cgx_prio_flow_ctrl_cfg(rvu, req->hdr.pcifunc, req->tx_pause, + req->rx_pause, req->pfc_en); + + mac_ops->mac_get_pfc_frm_cfg(cgxd, lmac_id, &rsp->tx_pause, &rsp->rx_pause); + return err; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 97fb61915379..0fa625e2528e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -296,7 +296,6 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf, struct rvu_hwinfo *hw = rvu->hw; struct sdp_node_info *sdp_info; int pkind, pf, vf, lbkid, vfid; - struct mac_ops *mac_ops; u8 cgx_id, lmac_id; bool from_vf; int err; @@ -326,13 +325,6 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf, cgx_set_pkind(rvu_cgx_pdata(cgx_id, rvu), lmac_id, pkind); rvu_npc_set_pkind(rvu, pkind, pfvf); - mac_ops = get_mac_ops(rvu_cgx_pdata(cgx_id, rvu)); - - /* By default we enable pause frames */ - if ((pcifunc & RVU_PFVF_FUNC_MASK) == 0) - mac_ops->mac_enadis_pause_frm(rvu_cgx_pdata(cgx_id, - rvu), - lmac_id, true, true); break; case NIX_INTF_TYPE_LBK: vf = (pcifunc & RVU_PFVF_FUNC_MASK) - 1; @@ -533,7 +525,7 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, */ switch (type) { case NIX_INTF_TYPE_CGX: - if ((req->chan_base + req->chan_cnt) > 15) + if ((req->chan_base + req->chan_cnt) > 16) return -EINVAL; rvu_get_cgx_lmac_id(pfvf->cgx_lmac, &cgx_id, &lmac_id); /* Assign bpid based on cgx, lmac and chan id */ @@ -4578,6 +4570,12 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf) pfvf->hw_rx_tstamp_en = false; } + /* reset priority flow control config */ + rvu_cgx_prio_flow_ctrl_cfg(rvu, pcifunc, 0, 0, 0); + + /* reset 802.3x flow control config */ + rvu_cgx_cfg_pause_frm(rvu, pcifunc, 0, 0); + nix_ctx_free(rvu, pfvf); nix_free_all_bandprof(rvu, pcifunc); @@ -5314,6 +5312,7 @@ int rvu_nix_setup_ratelimit_aggr(struct rvu *rvu, u16 pcifunc, aq_req.ctype = NIX_AQ_CTYPE_BANDPROF; aq_req.op = NIX_AQ_INSTOP_WRITE; memcpy(&aq_req.prof, &aq_rsp.prof, sizeof(struct nix_bandprof_s)); + memset((char *)&aq_req.prof_mask, 0xff, sizeof(struct nix_bandprof_s)); /* Clear higher layer enable bit in the mid profile, just in case */ aq_req.prof.hl_en = 0; aq_req.prof_mask.hl_en = 1; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 0048b5946712..d463dc72d80a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -11,4 +11,7 @@ rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ otx2_devlink.o rvu_nicvf-y := otx2_vf.o otx2_devlink.o +rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o +rvu_nicvf-$(CONFIG_DCB) += otx2_dcbnl.o + ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 66da31f30d3e..b9d7601138ca 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -222,8 +222,11 @@ EXPORT_SYMBOL(otx2_set_mac_address); int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu) { struct nix_frs_cfg *req; + u16 maxlen; int err; + maxlen = otx2_get_max_mtu(pfvf) + OTX2_ETH_HLEN + OTX2_HW_TIMESTAMP_LEN; + mutex_lock(&pfvf->mbox.lock); req = otx2_mbox_alloc_msg_nix_set_hw_frs(&pfvf->mbox); if (!req) { @@ -233,6 +236,10 @@ int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu) req->maxlen = pfvf->netdev->mtu + OTX2_ETH_HLEN + OTX2_HW_TIMESTAMP_LEN; + /* Use max receive length supported by hardware for loopback devices */ + if (is_otx2_lbkvf(pfvf->pdev)) + req->maxlen = maxlen; + err = otx2_sync_mbox_msg(&pfvf->mbox); mutex_unlock(&pfvf->mbox.lock); return err; @@ -262,6 +269,7 @@ unlock: mutex_unlock(&pfvf->mbox.lock); return err; } +EXPORT_SYMBOL(otx2_config_pause_frm); int otx2_set_flowkey_cfg(struct otx2_nic *pfvf) { @@ -931,7 +939,11 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) if (!is_otx2_lbkvf(pfvf->pdev)) { /* Enable receive CQ backpressure */ aq->cq.bp_ena = 1; +#ifdef CONFIG_DCB + aq->cq.bpid = pfvf->bpid[pfvf->queue_to_pfc_map[qidx]]; +#else aq->cq.bpid = pfvf->bpid[0]; +#endif /* Set backpressure level is same as cq pass level */ aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt); @@ -1036,7 +1048,7 @@ int otx2_config_nix(struct otx2_nic *pfvf) struct nix_lf_alloc_rsp *rsp; int err; - pfvf->qset.xqe_size = NIX_XQESZ_W16 ? 128 : 512; + pfvf->qset.xqe_size = pfvf->hw.xqe_size; /* Get memory to put this msg */ nixlf = otx2_mbox_alloc_msg_nix_lf_alloc(&pfvf->mbox); @@ -1049,7 +1061,7 @@ int otx2_config_nix(struct otx2_nic *pfvf) nixlf->cq_cnt = pfvf->qset.cq_cnt; nixlf->rss_sz = MAX_RSS_INDIR_TBL_SIZE; nixlf->rss_grps = MAX_RSS_GROUPS; - nixlf->xqe_sz = NIX_XQESZ_W16; + nixlf->xqe_sz = pfvf->hw.xqe_size == 128 ? NIX_XQESZ_W16 : NIX_XQESZ_W64; /* We don't know absolute NPA LF idx attached. * AF will replace 'RVU_DEFAULT_PF_FUNC' with * NPA LF attached to this RVU PF/VF. @@ -1211,7 +1223,11 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, */ if (pfvf->nix_blkaddr == BLKADDR_NIX1) aq->aura.bp_ena = 1; +#ifdef CONFIG_DCB + aq->aura.nix0_bpid = pfvf->bpid[pfvf->queue_to_pfc_map[aura_id]]; +#else aq->aura.nix0_bpid = pfvf->bpid[0]; +#endif /* Set backpressure level for RQ's Aura */ aq->aura.bp = RQ_BP_LVL_AURA; @@ -1538,11 +1554,18 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable) return -ENOMEM; req->chan_base = 0; - req->chan_cnt = 1; +#ifdef CONFIG_DCB + req->chan_cnt = pfvf->pfc_en ? IEEE_8021QAZ_MAX_TCS : 1; + req->bpid_per_chan = pfvf->pfc_en ? 1 : 0; +#else + req->chan_cnt = 1; req->bpid_per_chan = 0; +#endif + return otx2_sync_mbox_msg(&pfvf->mbox); } +EXPORT_SYMBOL(otx2_nix_config_bp); /* Mbox message handlers */ void mbox_handler_cgx_stats(struct otx2_nic *pfvf, @@ -1704,6 +1727,56 @@ out: } EXPORT_SYMBOL(otx2_get_max_mtu); +int otx2_handle_ntuple_tc_features(struct net_device *netdev, netdev_features_t features) +{ + netdev_features_t changed = features ^ netdev->features; + struct otx2_nic *pfvf = netdev_priv(netdev); + bool ntuple = !!(features & NETIF_F_NTUPLE); + bool tc = !!(features & NETIF_F_HW_TC); + + if ((changed & NETIF_F_NTUPLE) && !ntuple) + otx2_destroy_ntuple_flows(pfvf); + + if ((changed & NETIF_F_NTUPLE) && ntuple) { + if (!pfvf->flow_cfg->max_flows) { + netdev_err(netdev, + "Can't enable NTUPLE, MCAM entries not allocated\n"); + return -EINVAL; + } + } + + if ((changed & NETIF_F_HW_TC) && tc) { + if (!pfvf->flow_cfg->max_flows) { + netdev_err(netdev, + "Can't enable TC, MCAM entries not allocated\n"); + return -EINVAL; + } + } + + if ((changed & NETIF_F_HW_TC) && !tc && + pfvf->flow_cfg && pfvf->flow_cfg->nr_flows) { + netdev_err(netdev, "Can't disable TC hardware offload while flows are active\n"); + return -EBUSY; + } + + if ((changed & NETIF_F_NTUPLE) && ntuple && + (netdev->features & NETIF_F_HW_TC) && !(changed & NETIF_F_HW_TC)) { + netdev_err(netdev, + "Can't enable NTUPLE when TC is active, disable TC and retry\n"); + return -EINVAL; + } + + if ((changed & NETIF_F_HW_TC) && tc && + (netdev->features & NETIF_F_NTUPLE) && !(changed & NETIF_F_NTUPLE)) { + netdev_err(netdev, + "Can't enable TC when NTUPLE is active, disable NTUPLE and retry\n"); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(otx2_handle_ntuple_tc_features); + #define M(_name, _id, _fn_name, _req_type, _rsp_type) \ int __weak \ otx2_mbox_up_handler_ ## _fn_name(struct otx2_nic *pfvf, \ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 14509fc64cce..c587c14ac2a3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -17,6 +17,7 @@ #include <linux/soc/marvell/octeontx2/asm.h> #include <net/pkt_cls.h> #include <net/devlink.h> +#include <linux/time64.h> #include <mbox.h> #include <npc.h> @@ -178,6 +179,10 @@ struct otx2_hw { u16 rqpool_cnt; u16 sqpool_cnt; +#define OTX2_DEFAULT_RBUF_LEN 2048 + u16 rbuf_len; + u32 xqe_size; + /* NPA */ u32 stack_pg_ptrs; /* No of ptrs per stack page */ u32 stack_pg_bytes; /* Size of stack page */ @@ -272,6 +277,8 @@ struct otx2_ptp { u64 thresh; struct ptp_pin_desc extts_config; + u64 (*convert_rx_ptp_tstmp)(u64 timestamp); + u64 (*convert_tx_ptp_tstmp)(u64 timestamp); }; #define OTX2_HW_TIMESTAMP_LEN 8 @@ -396,6 +403,11 @@ struct otx2_nic { /* Devlink */ struct otx2_devlink *dl; +#ifdef CONFIG_DCB + /* PFC */ + u8 pfc_en; + u8 *queue_to_pfc_map; +#endif }; static inline bool is_otx2_lbkvf(struct pci_dev *pdev) @@ -863,6 +875,8 @@ int otx2_enable_rxvlan(struct otx2_nic *pf, bool enable); int otx2_install_rxvlan_offload_flow(struct otx2_nic *pfvf); bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, u64 iova, int len, u16 qidx); u16 otx2_get_max_mtu(struct otx2_nic *pfvf); +int otx2_handle_ntuple_tc_features(struct net_device *netdev, + netdev_features_t features); /* tc support */ int otx2_init_tc(struct otx2_nic *nic); void otx2_shutdown_tc(struct otx2_nic *nic); @@ -876,4 +890,11 @@ int otx2_dmacflt_remove(struct otx2_nic *pf, const u8 *mac, u8 bit_pos); int otx2_dmacflt_update(struct otx2_nic *pf, u8 *mac, u8 bit_pos); void otx2_dmacflt_reinstall_flows(struct otx2_nic *pf); void otx2_dmacflt_update_pfmac_flow(struct otx2_nic *pfvf); + +#ifdef CONFIG_DCB +/* DCB support*/ +void otx2_update_bpid_in_rqctx(struct otx2_nic *pfvf, int vlan_prio, int qidx, bool pfc_enable); +int otx2_config_priority_flow_ctrl(struct otx2_nic *pfvf); +int otx2_dcbnl_set_ops(struct net_device *dev); +#endif #endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c new file mode 100644 index 000000000000..723d2506d309 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Ethernet driver + * + * Copyright (C) 2021 Marvell. + * + */ + +#include "otx2_common.h" + +int otx2_config_priority_flow_ctrl(struct otx2_nic *pfvf) +{ + struct cgx_pfc_cfg *req; + struct cgx_pfc_rsp *rsp; + int err = 0; + + if (is_otx2_lbkvf(pfvf->pdev)) + return 0; + + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_cgx_prio_flow_ctrl_cfg(&pfvf->mbox); + if (!req) { + err = -ENOMEM; + goto unlock; + } + + if (pfvf->pfc_en) { + req->rx_pause = true; + req->tx_pause = true; + } else { + req->rx_pause = false; + req->tx_pause = false; + } + req->pfc_en = pfvf->pfc_en; + + if (!otx2_sync_mbox_msg(&pfvf->mbox)) { + rsp = (struct cgx_pfc_rsp *) + otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr); + if (req->rx_pause != rsp->rx_pause || req->tx_pause != rsp->tx_pause) { + dev_warn(pfvf->dev, + "Failed to config PFC\n"); + err = -EPERM; + } + } +unlock: + mutex_unlock(&pfvf->mbox.lock); + return err; +} + +void otx2_update_bpid_in_rqctx(struct otx2_nic *pfvf, int vlan_prio, int qidx, + bool pfc_enable) +{ + bool if_up = netif_running(pfvf->netdev); + struct npa_aq_enq_req *npa_aq; + struct nix_aq_enq_req *aq; + int err = 0; + + if (pfvf->queue_to_pfc_map[qidx] && pfc_enable) { + dev_warn(pfvf->dev, + "PFC enable not permitted as Priority %d already mapped to Queue %d\n", + pfvf->queue_to_pfc_map[qidx], qidx); + return; + } + + if (if_up) { + netif_tx_stop_all_queues(pfvf->netdev); + netif_carrier_off(pfvf->netdev); + } + + pfvf->queue_to_pfc_map[qidx] = vlan_prio; + + aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox); + if (!aq) { + err = -ENOMEM; + goto out; + } + + aq->cq.bpid = pfvf->bpid[vlan_prio]; + aq->cq_mask.bpid = GENMASK(8, 0); + + /* Fill AQ info */ + aq->qidx = qidx; + aq->ctype = NIX_AQ_CTYPE_CQ; + aq->op = NIX_AQ_INSTOP_WRITE; + + otx2_sync_mbox_msg(&pfvf->mbox); + + npa_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!npa_aq) { + err = -ENOMEM; + goto out; + } + npa_aq->aura.nix0_bpid = pfvf->bpid[vlan_prio]; + npa_aq->aura_mask.nix0_bpid = GENMASK(8, 0); + + /* Fill NPA AQ info */ + npa_aq->aura_id = qidx; + npa_aq->ctype = NPA_AQ_CTYPE_AURA; + npa_aq->op = NPA_AQ_INSTOP_WRITE; + otx2_sync_mbox_msg(&pfvf->mbox); + +out: + if (if_up) { + netif_carrier_on(pfvf->netdev); + netif_tx_start_all_queues(pfvf->netdev); + } + + if (err) + dev_warn(pfvf->dev, + "Updating BPIDs in CQ and Aura contexts of RQ%d failed with err %d\n", + qidx, err); +} + +static int otx2_dcbnl_ieee_getpfc(struct net_device *dev, struct ieee_pfc *pfc) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + + pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS; + pfc->pfc_en = pfvf->pfc_en; + + return 0; +} + +static int otx2_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + int err; + + /* Save PFC configuration to interface */ + pfvf->pfc_en = pfc->pfc_en; + + err = otx2_config_priority_flow_ctrl(pfvf); + if (err) + return err; + + /* Request Per channel Bpids */ + if (pfc->pfc_en) + otx2_nix_config_bp(pfvf, true); + + return 0; +} + +static u8 otx2_dcbnl_getdcbx(struct net_device __always_unused *dev) +{ + return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; +} + +static u8 otx2_dcbnl_setdcbx(struct net_device __always_unused *dev, u8 mode) +{ + return (mode != (DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE)) ? 1 : 0; +} + +static const struct dcbnl_rtnl_ops otx2_dcbnl_ops = { + .ieee_getpfc = otx2_dcbnl_ieee_getpfc, + .ieee_setpfc = otx2_dcbnl_ieee_setpfc, + .getdcbx = otx2_dcbnl_getdcbx, + .setdcbx = otx2_dcbnl_setdcbx, +}; + +int otx2_dcbnl_set_ops(struct net_device *dev) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + + pfvf->queue_to_pfc_map = devm_kzalloc(pfvf->dev, pfvf->hw.rx_queues, + GFP_KERNEL); + if (!pfvf->queue_to_pfc_map) + return -ENOMEM; + dev->dcbnl_ops = &otx2_dcbnl_ops; + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index d85db90632d6..fc328de5345e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -371,6 +371,8 @@ static void otx2_get_ringparam(struct net_device *netdev, ring->rx_pending = qs->rqe_cnt ? qs->rqe_cnt : Q_COUNT(Q_SIZE_256); ring->tx_max_pending = Q_COUNT(Q_SIZE_MAX); ring->tx_pending = qs->sqe_cnt ? qs->sqe_cnt : Q_COUNT(Q_SIZE_4K); + kernel_ring->rx_buf_len = pfvf->hw.rbuf_len; + kernel_ring->cqe_size = pfvf->hw.xqe_size; } static int otx2_set_ringparam(struct net_device *netdev, @@ -379,6 +381,9 @@ static int otx2_set_ringparam(struct net_device *netdev, struct netlink_ext_ack *extack) { struct otx2_nic *pfvf = netdev_priv(netdev); + u32 rx_buf_len = kernel_ring->rx_buf_len; + u32 old_rx_buf_len = pfvf->hw.rbuf_len; + u32 xqe_size = kernel_ring->cqe_size; bool if_up = netif_running(netdev); struct otx2_qset *qs = &pfvf->qset; u32 rx_count, tx_count; @@ -386,6 +391,21 @@ static int otx2_set_ringparam(struct net_device *netdev, if (ring->rx_mini_pending || ring->rx_jumbo_pending) return -EINVAL; + /* Hardware supports max size of 32k for a receive buffer + * and 1536 is typical ethernet frame size. + */ + if (rx_buf_len && (rx_buf_len < 1536 || rx_buf_len > 32768)) { + netdev_err(netdev, + "Receive buffer range is 1536 - 32768"); + return -EINVAL; + } + + if (xqe_size != 128 && xqe_size != 512) { + netdev_err(netdev, + "Completion event size must be 128 or 512"); + return -EINVAL; + } + /* Permitted lengths are 16 64 256 1K 4K 16K 64K 256K 1M */ rx_count = ring->rx_pending; /* On some silicon variants a skid or reserved CQEs are @@ -403,7 +423,8 @@ static int otx2_set_ringparam(struct net_device *netdev, Q_COUNT(Q_SIZE_4K), Q_COUNT(Q_SIZE_MAX)); tx_count = Q_COUNT(Q_SIZE(tx_count, 3)); - if (tx_count == qs->sqe_cnt && rx_count == qs->rqe_cnt) + if (tx_count == qs->sqe_cnt && rx_count == qs->rqe_cnt && + rx_buf_len == old_rx_buf_len && xqe_size == pfvf->hw.xqe_size) return 0; if (if_up) @@ -413,6 +434,9 @@ static int otx2_set_ringparam(struct net_device *netdev, qs->sqe_cnt = tx_count; qs->rqe_cnt = rx_count; + pfvf->hw.rbuf_len = rx_buf_len; + pfvf->hw.xqe_size = xqe_size; + if (if_up) return netdev->netdev_ops->ndo_open(netdev); @@ -1207,6 +1231,8 @@ end: static const struct ethtool_ops otx2_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, + .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN | + ETHTOOL_RING_USE_CQE_SIZE, .get_link = otx2_get_link, .get_drvinfo = otx2_get_drvinfo, .get_strings = otx2_get_strings, @@ -1326,6 +1352,8 @@ static int otx2vf_get_link_ksettings(struct net_device *netdev, static const struct ethtool_ops otx2vf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, + .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN | + ETHTOOL_RING_USE_CQE_SIZE, .get_link = otx2_get_link, .get_drvinfo = otx2vf_get_drvinfo, .get_strings = otx2vf_get_strings, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 77a13fb555fb..54f235c216a9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -21,8 +21,10 @@ struct otx2_flow { u16 entry; bool is_vf; u8 rss_ctx_id; +#define DMAC_FILTER_RULE BIT(0) +#define PFC_FLOWCTRL_RULE BIT(1) + u16 rule_type; int vf; - bool dmac_filter; }; enum dmac_req { @@ -899,6 +901,9 @@ static int otx2_is_flow_rule_dmacfilter(struct otx2_nic *pfvf, static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) { u64 ring_cookie = flow->flow_spec.ring_cookie; +#ifdef CONFIG_DCB + int vlan_prio, qidx, pfc_rule = 0; +#endif struct npc_install_flow_req *req; int err, vf = 0; @@ -940,6 +945,24 @@ static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) mutex_unlock(&pfvf->mbox.lock); return -EINVAL; } + +#ifdef CONFIG_DCB + /* Identify PFC rule if PFC enabled and ntuple rule is vlan */ + if (!vf && (req->features & BIT_ULL(NPC_OUTER_VID)) && + pfvf->pfc_en && req->op != NIX_RX_ACTIONOP_RSS) { + vlan_prio = ntohs(req->packet.vlan_tci) & + ntohs(req->mask.vlan_tci); + + /* Get the priority */ + vlan_prio >>= 13; + flow->rule_type |= PFC_FLOWCTRL_RULE; + /* Check if PFC enabled for this priority */ + if (pfvf->pfc_en & BIT(vlan_prio)) { + pfc_rule = true; + qidx = req->index; + } + } +#endif } /* ethtool ring_cookie has (VF + 1) for VF */ @@ -951,6 +974,12 @@ static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) /* Send message to AF */ err = otx2_sync_mbox_msg(&pfvf->mbox); + +#ifdef CONFIG_DCB + if (!err && pfc_rule) + otx2_update_bpid_in_rqctx(pfvf, vlan_prio, qidx, true); +#endif + mutex_unlock(&pfvf->mbox.lock); return err; } @@ -966,7 +995,7 @@ static int otx2_add_flow_with_pfmac(struct otx2_nic *pfvf, return -ENOMEM; pf_mac->entry = 0; - pf_mac->dmac_filter = true; + pf_mac->rule_type |= DMAC_FILTER_RULE; pf_mac->location = pfvf->flow_cfg->max_flows; memcpy(&pf_mac->flow_spec, &flow->flow_spec, sizeof(struct ethtool_rx_flow_spec)); @@ -1031,7 +1060,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) eth_hdr = &flow->flow_spec.h_u.ether_spec; /* Sync dmac filter table with updated fields */ - if (flow->dmac_filter) + if (flow->rule_type & DMAC_FILTER_RULE) return otx2_dmacflt_update(pfvf, eth_hdr->h_dest, flow->entry); @@ -1052,7 +1081,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) if (!test_bit(0, &flow_cfg->dmacflt_bmap)) otx2_add_flow_with_pfmac(pfvf, flow); - flow->dmac_filter = true; + flow->rule_type |= DMAC_FILTER_RULE; flow->entry = find_first_zero_bit(&flow_cfg->dmacflt_bmap, flow_cfg->dmacflt_max_flows); fsp->location = flow_cfg->max_flows + flow->entry; @@ -1120,7 +1149,7 @@ static void otx2_update_rem_pfmac(struct otx2_nic *pfvf, int req) bool found = false; list_for_each_entry(iter, &pfvf->flow_cfg->flow_list, list) { - if (iter->dmac_filter && iter->entry == 0) { + if ((iter->rule_type & DMAC_FILTER_RULE) && iter->entry == 0) { eth_hdr = &iter->flow_spec.h_u.ether_spec; if (req == DMAC_ADDR_DEL) { otx2_dmacflt_remove(pfvf, eth_hdr->h_dest, @@ -1156,7 +1185,7 @@ int otx2_remove_flow(struct otx2_nic *pfvf, u32 location) if (!flow) return -ENOENT; - if (flow->dmac_filter) { + if (flow->rule_type & DMAC_FILTER_RULE) { struct ethhdr *eth_hdr = &flow->flow_spec.h_u.ether_spec; /* user not allowed to remove dmac filter with interface mac */ @@ -1174,6 +1203,13 @@ int otx2_remove_flow(struct otx2_nic *pfvf, u32 location) flow_cfg->dmacflt_max_flows) == 1) otx2_update_rem_pfmac(pfvf, DMAC_ADDR_DEL); } else { +#ifdef CONFIG_DCB + if (flow->rule_type & PFC_FLOWCTRL_RULE) + otx2_update_bpid_in_rqctx(pfvf, 0, + flow->flow_spec.ring_cookie, + false); +#endif + err = otx2_remove_flow_msg(pfvf, flow->entry, false); } @@ -1383,7 +1419,7 @@ void otx2_dmacflt_reinstall_flows(struct otx2_nic *pf) struct ethhdr *eth_hdr; list_for_each_entry(iter, &pf->flow_cfg->flow_list, list) { - if (iter->dmac_filter) { + if (iter->rule_type & DMAC_FILTER_RULE) { eth_hdr = &iter->flow_spec.h_u.ether_spec; otx2_dmacflt_add(pf, eth_hdr->h_dest, iter->entry); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index d39341e4ab37..441aafc26a08 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1311,6 +1311,9 @@ static int otx2_get_rbuf_size(struct otx2_nic *pf, int mtu) int total_size; int rbuf_size; + if (pf->hw.rbuf_len) + return ALIGN(pf->hw.rbuf_len, OTX2_ALIGN) + OTX2_HEAD_ROOM; + /* The data transferred by NIX to memory consists of actual packet * plus additional data which has timestamp and/or EDSA/HIGIG2 * headers if interface is configured in corresponding modes. @@ -1694,9 +1697,6 @@ int otx2_open(struct net_device *netdev) if (pf->linfo.link_up && !(pf->pcifunc & RVU_PFVF_FUNC_MASK)) otx2_handle_link_event(pf); - /* Restore pause frame settings */ - otx2_config_pause_frm(pf); - /* Install DMAC Filters */ if (pf->flags & OTX2_FLAG_DMACFLTR_SUPPORT) otx2_dmacflt_reinstall_flows(pf); @@ -1863,9 +1863,7 @@ static int otx2_set_features(struct net_device *netdev, netdev_features_t features) { netdev_features_t changed = features ^ netdev->features; - bool ntuple = !!(features & NETIF_F_NTUPLE); struct otx2_nic *pf = netdev_priv(netdev); - bool tc = !!(features & NETIF_F_HW_TC); if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev)) return otx2_cgx_config_loopback(pf, @@ -1875,46 +1873,7 @@ static int otx2_set_features(struct net_device *netdev, return otx2_enable_rxvlan(pf, features & NETIF_F_HW_VLAN_CTAG_RX); - if ((changed & NETIF_F_NTUPLE) && !ntuple) - otx2_destroy_ntuple_flows(pf); - - if ((changed & NETIF_F_NTUPLE) && ntuple) { - if (!pf->flow_cfg->max_flows) { - netdev_err(netdev, - "Can't enable NTUPLE, MCAM entries not allocated\n"); - return -EINVAL; - } - } - - if ((changed & NETIF_F_HW_TC) && tc) { - if (!pf->flow_cfg->max_flows) { - netdev_err(netdev, - "Can't enable TC, MCAM entries not allocated\n"); - return -EINVAL; - } - } - - if ((changed & NETIF_F_HW_TC) && !tc && - pf->flow_cfg && pf->flow_cfg->nr_flows) { - netdev_err(netdev, "Can't disable TC hardware offload while flows are active\n"); - return -EBUSY; - } - - if ((changed & NETIF_F_NTUPLE) && ntuple && - (netdev->features & NETIF_F_HW_TC) && !(changed & NETIF_F_HW_TC)) { - netdev_err(netdev, - "Can't enable NTUPLE when TC is active, disable TC and retry\n"); - return -EINVAL; - } - - if ((changed & NETIF_F_HW_TC) && tc && - (netdev->features & NETIF_F_NTUPLE) && !(changed & NETIF_F_NTUPLE)) { - netdev_err(netdev, - "Can't enable TC when NTUPLE is active, disable NTUPLE and retry\n"); - return -EINVAL; - } - - return 0; + return otx2_handle_ntuple_tc_features(netdev, features); } static void otx2_reset_task(struct work_struct *work) @@ -2625,6 +2584,9 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) hw->tx_queues = qcount; hw->tot_tx_queues = qcount; hw->max_queues = qcount; + hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; + /* Use CQE of 128 byte descriptor size by default */ + hw->xqe_size = 128; num_vec = pci_msix_vec_count(pdev); hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE, @@ -2778,9 +2740,11 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Enable link notifications */ otx2_cgx_config_linkevents(pf, true); - /* Enable pause frames by default */ - pf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED; - pf->flags |= OTX2_FLAG_TX_PAUSE_ENABLED; +#ifdef CONFIG_DCB + err = otx2_dcbnl_set_ops(netdev); + if (err) + goto err_pf_sriov_init; +#endif return 0; @@ -2925,6 +2889,21 @@ static void otx2_remove(struct pci_dev *pdev) if (pf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED) otx2_config_hw_rx_tstamp(pf, false); + /* Disable 802.3x pause frames */ + if (pf->flags & OTX2_FLAG_RX_PAUSE_ENABLED || + (pf->flags & OTX2_FLAG_TX_PAUSE_ENABLED)) { + pf->flags &= ~OTX2_FLAG_RX_PAUSE_ENABLED; + pf->flags &= ~OTX2_FLAG_TX_PAUSE_ENABLED; + otx2_config_pause_frm(pf); + } + +#ifdef CONFIG_DCB + /* Disable PFC config */ + if (pf->pfc_en) { + pf->pfc_en = 0; + otx2_config_priority_flow_ctrl(pf); + } +#endif cancel_work_sync(&pf->reset_task); /* Disable link notifications */ otx2_cgx_config_linkevents(pf, false); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c index 61c20907315f..fdc2c9315b91 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c @@ -294,6 +294,14 @@ int otx2_ptp_init(struct otx2_nic *pfvf) goto error; } + if (is_dev_otx2(pfvf->pdev)) { + ptp_ptr->convert_rx_ptp_tstmp = &otx2_ptp_convert_rx_timestamp; + ptp_ptr->convert_tx_ptp_tstmp = &otx2_ptp_convert_tx_timestamp; + } else { + ptp_ptr->convert_rx_ptp_tstmp = &cn10k_ptp_convert_timestamp; + ptp_ptr->convert_tx_ptp_tstmp = &cn10k_ptp_convert_timestamp; + } + pfvf->ptp = ptp_ptr; error: diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h index 6ff284211d7b..7ff41927ceaf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h @@ -8,6 +8,21 @@ #ifndef OTX2_PTP_H #define OTX2_PTP_H +static inline u64 otx2_ptp_convert_rx_timestamp(u64 timestamp) +{ + return be64_to_cpu(*(__be64 *)×tamp); +} + +static inline u64 otx2_ptp_convert_tx_timestamp(u64 timestamp) +{ + return timestamp; +} + +static inline u64 cn10k_ptp_convert_timestamp(u64 timestamp) +{ + return ((timestamp >> 32) * NSEC_PER_SEC) + (timestamp & 0xFFFFFFFFUL); +} + int otx2_ptp_init(struct otx2_nic *pfvf); void otx2_ptp_destroy(struct otx2_nic *pfvf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 626961a41089..28b19945d716 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -58,7 +58,7 @@ int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic) { struct otx2_tc_info *tc = &nic->tc_info; - if (!nic->flow_cfg->max_flows || is_otx2_vf(nic->pcifunc)) + if (!nic->flow_cfg->max_flows) return 0; /* Max flows changed, free the existing bitmap */ @@ -190,6 +190,40 @@ static int otx2_tc_validate_flow(struct otx2_nic *nic, return 0; } +static int otx2_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + return 0; +} + static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, struct tc_cls_matchall_offload *cls) { @@ -212,6 +246,10 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, entry = &cls->rule->action.entries[0]; switch (entry->id) { case FLOW_ACTION_POLICE: + err = otx2_policer_validate(&cls->rule->action, entry, extack); + if (err) + return err; + if (entry->police.rate_pkt_ps) { NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second"); return -EOPNOTSUPP; @@ -315,6 +353,7 @@ static int otx2_tc_parse_actions(struct otx2_nic *nic, u8 nr_police = 0; bool pps = false; u64 rate; + int err; int i; if (!flow_action_has_entries(flow_action)) { @@ -355,6 +394,10 @@ static int otx2_tc_parse_actions(struct otx2_nic *nic, return -EOPNOTSUPP; } + err = otx2_policer_validate(flow_action, act, extack); + if (err) + return err; + if (act->police.rate_bytes_ps > 0) { rate = act->police.rate_bytes_ps * 8; burst = act->police.burst; @@ -1023,6 +1066,7 @@ int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type, return -EOPNOTSUPP; } } +EXPORT_SYMBOL(otx2_setup_tc); static const struct rhashtable_params tc_flow_ht_params = { .head_offset = offsetof(struct otx2_tc_flow, node), @@ -1052,6 +1096,7 @@ int otx2_init_tc(struct otx2_nic *nic) tc->flow_ht_params = tc_flow_ht_params; return rhashtable_init(&tc->flow_table, &tc->flow_ht_params); } +EXPORT_SYMBOL(otx2_init_tc); void otx2_shutdown_tc(struct otx2_nic *nic) { @@ -1060,3 +1105,4 @@ void otx2_shutdown_tc(struct otx2_nic *nic) kfree(tc->tc_entries_bitmap); rhashtable_destroy(&tc->flow_table); } +EXPORT_SYMBOL(otx2_shutdown_tc); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 7c4068c5d1ac..c26de15b2ac3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -148,6 +148,7 @@ static void otx2_snd_pkt_handler(struct otx2_nic *pfvf, if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) { timestamp = ((u64 *)sq->timestamps->base)[snd_comp->sqe_id]; if (timestamp != 1) { + timestamp = pfvf->ptp->convert_tx_ptp_tstmp(timestamp); err = otx2_ptp_tstamp2time(pfvf, timestamp, &tsns); if (!err) { memset(&ts, 0, sizeof(ts)); @@ -167,14 +168,15 @@ static void otx2_snd_pkt_handler(struct otx2_nic *pfvf, static void otx2_set_rxtstamp(struct otx2_nic *pfvf, struct sk_buff *skb, void *data) { - u64 tsns; + u64 timestamp, tsns; int err; if (!(pfvf->flags & OTX2_FLAG_RX_TSTAMP_ENABLED)) return; + timestamp = pfvf->ptp->convert_rx_ptp_tstmp(*(u64 *)data); /* The first 8 bytes is the timestamp */ - err = otx2_ptp_tstamp2time(pfvf, be64_to_cpu(*(__be64 *)data), &tsns); + err = otx2_ptp_tstamp2time(pfvf, timestamp, &tsns); if (err) return; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 925b74ebb8b0..9e87836ed8bf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -472,23 +472,7 @@ static void otx2vf_reset_task(struct work_struct *work) static int otx2vf_set_features(struct net_device *netdev, netdev_features_t features) { - netdev_features_t changed = features ^ netdev->features; - bool ntuple_enabled = !!(features & NETIF_F_NTUPLE); - struct otx2_nic *vf = netdev_priv(netdev); - - if (changed & NETIF_F_NTUPLE) { - if (!ntuple_enabled) { - otx2_mcam_flow_del(vf); - return 0; - } - - if (!otx2_get_maxflows(vf->flow_cfg)) { - netdev_err(netdev, - "Can't enable NTUPLE, MCAM entries not allocated\n"); - return -EINVAL; - } - } - return 0; + return otx2_handle_ntuple_tc_features(netdev, features); } static const struct net_device_ops otx2vf_netdev_ops = { @@ -502,6 +486,7 @@ static const struct net_device_ops otx2vf_netdev_ops = { .ndo_get_stats64 = otx2_get_stats64, .ndo_tx_timeout = otx2_tx_timeout, .ndo_eth_ioctl = otx2_ioctl, + .ndo_setup_tc = otx2_setup_tc, }; static int otx2_wq_init(struct otx2_nic *vf) @@ -586,6 +571,9 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) hw->tx_queues = qcount; hw->max_queues = qcount; hw->tot_tx_queues = qcount; + hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; + /* Use CQE of 128 byte descriptor size by default */ + hw->xqe_size = 128; hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE, GFP_KERNEL); @@ -662,6 +650,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) netdev->hw_features |= NETIF_F_NTUPLE; netdev->hw_features |= NETIF_F_RXALL; + netdev->hw_features |= NETIF_F_HW_TC; netif_set_gso_max_segs(netdev, OTX2_MAX_GSO_SEGS); netdev->watchdog_timeo = OTX2_TX_TIMEOUT; @@ -697,16 +686,24 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_unreg_netdev; - err = otx2_register_dl(vf); + err = otx2_init_tc(vf); if (err) goto err_unreg_netdev; - /* Enable pause frames by default */ - vf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED; - vf->flags |= OTX2_FLAG_TX_PAUSE_ENABLED; + err = otx2_register_dl(vf); + if (err) + goto err_shutdown_tc; + +#ifdef CONFIG_DCB + err = otx2_dcbnl_set_ops(netdev); + if (err) + goto err_shutdown_tc; +#endif return 0; +err_shutdown_tc: + otx2_shutdown_tc(vf); err_unreg_netdev: unregister_netdev(netdev); err_ptp_destroy: @@ -739,6 +736,22 @@ static void otx2vf_remove(struct pci_dev *pdev) vf = netdev_priv(netdev); + /* Disable 802.3x pause frames */ + if (vf->flags & OTX2_FLAG_RX_PAUSE_ENABLED || + (vf->flags & OTX2_FLAG_TX_PAUSE_ENABLED)) { + vf->flags &= ~OTX2_FLAG_RX_PAUSE_ENABLED; + vf->flags &= ~OTX2_FLAG_TX_PAUSE_ENABLED; + otx2_config_pause_frm(vf); + } + +#ifdef CONFIG_DCB + /* Disable PFC config */ + if (vf->pfc_en) { + vf->pfc_en = 0; + otx2_config_priority_flow_ctrl(vf); + } +#endif + cancel_work_sync(&vf->reset_task); otx2_unregister_dl(vf); unregister_netdev(netdev); diff --git a/drivers/net/ethernet/marvell/prestera/prestera.h b/drivers/net/ethernet/marvell/prestera/prestera.h index 2fd9ef2fe5d6..6f754ae2a584 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera.h +++ b/drivers/net/ethernet/marvell/prestera/prestera.h @@ -281,8 +281,11 @@ struct prestera_router { struct prestera_switch *sw; struct list_head vr_list; struct list_head rif_entry_list; + struct rhashtable fib_ht; + struct rhashtable kern_fib_cache_ht; struct notifier_block inetaddr_nb; struct notifier_block inetaddr_valid_nb; + struct notifier_block fib_nb; }; struct prestera_rxtx_params { @@ -325,6 +328,8 @@ int prestera_port_cfg_mac_write(struct prestera_port *port, struct prestera_port *prestera_port_dev_lower_find(struct net_device *dev); +void prestera_queue_work(struct work_struct *work); + int prestera_port_pvid_set(struct prestera_port *port, u16 vid); bool prestera_netdev_check(const struct net_device *dev); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_acl.c b/drivers/net/ethernet/marvell/prestera/prestera_acl.c index f0d9f592173b..47c899c08951 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_acl.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_acl.c @@ -22,6 +22,7 @@ struct prestera_acl { struct prestera_acl_ruleset_ht_key { struct prestera_flow_block *block; + u32 chain_index; }; struct prestera_acl_rule_entry { @@ -34,6 +35,10 @@ struct prestera_acl_rule_entry { u8 valid:1; } accept, drop, trap; struct { + struct prestera_acl_action_jump i; + u8 valid:1; + } jump; + struct { u32 id; struct prestera_counter_block *block; } counter; @@ -49,6 +54,7 @@ struct prestera_acl_ruleset { refcount_t refcount; void *keymask; u32 vtcam_id; + u32 index; u16 pcl_id; bool offload; }; @@ -83,20 +89,45 @@ static const struct rhashtable_params __prestera_acl_rule_entry_ht_params = { .automatic_shrinking = true, }; +int prestera_acl_chain_to_client(u32 chain_index, u32 *client) +{ + static const u32 client_map[] = { + PRESTERA_HW_COUNTER_CLIENT_LOOKUP_0, + PRESTERA_HW_COUNTER_CLIENT_LOOKUP_1, + PRESTERA_HW_COUNTER_CLIENT_LOOKUP_2 + }; + + if (chain_index >= ARRAY_SIZE(client_map)) + return -EINVAL; + + *client = client_map[chain_index]; + return 0; +} + +static bool prestera_acl_chain_is_supported(u32 chain_index) +{ + return (chain_index & ~PRESTERA_ACL_CHAIN_MASK) == 0; +} + static struct prestera_acl_ruleset * prestera_acl_ruleset_create(struct prestera_acl *acl, - struct prestera_flow_block *block) + struct prestera_flow_block *block, + u32 chain_index) { struct prestera_acl_ruleset *ruleset; u32 uid = 0; int err; + if (!prestera_acl_chain_is_supported(chain_index)) + return ERR_PTR(-EINVAL); + ruleset = kzalloc(sizeof(*ruleset), GFP_KERNEL); if (!ruleset) return ERR_PTR(-ENOMEM); ruleset->acl = acl; ruleset->ht_key.block = block; + ruleset->ht_key.chain_index = chain_index; refcount_set(&ruleset->refcount, 1); err = rhashtable_init(&ruleset->rule_ht, &prestera_acl_rule_ht_params); @@ -108,7 +139,9 @@ prestera_acl_ruleset_create(struct prestera_acl *acl, goto err_ruleset_create; /* make pcl-id based on uid */ - ruleset->pcl_id = (u8)uid; + ruleset->pcl_id = PRESTERA_ACL_PCL_ID_MAKE((u8)uid, chain_index); + ruleset->index = uid; + err = rhashtable_insert_fast(&acl->ruleset_ht, &ruleset->ht_node, prestera_acl_ruleset_ht_params); if (err) @@ -133,35 +166,64 @@ void prestera_acl_ruleset_keymask_set(struct prestera_acl_ruleset *ruleset, int prestera_acl_ruleset_offload(struct prestera_acl_ruleset *ruleset) { + struct prestera_acl_iface iface; u32 vtcam_id; int err; if (ruleset->offload) return -EEXIST; - err = prestera_acl_vtcam_id_get(ruleset->acl, 0, + err = prestera_acl_vtcam_id_get(ruleset->acl, + ruleset->ht_key.chain_index, ruleset->keymask, &vtcam_id); if (err) - return err; + goto err_vtcam_create; + + if (ruleset->ht_key.chain_index) { + /* for chain > 0, bind iface index to pcl-id to be able + * to jump from any other ruleset to this one using the index. + */ + iface.index = ruleset->index; + iface.type = PRESTERA_ACL_IFACE_TYPE_INDEX; + err = prestera_hw_vtcam_iface_bind(ruleset->acl->sw, &iface, + vtcam_id, ruleset->pcl_id); + if (err) + goto err_ruleset_bind; + } ruleset->vtcam_id = vtcam_id; ruleset->offload = true; return 0; + +err_ruleset_bind: + prestera_acl_vtcam_id_put(ruleset->acl, ruleset->vtcam_id); +err_vtcam_create: + return err; } static void prestera_acl_ruleset_destroy(struct prestera_acl_ruleset *ruleset) { struct prestera_acl *acl = ruleset->acl; u8 uid = ruleset->pcl_id & PRESTERA_ACL_KEYMASK_PCL_ID_USER; + int err; rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node, prestera_acl_ruleset_ht_params); - if (ruleset->offload) + if (ruleset->offload) { + if (ruleset->ht_key.chain_index) { + struct prestera_acl_iface iface = { + .type = PRESTERA_ACL_IFACE_TYPE_INDEX, + .index = ruleset->index + }; + err = prestera_hw_vtcam_iface_unbind(acl->sw, &iface, + ruleset->vtcam_id); + WARN_ON(err); + } WARN_ON(prestera_acl_vtcam_id_put(acl, ruleset->vtcam_id)); + } idr_remove(&acl->uid, uid); - rhashtable_destroy(&ruleset->rule_ht); kfree(ruleset->keymask); kfree(ruleset); @@ -169,23 +231,26 @@ static void prestera_acl_ruleset_destroy(struct prestera_acl_ruleset *ruleset) static struct prestera_acl_ruleset * __prestera_acl_ruleset_lookup(struct prestera_acl *acl, - struct prestera_flow_block *block) + struct prestera_flow_block *block, + u32 chain_index) { struct prestera_acl_ruleset_ht_key ht_key; memset(&ht_key, 0, sizeof(ht_key)); ht_key.block = block; + ht_key.chain_index = chain_index; return rhashtable_lookup_fast(&acl->ruleset_ht, &ht_key, prestera_acl_ruleset_ht_params); } struct prestera_acl_ruleset * prestera_acl_ruleset_lookup(struct prestera_acl *acl, - struct prestera_flow_block *block) + struct prestera_flow_block *block, + u32 chain_index) { struct prestera_acl_ruleset *ruleset; - ruleset = __prestera_acl_ruleset_lookup(acl, block); + ruleset = __prestera_acl_ruleset_lookup(acl, block, chain_index); if (!ruleset) return ERR_PTR(-ENOENT); @@ -195,17 +260,18 @@ prestera_acl_ruleset_lookup(struct prestera_acl *acl, struct prestera_acl_ruleset * prestera_acl_ruleset_get(struct prestera_acl *acl, - struct prestera_flow_block *block) + struct prestera_flow_block *block, + u32 chain_index) { struct prestera_acl_ruleset *ruleset; - ruleset = __prestera_acl_ruleset_lookup(acl, block); + ruleset = __prestera_acl_ruleset_lookup(acl, block, chain_index); if (ruleset) { refcount_inc(&ruleset->refcount); return ruleset; } - return prestera_acl_ruleset_create(acl, block); + return prestera_acl_ruleset_create(acl, block, chain_index); } void prestera_acl_ruleset_put(struct prestera_acl_ruleset *ruleset) @@ -293,6 +359,11 @@ prestera_acl_rule_lookup(struct prestera_acl_ruleset *ruleset, prestera_acl_rule_ht_params); } +u32 prestera_acl_ruleset_index_get(const struct prestera_acl_ruleset *ruleset) +{ + return ruleset->index; +} + bool prestera_acl_ruleset_is_offload(struct prestera_acl_ruleset *ruleset) { return ruleset->offload; @@ -300,7 +371,7 @@ bool prestera_acl_ruleset_is_offload(struct prestera_acl_ruleset *ruleset) struct prestera_acl_rule * prestera_acl_rule_create(struct prestera_acl_ruleset *ruleset, - unsigned long cookie) + unsigned long cookie, u32 chain_index) { struct prestera_acl_rule *rule; @@ -310,6 +381,7 @@ prestera_acl_rule_create(struct prestera_acl_ruleset *ruleset, rule->ruleset = ruleset; rule->cookie = cookie; + rule->chain_index = chain_index; refcount_inc(&ruleset->refcount); @@ -324,6 +396,10 @@ void prestera_acl_rule_priority_set(struct prestera_acl_rule *rule, void prestera_acl_rule_destroy(struct prestera_acl_rule *rule) { + if (rule->jump_ruleset) + /* release ruleset kept by jump action */ + prestera_acl_ruleset_put(rule->jump_ruleset); + prestera_acl_ruleset_put(rule->ruleset); kfree(rule); } @@ -347,7 +423,10 @@ int prestera_acl_rule_add(struct prestera_switch *sw, /* setup counter */ rule->re_arg.count.valid = true; - rule->re_arg.count.client = PRESTERA_HW_COUNTER_CLIENT_LOOKUP_0; + err = prestera_acl_chain_to_client(ruleset->ht_key.chain_index, + &rule->re_arg.count.client); + if (err) + goto err_rule_add; rule->re = prestera_acl_rule_entry_find(sw->acl, &rule->re_key); err = WARN_ON(rule->re) ? -EEXIST : 0; @@ -360,8 +439,10 @@ int prestera_acl_rule_add(struct prestera_switch *sw, if (err) goto err_rule_add; - /* bind the block (all ports) to chain index 0 */ - if (!ruleset->rule_count) { + /* bind the block (all ports) to chain index 0, rest of + * the chains are bound to goto action + */ + if (!ruleset->ht_key.chain_index && !ruleset->rule_count) { err = prestera_acl_ruleset_block_bind(ruleset, block); if (err) goto err_acl_block_bind; @@ -395,7 +476,7 @@ void prestera_acl_rule_del(struct prestera_switch *sw, prestera_acl_rule_entry_destroy(sw->acl, rule->re); /* unbind block (all ports) */ - if (!ruleset->rule_count) + if (!ruleset->ht_key.chain_index && !ruleset->rule_count) prestera_acl_ruleset_block_unbind(ruleset, block); } @@ -459,6 +540,12 @@ static int __prestera_acl_rule_entry2hw_add(struct prestera_switch *sw, act_hw[act_num].id = PRESTERA_ACL_RULE_ACTION_TRAP; act_num++; } + /* jump */ + if (e->jump.valid) { + act_hw[act_num].id = PRESTERA_ACL_RULE_ACTION_JUMP; + act_hw[act_num].jump = e->jump.i; + act_num++; + } /* counter */ if (e->counter.block) { act_hw[act_num].id = PRESTERA_ACL_RULE_ACTION_COUNT; @@ -505,6 +592,9 @@ __prestera_acl_rule_entry_act_construct(struct prestera_switch *sw, e->drop.valid = arg->drop.valid; /* trap */ e->trap.valid = arg->trap.valid; + /* jump */ + e->jump.valid = arg->jump.valid; + e->jump.i = arg->jump.i; /* counter */ if (arg->count.valid) { int err; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_acl.h b/drivers/net/ethernet/marvell/prestera/prestera_acl.h index 40f6c1d961fa..6d2ad27682d1 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_acl.h +++ b/drivers/net/ethernet/marvell/prestera/prestera_acl.h @@ -10,6 +10,14 @@ #define PRESTERA_ACL_KEYMASK_PCL_ID 0x3FF #define PRESTERA_ACL_KEYMASK_PCL_ID_USER \ (PRESTERA_ACL_KEYMASK_PCL_ID & 0x00FF) +#define PRESTERA_ACL_KEYMASK_PCL_ID_CHAIN \ + (PRESTERA_ACL_KEYMASK_PCL_ID & 0xFF00) +#define PRESTERA_ACL_CHAIN_MASK \ + (PRESTERA_ACL_KEYMASK_PCL_ID >> 8) + +#define PRESTERA_ACL_PCL_ID_MAKE(uid, chain_id) \ + (((uid) & PRESTERA_ACL_KEYMASK_PCL_ID_USER) | \ + (((chain_id) << 8) & PRESTERA_ACL_KEYMASK_PCL_ID_CHAIN)) #define rule_match_set_n(match_p, type, val_p, size) \ memcpy(&(match_p)[PRESTERA_ACL_RULE_MATCH_TYPE_##type], \ @@ -46,6 +54,7 @@ enum prestera_acl_rule_action { PRESTERA_ACL_RULE_ACTION_ACCEPT = 0, PRESTERA_ACL_RULE_ACTION_DROP = 1, PRESTERA_ACL_RULE_ACTION_TRAP = 2, + PRESTERA_ACL_RULE_ACTION_JUMP = 5, PRESTERA_ACL_RULE_ACTION_COUNT = 7, PRESTERA_ACL_RULE_ACTION_MAX @@ -61,6 +70,10 @@ struct prestera_acl_match { __be32 mask[__PRESTERA_ACL_RULE_MATCH_TYPE_MAX]; }; +struct prestera_acl_action_jump { + u32 index; +}; + struct prestera_acl_action_count { u32 id; }; @@ -74,6 +87,7 @@ struct prestera_acl_hw_action_info { enum prestera_acl_rule_action id; union { struct prestera_acl_action_count count; + struct prestera_acl_action_jump jump; }; }; @@ -88,6 +102,10 @@ struct prestera_acl_rule_entry_arg { u8 valid:1; } accept, drop, trap; struct { + struct prestera_acl_action_jump i; + u8 valid:1; + } jump; + struct { u8 valid:1; u32 client; } count; @@ -98,7 +116,9 @@ struct prestera_acl_rule { struct rhash_head ht_node; /* Member of acl HT */ struct list_head list; struct prestera_acl_ruleset *ruleset; + struct prestera_acl_ruleset *jump_ruleset; unsigned long cookie; + u32 chain_index; u32 priority; struct prestera_acl_rule_entry_key re_key; struct prestera_acl_rule_entry_arg re_arg; @@ -122,7 +142,7 @@ void prestera_acl_fini(struct prestera_switch *sw); struct prestera_acl_rule * prestera_acl_rule_create(struct prestera_acl_ruleset *ruleset, - unsigned long cookie); + unsigned long cookie, u32 chain_index); void prestera_acl_rule_priority_set(struct prestera_acl_rule *rule, u32 priority); void prestera_acl_rule_destroy(struct prestera_acl_rule *rule); @@ -147,10 +167,12 @@ prestera_acl_rule_entry_create(struct prestera_acl *acl, struct prestera_acl_rule_entry_arg *arg); struct prestera_acl_ruleset * prestera_acl_ruleset_get(struct prestera_acl *acl, - struct prestera_flow_block *block); + struct prestera_flow_block *block, + u32 chain_index); struct prestera_acl_ruleset * prestera_acl_ruleset_lookup(struct prestera_acl *acl, - struct prestera_flow_block *block); + struct prestera_flow_block *block, + u32 chain_index); void prestera_acl_ruleset_keymask_set(struct prestera_acl_ruleset *ruleset, void *keymask); bool prestera_acl_ruleset_is_offload(struct prestera_acl_ruleset *ruleset); @@ -160,6 +182,7 @@ int prestera_acl_ruleset_bind(struct prestera_acl_ruleset *ruleset, struct prestera_port *port); int prestera_acl_ruleset_unbind(struct prestera_acl_ruleset *ruleset, struct prestera_port *port); +u32 prestera_acl_ruleset_index_get(const struct prestera_acl_ruleset *ruleset); void prestera_acl_rule_keymask_pcl_id_set(struct prestera_acl_rule *rule, u16 pcl_id); @@ -167,5 +190,6 @@ prestera_acl_rule_keymask_pcl_id_set(struct prestera_acl_rule *rule, int prestera_acl_vtcam_id_get(struct prestera_acl *acl, u8 lookup, void *keymask, u32 *vtcam_id); int prestera_acl_vtcam_id_put(struct prestera_acl *acl, u32 vtcam_id); +int prestera_acl_chain_to_client(u32 chain_index, u32 *client); #endif /* _PRESTERA_ACL_H_ */ diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flow.c b/drivers/net/ethernet/marvell/prestera/prestera_flow.c index d849f046ece7..05c3ad98eba9 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_flow.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_flow.c @@ -29,9 +29,6 @@ static int prestera_flow_block_mall_cb(struct prestera_flow_block *block, static int prestera_flow_block_flower_cb(struct prestera_flow_block *block, struct flow_cls_offload *f) { - if (f->common.chain_index != 0) - return -EOPNOTSUPP; - switch (f->command) { case FLOW_CLS_REPLACE: return prestera_flower_replace(block, f); @@ -71,6 +68,7 @@ static void prestera_flow_block_destroy(void *cb_priv) prestera_flower_template_cleanup(block); + WARN_ON(!list_empty(&block->template_list)); WARN_ON(!list_empty(&block->binding_list)); kfree(block); @@ -86,6 +84,7 @@ prestera_flow_block_create(struct prestera_switch *sw, struct net *net) return NULL; INIT_LIST_HEAD(&block->binding_list); + INIT_LIST_HEAD(&block->template_list); block->net = net; block->sw = sw; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flow.h b/drivers/net/ethernet/marvell/prestera/prestera_flow.h index 1ea5b745bf72..6550278b166a 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_flow.h +++ b/drivers/net/ethernet/marvell/prestera/prestera_flow.h @@ -8,7 +8,6 @@ struct prestera_port; struct prestera_switch; -struct prestera_flower_template; struct prestera_flow_block_binding { struct list_head list; @@ -22,7 +21,7 @@ struct prestera_flow_block { struct net *net; struct prestera_acl_ruleset *ruleset_zero; struct flow_block_cb *block_cb; - struct prestera_flower_template *tmplt; + struct list_head template_list; unsigned int rule_count; }; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c index 19c1417fd05f..921959a980ee 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c @@ -8,26 +8,63 @@ struct prestera_flower_template { struct prestera_acl_ruleset *ruleset; + struct list_head list; + u32 chain_index; }; +static void +prestera_flower_template_free(struct prestera_flower_template *template) +{ + prestera_acl_ruleset_put(template->ruleset); + list_del(&template->list); + kfree(template); +} + void prestera_flower_template_cleanup(struct prestera_flow_block *block) { - if (block->tmplt) { - /* put the reference to the ruleset kept in create */ - prestera_acl_ruleset_put(block->tmplt->ruleset); - kfree(block->tmplt); - block->tmplt = NULL; - return; - } + struct prestera_flower_template *template, *tmp; + + /* put the reference to all rulesets kept in tmpl create */ + list_for_each_entry_safe(template, tmp, &block->template_list, list) + prestera_flower_template_free(template); +} + +static int +prestera_flower_parse_goto_action(struct prestera_flow_block *block, + struct prestera_acl_rule *rule, + u32 chain_index, + const struct flow_action_entry *act) +{ + struct prestera_acl_ruleset *ruleset; + + if (act->chain_index <= chain_index) + /* we can jump only forward */ + return -EINVAL; + + if (rule->re_arg.jump.valid) + return -EEXIST; + + ruleset = prestera_acl_ruleset_get(block->sw->acl, block, + act->chain_index); + if (IS_ERR(ruleset)) + return PTR_ERR(ruleset); + + rule->re_arg.jump.valid = 1; + rule->re_arg.jump.i.index = prestera_acl_ruleset_index_get(ruleset); + + rule->jump_ruleset = ruleset; + + return 0; } static int prestera_flower_parse_actions(struct prestera_flow_block *block, struct prestera_acl_rule *rule, struct flow_action *flow_action, + u32 chain_index, struct netlink_ext_ack *extack) { const struct flow_action_entry *act; - int i; + int err, i; /* whole struct (rule->re_arg) must be initialized with 0 */ if (!flow_action_has_entries(flow_action)) @@ -53,6 +90,13 @@ static int prestera_flower_parse_actions(struct prestera_flow_block *block, rule->re_arg.trap.valid = 1; break; + case FLOW_ACTION_GOTO: + err = prestera_flower_parse_goto_action(block, rule, + chain_index, + act); + if (err) + return err; + break; default: NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); pr_err("Unsupported action\n"); @@ -259,6 +303,7 @@ static int prestera_flower_parse(struct prestera_flow_block *block, } return prestera_flower_parse_actions(block, rule, &f->rule->action, + f->common.chain_index, f->common.extack); } @@ -270,12 +315,13 @@ int prestera_flower_replace(struct prestera_flow_block *block, struct prestera_acl_rule *rule; int err; - ruleset = prestera_acl_ruleset_get(acl, block); + ruleset = prestera_acl_ruleset_get(acl, block, f->common.chain_index); if (IS_ERR(ruleset)) return PTR_ERR(ruleset); /* increments the ruleset reference */ - rule = prestera_acl_rule_create(ruleset, f->cookie); + rule = prestera_acl_rule_create(ruleset, f->cookie, + f->common.chain_index); if (IS_ERR(rule)) { err = PTR_ERR(rule); goto err_rule_create; @@ -312,7 +358,8 @@ void prestera_flower_destroy(struct prestera_flow_block *block, struct prestera_acl_ruleset *ruleset; struct prestera_acl_rule *rule; - ruleset = prestera_acl_ruleset_lookup(block->sw->acl, block); + ruleset = prestera_acl_ruleset_lookup(block->sw->acl, block, + f->common.chain_index); if (IS_ERR(ruleset)) return; @@ -345,7 +392,8 @@ int prestera_flower_tmplt_create(struct prestera_flow_block *block, } prestera_acl_rule_keymask_pcl_id_set(&rule, 0); - ruleset = prestera_acl_ruleset_get(block->sw->acl, block); + ruleset = prestera_acl_ruleset_get(block->sw->acl, block, + f->common.chain_index); if (IS_ERR_OR_NULL(ruleset)) { err = -EINVAL; goto err_ruleset_get; @@ -364,7 +412,8 @@ int prestera_flower_tmplt_create(struct prestera_flow_block *block, /* keep the reference to the ruleset */ template->ruleset = ruleset; - block->tmplt = template; + template->chain_index = f->common.chain_index; + list_add_rcu(&template->list, &block->template_list); return 0; err_ruleset_get: @@ -377,7 +426,14 @@ err_malloc: void prestera_flower_tmplt_destroy(struct prestera_flow_block *block, struct flow_cls_offload *f) { - prestera_flower_template_cleanup(block); + struct prestera_flower_template *template, *tmp; + + list_for_each_entry_safe(template, tmp, &block->template_list, list) + if (template->chain_index == f->common.chain_index) { + /* put the reference to the ruleset kept in create */ + prestera_flower_template_free(template); + return; + } } int prestera_flower_stats(struct prestera_flow_block *block, @@ -390,7 +446,8 @@ int prestera_flower_stats(struct prestera_flow_block *block, u64 bytes; int err; - ruleset = prestera_acl_ruleset_lookup(block->sw->acl, block); + ruleset = prestera_acl_ruleset_lookup(block->sw->acl, block, + f->common.chain_index); if (IS_ERR(ruleset)) return PTR_ERR(ruleset); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.h b/drivers/net/ethernet/marvell/prestera/prestera_flower.h index dc3aa4280e9f..495f151e6fa9 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_flower.h +++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.h @@ -6,7 +6,6 @@ #include <net/pkt_cls.h> -struct prestera_switch; struct prestera_flow_block; int prestera_flower_replace(struct prestera_flow_block *block, diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c index e6bfadc874c5..c66cc929c820 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c @@ -55,6 +55,8 @@ enum prestera_cmd_type_t { PRESTERA_CMD_TYPE_ROUTER_RIF_CREATE = 0x600, PRESTERA_CMD_TYPE_ROUTER_RIF_DELETE = 0x601, + PRESTERA_CMD_TYPE_ROUTER_LPM_ADD = 0x610, + PRESTERA_CMD_TYPE_ROUTER_LPM_DELETE = 0x611, PRESTERA_CMD_TYPE_ROUTER_VR_CREATE = 0x630, PRESTERA_CMD_TYPE_ROUTER_VR_DELETE = 0x631, @@ -424,6 +426,9 @@ struct prestera_msg_acl_action { __le32 __reserved; union { struct { + __le32 index; + } jump; + struct { __le32 id; } count; __le32 reserved[6]; @@ -499,6 +504,15 @@ struct prestera_msg_iface { u8 __pad[3]; }; +struct prestera_msg_ip_addr { + union { + __be32 ipv4; + __be32 ipv6[4]; + } u; + u8 v; /* e.g. PRESTERA_IPV4 */ + u8 __pad[3]; +}; + struct prestera_msg_rif_req { struct prestera_msg_cmd cmd; struct prestera_msg_iface iif; @@ -515,6 +529,15 @@ struct prestera_msg_rif_resp { u8 __pad[2]; }; +struct prestera_msg_lpm_req { + struct prestera_msg_cmd cmd; + struct prestera_msg_ip_addr dst; + __le32 grp_id; + __le32 dst_len; + __le16 vr_id; + u8 __pad[2]; +}; + struct prestera_msg_vr_req { struct prestera_msg_cmd cmd; __le16 vr_id; @@ -598,9 +621,11 @@ static void prestera_hw_build_tests(void) BUILD_BUG_ON(sizeof(struct prestera_msg_counter_stats) != 16); BUILD_BUG_ON(sizeof(struct prestera_msg_rif_req) != 36); BUILD_BUG_ON(sizeof(struct prestera_msg_vr_req) != 8); + BUILD_BUG_ON(sizeof(struct prestera_msg_lpm_req) != 36); /* structure that are part of req/resp fw messages */ BUILD_BUG_ON(sizeof(struct prestera_msg_iface) != 16); + BUILD_BUG_ON(sizeof(struct prestera_msg_ip_addr) != 20); /* check responses */ BUILD_BUG_ON(sizeof(struct prestera_msg_common_resp) != 8); @@ -1164,6 +1189,9 @@ prestera_acl_rule_add_put_action(struct prestera_msg_acl_action *action, case PRESTERA_ACL_RULE_ACTION_TRAP: /* just rule action id, no specific data */ break; + case PRESTERA_ACL_RULE_ACTION_JUMP: + action->jump.index = __cpu_to_le32(info->jump.index); + break; case PRESTERA_ACL_RULE_ACTION_COUNT: action->count.id = __cpu_to_le32(info->count.id); break; @@ -1891,6 +1919,33 @@ int prestera_hw_vr_delete(struct prestera_switch *sw, u16 vr_id) sizeof(req)); } +int prestera_hw_lpm_add(struct prestera_switch *sw, u16 vr_id, + __be32 dst, u32 dst_len, u32 grp_id) +{ + struct prestera_msg_lpm_req req = { + .dst_len = __cpu_to_le32(dst_len), + .vr_id = __cpu_to_le16(vr_id), + .grp_id = __cpu_to_le32(grp_id), + .dst.u.ipv4 = dst + }; + + return prestera_cmd(sw, PRESTERA_CMD_TYPE_ROUTER_LPM_ADD, &req.cmd, + sizeof(req)); +} + +int prestera_hw_lpm_del(struct prestera_switch *sw, u16 vr_id, + __be32 dst, u32 dst_len) +{ + struct prestera_msg_lpm_req req = { + .dst_len = __cpu_to_le32(dst_len), + .vr_id = __cpu_to_le16(vr_id), + .dst.u.ipv4 = dst + }; + + return prestera_cmd(sw, PRESTERA_CMD_TYPE_ROUTER_LPM_DELETE, &req.cmd, + sizeof(req)); +} + int prestera_hw_rxtx_init(struct prestera_switch *sw, struct prestera_rxtx_params *params) { diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_hw.h index 3ff12bae5909..fd896a8838bb 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_hw.h +++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.h @@ -249,6 +249,12 @@ int prestera_hw_rif_delete(struct prestera_switch *sw, u16 rif_id, int prestera_hw_vr_create(struct prestera_switch *sw, u16 *vr_id); int prestera_hw_vr_delete(struct prestera_switch *sw, u16 vr_id); +/* LPM PI */ +int prestera_hw_lpm_add(struct prestera_switch *sw, u16 vr_id, + __be32 dst, u32 dst_len, u32 grp_id); +int prestera_hw_lpm_del(struct prestera_switch *sw, u16 vr_id, + __be32 dst, u32 dst_len); + /* Event handlers */ int prestera_hw_event_handler_register(struct prestera_switch *sw, enum prestera_event_type type, diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 73cd0a4b7291..1402c7889e78 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -28,6 +28,12 @@ #define PRESTERA_MAC_ADDR_NUM_MAX 255 static struct workqueue_struct *prestera_wq; +static struct workqueue_struct *prestera_owq; + +void prestera_queue_work(struct work_struct *work) +{ + queue_work(prestera_owq, work); +} int prestera_port_pvid_set(struct prestera_port *port, u16 vid) { @@ -1025,12 +1031,17 @@ static int __init prestera_module_init(void) if (!prestera_wq) return -ENOMEM; + prestera_owq = alloc_ordered_workqueue("prestera_ordered", 0); + if (!prestera_owq) + return -ENOMEM; + return 0; } static void __exit prestera_module_exit(void) { destroy_workqueue(prestera_wq); + destroy_workqueue(prestera_owq); } module_init(prestera_module_init); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index 6ef4d32b8fdd..6c5618cf4f08 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -5,10 +5,39 @@ #include <linux/types.h> #include <linux/inetdevice.h> #include <net/switchdev.h> +#include <linux/rhashtable.h> #include "prestera.h" #include "prestera_router_hw.h" +struct prestera_kern_fib_cache_key { + struct prestera_ip_addr addr; + u32 prefix_len; + u32 kern_tb_id; /* tb_id from kernel (not fixed) */ +}; + +/* Subscribing on neighbours in kernel */ +struct prestera_kern_fib_cache { + struct prestera_kern_fib_cache_key key; + struct { + struct prestera_fib_key fib_key; + enum prestera_fib_type fib_type; + } lpm_info; /* hold prepared lpm info */ + /* Indicate if route is not overlapped by another table */ + struct rhash_head ht_node; /* node of prestera_router */ + struct fib_info *fi; + u8 kern_tos; + u8 kern_type; + bool reachable; +}; + +static const struct rhashtable_params __prestera_kern_fib_cache_ht_params = { + .key_offset = offsetof(struct prestera_kern_fib_cache, key), + .head_offset = offsetof(struct prestera_kern_fib_cache, ht_node), + .key_len = sizeof(struct prestera_kern_fib_cache_key), + .automatic_shrinking = true, +}; + /* This util to be used, to convert kernel rules for default vr in hw_vr */ static u32 prestera_fix_tb_id(u32 tb_id) { @@ -20,6 +49,290 @@ static u32 prestera_fix_tb_id(u32 tb_id) return tb_id; } +static void +prestera_util_fen_info2fib_cache_key(struct fib_entry_notifier_info *fen_info, + struct prestera_kern_fib_cache_key *key) +{ + memset(key, 0, sizeof(*key)); + key->addr.u.ipv4 = cpu_to_be32(fen_info->dst); + key->prefix_len = fen_info->dst_len; + key->kern_tb_id = fen_info->tb_id; +} + +static struct prestera_kern_fib_cache * +prestera_kern_fib_cache_find(struct prestera_switch *sw, + struct prestera_kern_fib_cache_key *key) +{ + struct prestera_kern_fib_cache *fib_cache; + + fib_cache = + rhashtable_lookup_fast(&sw->router->kern_fib_cache_ht, key, + __prestera_kern_fib_cache_ht_params); + return fib_cache; +} + +static void +prestera_kern_fib_cache_destroy(struct prestera_switch *sw, + struct prestera_kern_fib_cache *fib_cache) +{ + fib_info_put(fib_cache->fi); + rhashtable_remove_fast(&sw->router->kern_fib_cache_ht, + &fib_cache->ht_node, + __prestera_kern_fib_cache_ht_params); + kfree(fib_cache); +} + +/* Operations on fi (offload, etc) must be wrapped in utils. + * This function just create storage. + */ +static struct prestera_kern_fib_cache * +prestera_kern_fib_cache_create(struct prestera_switch *sw, + struct prestera_kern_fib_cache_key *key, + struct fib_info *fi, u8 tos, u8 type) +{ + struct prestera_kern_fib_cache *fib_cache; + int err; + + fib_cache = kzalloc(sizeof(*fib_cache), GFP_KERNEL); + if (!fib_cache) + goto err_kzalloc; + + memcpy(&fib_cache->key, key, sizeof(*key)); + fib_info_hold(fi); + fib_cache->fi = fi; + fib_cache->kern_tos = tos; + fib_cache->kern_type = type; + + err = rhashtable_insert_fast(&sw->router->kern_fib_cache_ht, + &fib_cache->ht_node, + __prestera_kern_fib_cache_ht_params); + if (err) + goto err_ht_insert; + + return fib_cache; + +err_ht_insert: + fib_info_put(fi); + kfree(fib_cache); +err_kzalloc: + return NULL; +} + +static void +__prestera_k_arb_fib_lpm_offload_set(struct prestera_switch *sw, + struct prestera_kern_fib_cache *fc, + bool fail, bool offload, bool trap) +{ + struct fib_rt_info fri; + + if (fc->key.addr.v != PRESTERA_IPV4) + return; + + fri.fi = fc->fi; + fri.tb_id = fc->key.kern_tb_id; + fri.dst = fc->key.addr.u.ipv4; + fri.dst_len = fc->key.prefix_len; + fri.tos = fc->kern_tos; + fri.type = fc->kern_type; + /* flags begin */ + fri.offload = offload; + fri.trap = trap; + fri.offload_failed = fail; + /* flags end */ + fib_alias_hw_flags_set(&init_net, &fri); +} + +static int +__prestera_pr_k_arb_fc_lpm_info_calc(struct prestera_switch *sw, + struct prestera_kern_fib_cache *fc) +{ + memset(&fc->lpm_info, 0, sizeof(fc->lpm_info)); + + switch (fc->fi->fib_type) { + case RTN_UNICAST: + fc->lpm_info.fib_type = PRESTERA_FIB_TYPE_TRAP; + break; + /* Unsupported. Leave it for kernel: */ + case RTN_BROADCAST: + case RTN_MULTICAST: + /* Routes we must trap by design: */ + case RTN_LOCAL: + case RTN_UNREACHABLE: + case RTN_PROHIBIT: + fc->lpm_info.fib_type = PRESTERA_FIB_TYPE_TRAP; + break; + case RTN_BLACKHOLE: + fc->lpm_info.fib_type = PRESTERA_FIB_TYPE_DROP; + break; + default: + dev_err(sw->dev->dev, "Unsupported fib_type"); + return -EOPNOTSUPP; + } + + fc->lpm_info.fib_key.addr = fc->key.addr; + fc->lpm_info.fib_key.prefix_len = fc->key.prefix_len; + fc->lpm_info.fib_key.tb_id = prestera_fix_tb_id(fc->key.kern_tb_id); + + return 0; +} + +static int __prestera_k_arb_f_lpm_set(struct prestera_switch *sw, + struct prestera_kern_fib_cache *fc, + bool enabled) +{ + struct prestera_fib_node *fib_node; + + fib_node = prestera_fib_node_find(sw, &fc->lpm_info.fib_key); + if (fib_node) + prestera_fib_node_destroy(sw, fib_node); + + if (!enabled) + return 0; + + fib_node = prestera_fib_node_create(sw, &fc->lpm_info.fib_key, + fc->lpm_info.fib_type); + + if (!fib_node) { + dev_err(sw->dev->dev, "fib_node=NULL %pI4n/%d kern_tb_id = %d", + &fc->key.addr.u.ipv4, fc->key.prefix_len, + fc->key.kern_tb_id); + return -ENOENT; + } + + return 0; +} + +static int __prestera_k_arb_fc_apply(struct prestera_switch *sw, + struct prestera_kern_fib_cache *fc) +{ + int err; + + err = __prestera_pr_k_arb_fc_lpm_info_calc(sw, fc); + if (err) + return err; + + err = __prestera_k_arb_f_lpm_set(sw, fc, fc->reachable); + if (err) { + __prestera_k_arb_fib_lpm_offload_set(sw, fc, + true, false, false); + return err; + } + + switch (fc->lpm_info.fib_type) { + case PRESTERA_FIB_TYPE_TRAP: + __prestera_k_arb_fib_lpm_offload_set(sw, fc, false, + false, fc->reachable); + break; + case PRESTERA_FIB_TYPE_DROP: + __prestera_k_arb_fib_lpm_offload_set(sw, fc, false, true, + fc->reachable); + break; + case PRESTERA_FIB_TYPE_INVALID: + break; + } + + return 0; +} + +static struct prestera_kern_fib_cache * +__prestera_k_arb_util_fib_overlaps(struct prestera_switch *sw, + struct prestera_kern_fib_cache *fc) +{ + struct prestera_kern_fib_cache_key fc_key; + struct prestera_kern_fib_cache *rfc; + + /* TODO: parse kernel rules */ + rfc = NULL; + if (fc->key.kern_tb_id == RT_TABLE_LOCAL) { + memcpy(&fc_key, &fc->key, sizeof(fc_key)); + fc_key.kern_tb_id = RT_TABLE_MAIN; + rfc = prestera_kern_fib_cache_find(sw, &fc_key); + } + + return rfc; +} + +static struct prestera_kern_fib_cache * +__prestera_k_arb_util_fib_overlapped(struct prestera_switch *sw, + struct prestera_kern_fib_cache *fc) +{ + struct prestera_kern_fib_cache_key fc_key; + struct prestera_kern_fib_cache *rfc; + + /* TODO: parse kernel rules */ + rfc = NULL; + if (fc->key.kern_tb_id == RT_TABLE_MAIN) { + memcpy(&fc_key, &fc->key, sizeof(fc_key)); + fc_key.kern_tb_id = RT_TABLE_LOCAL; + rfc = prestera_kern_fib_cache_find(sw, &fc_key); + } + + return rfc; +} + +static int +prestera_k_arb_fib_evt(struct prestera_switch *sw, + bool replace, /* replace or del */ + struct fib_entry_notifier_info *fen_info) +{ + struct prestera_kern_fib_cache *tfib_cache, *bfib_cache; /* top/btm */ + struct prestera_kern_fib_cache_key fc_key; + struct prestera_kern_fib_cache *fib_cache; + int err; + + prestera_util_fen_info2fib_cache_key(fen_info, &fc_key); + fib_cache = prestera_kern_fib_cache_find(sw, &fc_key); + if (fib_cache) { + fib_cache->reachable = false; + err = __prestera_k_arb_fc_apply(sw, fib_cache); + if (err) + dev_err(sw->dev->dev, + "Applying destroyed fib_cache failed"); + + bfib_cache = __prestera_k_arb_util_fib_overlaps(sw, fib_cache); + tfib_cache = __prestera_k_arb_util_fib_overlapped(sw, fib_cache); + if (!tfib_cache && bfib_cache) { + bfib_cache->reachable = true; + err = __prestera_k_arb_fc_apply(sw, bfib_cache); + if (err) + dev_err(sw->dev->dev, + "Applying fib_cache btm failed"); + } + + prestera_kern_fib_cache_destroy(sw, fib_cache); + } + + if (replace) { + fib_cache = prestera_kern_fib_cache_create(sw, &fc_key, + fen_info->fi, + fen_info->tos, + fen_info->type); + if (!fib_cache) { + dev_err(sw->dev->dev, "fib_cache == NULL"); + return -ENOENT; + } + + bfib_cache = __prestera_k_arb_util_fib_overlaps(sw, fib_cache); + tfib_cache = __prestera_k_arb_util_fib_overlapped(sw, fib_cache); + if (!tfib_cache) + fib_cache->reachable = true; + + if (bfib_cache) { + bfib_cache->reachable = false; + err = __prestera_k_arb_fc_apply(sw, bfib_cache); + if (err) + dev_err(sw->dev->dev, + "Applying fib_cache btm failed"); + } + + err = __prestera_k_arb_fc_apply(sw, fib_cache); + if (err) + dev_err(sw->dev->dev, "Applying fib_cache failed"); + } + + return 0; +} + static int __prestera_inetaddr_port_event(struct net_device *port_dev, unsigned long event, struct netlink_ext_ack *extack) @@ -137,6 +450,89 @@ out: return notifier_from_errno(err); } +struct prestera_fib_event_work { + struct work_struct work; + struct prestera_switch *sw; + struct fib_entry_notifier_info fen_info; + unsigned long event; +}; + +static void __prestera_router_fib_event_work(struct work_struct *work) +{ + struct prestera_fib_event_work *fib_work = + container_of(work, struct prestera_fib_event_work, work); + struct prestera_switch *sw = fib_work->sw; + int err; + + rtnl_lock(); + + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: + err = prestera_k_arb_fib_evt(sw, true, &fib_work->fen_info); + if (err) + goto err_out; + + break; + case FIB_EVENT_ENTRY_DEL: + err = prestera_k_arb_fib_evt(sw, false, &fib_work->fen_info); + if (err) + goto err_out; + + break; + } + + goto out; + +err_out: + dev_err(sw->dev->dev, "Error when processing %pI4h/%d", + &fib_work->fen_info.dst, + fib_work->fen_info.dst_len); +out: + fib_info_put(fib_work->fen_info.fi); + rtnl_unlock(); + kfree(fib_work); +} + +/* Called with rcu_read_lock() */ +static int __prestera_router_fib_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct prestera_fib_event_work *fib_work; + struct fib_entry_notifier_info *fen_info; + struct fib_notifier_info *info = ptr; + struct prestera_router *router; + + if (info->family != AF_INET) + return NOTIFY_DONE; + + router = container_of(nb, struct prestera_router, fib_nb); + + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_DEL: + fen_info = container_of(info, struct fib_entry_notifier_info, + info); + if (!fen_info->fi) + return NOTIFY_DONE; + + fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); + if (WARN_ON(!fib_work)) + return NOTIFY_BAD; + + fib_info_hold(fen_info->fi); + fib_work->fen_info = *fen_info; + fib_work->event = event; + fib_work->sw = router->sw; + INIT_WORK(&fib_work->work, __prestera_router_fib_event_work); + prestera_queue_work(&fib_work->work); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_DONE; +} + int prestera_router_init(struct prestera_switch *sw) { struct prestera_router *router; @@ -153,6 +549,11 @@ int prestera_router_init(struct prestera_switch *sw) if (err) goto err_router_lib_init; + err = rhashtable_init(&router->kern_fib_cache_ht, + &__prestera_kern_fib_cache_ht_params); + if (err) + goto err_kern_fib_cache_ht_init; + router->inetaddr_valid_nb.notifier_call = __prestera_inetaddr_valid_cb; err = register_inetaddr_validator_notifier(&router->inetaddr_valid_nb); if (err) @@ -163,11 +564,21 @@ int prestera_router_init(struct prestera_switch *sw) if (err) goto err_register_inetaddr_notifier; + router->fib_nb.notifier_call = __prestera_router_fib_event; + err = register_fib_notifier(&init_net, &router->fib_nb, + /* TODO: flush fib entries */ NULL, NULL); + if (err) + goto err_register_fib_notifier; + return 0; +err_register_fib_notifier: + unregister_inetaddr_notifier(&router->inetaddr_nb); err_register_inetaddr_notifier: unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb); err_register_inetaddr_validator_notifier: + rhashtable_destroy(&router->kern_fib_cache_ht); +err_kern_fib_cache_ht_init: prestera_router_hw_fini(sw); err_router_lib_init: kfree(sw->router); @@ -178,6 +589,7 @@ void prestera_router_fini(struct prestera_switch *sw) { unregister_inetaddr_notifier(&sw->router->inetaddr_nb); unregister_inetaddr_validator_notifier(&sw->router->inetaddr_valid_nb); + rhashtable_destroy(&sw->router->kern_fib_cache_ht); prestera_router_hw_fini(sw); kfree(sw->router); sw->router = NULL; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c index e5592b69ad37..5b0cf3be9a9e 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c @@ -9,23 +9,41 @@ #include "prestera_acl.h" /* +--+ - * +------->|vr| - * | +--+ - * | - * +-+-------+ - * |rif_entry| - * +---------+ - * Rif is + * +------->|vr|<-+ + * | +--+ | + * | | + * +-+-------+ +--+---+-+ + * |rif_entry| |fib_node| + * +---------+ +--------+ + * Rif is Fib - is exit point * used as * entry point * for vr in hw */ +#define PRESTERA_NHGR_UNUSED (0) +#define PRESTERA_NHGR_DROP (0xFFFFFFFF) + +static const struct rhashtable_params __prestera_fib_ht_params = { + .key_offset = offsetof(struct prestera_fib_node, key), + .head_offset = offsetof(struct prestera_fib_node, ht_node), + .key_len = sizeof(struct prestera_fib_key), + .automatic_shrinking = true, +}; + int prestera_router_hw_init(struct prestera_switch *sw) { + int err; + + err = rhashtable_init(&sw->router->fib_ht, + &__prestera_fib_ht_params); + if (err) + goto err_fib_ht_init; + INIT_LIST_HEAD(&sw->router->vr_list); INIT_LIST_HEAD(&sw->router->rif_entry_list); +err_fib_ht_init: return 0; } @@ -33,6 +51,7 @@ void prestera_router_hw_fini(struct prestera_switch *sw) { WARN_ON(!list_empty(&sw->router->vr_list)); WARN_ON(!list_empty(&sw->router->rif_entry_list)); + rhashtable_destroy(&sw->router->fib_ht); } static struct prestera_vr *__prestera_vr_find(struct prestera_switch *sw, @@ -212,3 +231,102 @@ err_key_copy: err_kzalloc: return NULL; } + +struct prestera_fib_node * +prestera_fib_node_find(struct prestera_switch *sw, struct prestera_fib_key *key) +{ + struct prestera_fib_node *fib_node; + + fib_node = rhashtable_lookup_fast(&sw->router->fib_ht, key, + __prestera_fib_ht_params); + return fib_node; +} + +static void __prestera_fib_node_destruct(struct prestera_switch *sw, + struct prestera_fib_node *fib_node) +{ + struct prestera_vr *vr; + + vr = fib_node->info.vr; + prestera_hw_lpm_del(sw, vr->hw_vr_id, fib_node->key.addr.u.ipv4, + fib_node->key.prefix_len); + switch (fib_node->info.type) { + case PRESTERA_FIB_TYPE_TRAP: + break; + case PRESTERA_FIB_TYPE_DROP: + break; + default: + pr_err("Unknown fib_node->info.type = %d", + fib_node->info.type); + } + + prestera_vr_put(sw, vr); +} + +void prestera_fib_node_destroy(struct prestera_switch *sw, + struct prestera_fib_node *fib_node) +{ + __prestera_fib_node_destruct(sw, fib_node); + rhashtable_remove_fast(&sw->router->fib_ht, &fib_node->ht_node, + __prestera_fib_ht_params); + kfree(fib_node); +} + +struct prestera_fib_node * +prestera_fib_node_create(struct prestera_switch *sw, + struct prestera_fib_key *key, + enum prestera_fib_type fib_type) +{ + struct prestera_fib_node *fib_node; + u32 grp_id; + struct prestera_vr *vr; + int err; + + fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL); + if (!fib_node) + goto err_kzalloc; + + memcpy(&fib_node->key, key, sizeof(*key)); + fib_node->info.type = fib_type; + + vr = prestera_vr_get(sw, key->tb_id, NULL); + if (IS_ERR(vr)) + goto err_vr_get; + + fib_node->info.vr = vr; + + switch (fib_type) { + case PRESTERA_FIB_TYPE_TRAP: + grp_id = PRESTERA_NHGR_UNUSED; + break; + case PRESTERA_FIB_TYPE_DROP: + grp_id = PRESTERA_NHGR_DROP; + break; + default: + pr_err("Unsupported fib_type %d", fib_type); + goto err_nh_grp_get; + } + + err = prestera_hw_lpm_add(sw, vr->hw_vr_id, key->addr.u.ipv4, + key->prefix_len, grp_id); + if (err) + goto err_lpm_add; + + err = rhashtable_insert_fast(&sw->router->fib_ht, &fib_node->ht_node, + __prestera_fib_ht_params); + if (err) + goto err_ht_insert; + + return fib_node; + +err_ht_insert: + prestera_hw_lpm_del(sw, vr->hw_vr_id, key->addr.u.ipv4, + key->prefix_len); +err_lpm_add: +err_nh_grp_get: + prestera_vr_put(sw, vr); +err_vr_get: + kfree(fib_node); +err_kzalloc: + return NULL; +} diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h index b6b028551868..67dbb49c8bd4 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h +++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.h @@ -22,6 +22,42 @@ struct prestera_rif_entry { struct list_head router_node; /* ht */ }; +struct prestera_ip_addr { + union { + __be32 ipv4; + struct in6_addr ipv6; + } u; + enum { + PRESTERA_IPV4 = 0, + PRESTERA_IPV6 + } v; +}; + +struct prestera_fib_key { + struct prestera_ip_addr addr; + u32 prefix_len; + u32 tb_id; +}; + +struct prestera_fib_info { + struct prestera_vr *vr; + struct list_head vr_node; + enum prestera_fib_type { + PRESTERA_FIB_TYPE_INVALID = 0, + /* It can be connected route + * and will be overlapped with neighbours + */ + PRESTERA_FIB_TYPE_TRAP, + PRESTERA_FIB_TYPE_DROP + } type; +}; + +struct prestera_fib_node { + struct rhash_head ht_node; /* node of prestera_vr */ + struct prestera_fib_key key; + struct prestera_fib_info info; /* action related info */ +}; + struct prestera_rif_entry * prestera_rif_entry_find(const struct prestera_switch *sw, const struct prestera_rif_entry_key *k); @@ -31,6 +67,14 @@ struct prestera_rif_entry * prestera_rif_entry_create(struct prestera_switch *sw, struct prestera_rif_entry_key *k, u32 tb_id, const unsigned char *addr); +struct prestera_fib_node *prestera_fib_node_find(struct prestera_switch *sw, + struct prestera_fib_key *key); +void prestera_fib_node_destroy(struct prestera_switch *sw, + struct prestera_fib_node *fib_node); +struct prestera_fib_node * +prestera_fib_node_create(struct prestera_switch *sw, + struct prestera_fib_key *key, + enum prestera_fib_type fib_type); int prestera_router_hw_init(struct prestera_switch *sw); void prestera_router_hw_fini(struct prestera_switch *sw); diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 89ca7960b225..4cd0747edaff 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1556,6 +1556,7 @@ static int mtk_star_probe(struct platform_device *pdev) return devm_register_netdev(dev, ndev); } +#ifdef CONFIG_OF static const struct of_device_id mtk_star_of_match[] = { { .compatible = "mediatek,mt8516-eth", }, { .compatible = "mediatek,mt8518-eth", }, @@ -1563,6 +1564,7 @@ static const struct of_device_id mtk_star_of_match[] = { { } }; MODULE_DEVICE_TABLE(of, mtk_star_of_match); +#endif static SIMPLE_DEV_PM_OPS(mtk_star_pm_ops, mtk_star_suspend, mtk_star_resume); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index fcfd38fa9e6c..b94cca45eb78 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -28,7 +28,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ - en/qos.o en/trap.o en/fs_tt_redirect.o + en/qos.o en/trap.o en/fs_tt_redirect.o en/selq.o # # Netdev extra @@ -103,6 +103,7 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o steering/dr_icm_pool.o steering/dr_buddy.o \ steering/dr_ste.o steering/dr_send.o \ steering/dr_ste_v0.o steering/dr_ste_v1.o \ + steering/dr_ste_v2.o \ steering/dr_cmd.o steering/dr_fw.o \ steering/dr_action.o steering/fs_dr.o \ steering/dr_dbg.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 3eacd8739929..989e7cb48a95 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -191,10 +191,10 @@ static int verify_block_sig(struct mlx5_cmd_prot_block *block) int xor_len = sizeof(*block) - sizeof(block->data) - 1; if (xor8_buf(block, rsvd0_off, xor_len) != 0xff) - return -EINVAL; + return -EHWPOISON; if (xor8_buf(block, 0, sizeof(*block)) != 0xff) - return -EINVAL; + return -EHWPOISON; return 0; } @@ -260,12 +260,12 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent) sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay)); if (sig != 0xff) - return -EINVAL; + return -EHWPOISON; for (i = 0; i < n && next; i++) { err = verify_block_sig(next->buf); if (err) - return err; + return -EHWPOISON; next = next->next; } @@ -480,7 +480,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_ALLOC_SF: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; - return -EIO; + return -ENOLINK; default: mlx5_core_err(dev, "Unknown FW command (%d)\n", op); return -EINVAL; @@ -761,44 +761,72 @@ struct mlx5_ifc_mbox_in_bits { u8 reserved_at_40[0x40]; }; -void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome) +void mlx5_cmd_out_err(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) { - *status = MLX5_GET(mbox_out, out, status); - *syndrome = MLX5_GET(mbox_out, out, syndrome); + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + mlx5_core_err_rl(dev, + "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", + mlx5_command_str(opcode), opcode, op_mod, + cmd_status_str(status), status, syndrome, cmd_status_to_err(status)); } +EXPORT_SYMBOL(mlx5_cmd_out_err); -static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out) +static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) { + u16 opcode, op_mod; u32 syndrome; u8 status; - u16 opcode; - u16 op_mod; u16 uid; + int err; - mlx5_cmd_mbox_status(out, &status, &syndrome); - if (!status) - return 0; + syndrome = MLX5_GET(mbox_out, out, syndrome); + status = MLX5_GET(mbox_out, out, status); opcode = MLX5_GET(mbox_in, in, opcode); op_mod = MLX5_GET(mbox_in, in, op_mod); uid = MLX5_GET(mbox_in, in, uid); + err = cmd_status_to_err(status); + if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY) - mlx5_core_err_rl(dev, - "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n", - mlx5_command_str(opcode), opcode, op_mod, - cmd_status_str(status), status, syndrome); + mlx5_cmd_out_err(dev, opcode, op_mod, out); else mlx5_core_dbg(dev, - "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n", - mlx5_command_str(opcode), - opcode, op_mod, - cmd_status_str(status), - status, - syndrome); + "%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", + mlx5_command_str(opcode), opcode, op_mod, uid, + cmd_status_str(status), status, syndrome, err); +} - return cmd_status_to_err(status); +int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out) +{ + /* aborted due to PCI error or via reset flow mlx5_cmd_trigger_completions() */ + if (err == -ENXIO) { + u16 opcode = MLX5_GET(mbox_in, in, opcode); + u32 syndrome; + u8 status; + + /* PCI Error, emulate command return status, for smooth reset */ + err = mlx5_internal_err_ret_value(dev, opcode, &syndrome, &status); + MLX5_SET(mbox_out, out, status, status); + MLX5_SET(mbox_out, out, syndrome, syndrome); + if (!err) + return 0; + } + + /* driver or FW delivery error */ + if (err != -EREMOTEIO && err) + return err; + + /* check outbox status */ + err = cmd_status_to_err(MLX5_GET(mbox_out, out, status)); + if (err) + cmd_status_print(dev, in, out); + + return err; } +EXPORT_SYMBOL(mlx5_cmd_check); static void dump_command(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent, int input) @@ -981,13 +1009,7 @@ static void cmd_work_handler(struct work_struct *work) /* Skip sending command to fw if internal error */ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { - u8 status = 0; - u32 drv_synd; - - ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status); - MLX5_SET(mbox_out, ent->out, status, status); - MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); - + ent->ret = -ENXIO; mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); return; } @@ -1006,6 +1028,31 @@ static void cmd_work_handler(struct work_struct *work) } } +static int deliv_status_to_err(u8 status) +{ + switch (status) { + case MLX5_CMD_DELIVERY_STAT_OK: + case MLX5_DRIVER_STATUS_ABORTED: + return 0; + case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: + case MLX5_CMD_DELIVERY_STAT_TOK_ERR: + return -EBADR; + case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: + case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: + return -EFAULT; /* Bad address */ + case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: + case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: + return -ENOMSG; + case MLX5_CMD_DELIVERY_STAT_FW_ERR: + return -EIO; + default: + return -EINVAL; + } +} + static const char *deliv_status_to_str(u8 status) { switch (status) { @@ -1102,16 +1149,27 @@ out_err: /* Notes: * 1. Callback functions may not sleep * 2. page queue commands do not support asynchrous completion + * + * return value in case (!callback): + * ret < 0 : Command execution couldn't be submitted by driver + * ret > 0 : Command execution couldn't be performed by firmware + * ret == 0: Command was executed by FW, Caller must check FW outbox status. + * + * return value in case (callback): + * ret < 0 : Command execution couldn't be submitted by driver + * ret == 0: Command will be submitted to FW for execution + * and the callback will be called for further status updates */ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, void *uout, int uout_size, mlx5_cmd_cbk_t callback, - void *context, int page_queue, u8 *status, + void *context, int page_queue, u8 token, bool force_polling) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; struct mlx5_cmd_stats *stats; + u8 status = 0; int err = 0; s64 ds; u16 op; @@ -1142,12 +1200,12 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, cmd_work_handler(&ent->work); } else if (!queue_work(cmd->wq, &ent->work)) { mlx5_core_warn(dev, "failed to queue work\n"); - err = -ENOMEM; + err = -EALREADY; goto out_free; } if (callback) - goto out; /* mlx5_cmd_comp_handler() will put(ent) */ + return 0; /* mlx5_cmd_comp_handler() will put(ent) */ err = wait_func(dev, ent); if (err == -ETIMEDOUT || err == -ECANCELED) @@ -1165,12 +1223,11 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, "fw exec time for %s is %lld nsec\n", mlx5_command_str(op), ds); - *status = ent->status; out_free: + status = ent->status; cmd_ent_put(ent); -out: - return err; + return err ? : status; } static ssize_t dbg_write(struct file *filp, const char __user *buf, @@ -1487,7 +1544,7 @@ static void create_debugfs_files(struct mlx5_core_dev *dev) { struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; - dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root); + dbg->dbg_root = debugfs_create_dir("cmd", mlx5_debugfs_get_dev_root(dev)); debugfs_create_file("in", 0400, dbg->dbg_root, dev, &dfops); debugfs_create_file("out", 0200, dbg->dbg_root, dev, &dfops); @@ -1613,15 +1670,15 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force ent->ts2 = ktime_get_ns(); memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); dump_command(dev, ent, 0); - if (!ent->ret) { + + if (vec & MLX5_TRIGGERED_CMD_COMP) + ent->ret = -ENXIO; + + if (!ent->ret) { /* Command completed by FW */ if (!cmd->checksum_disabled) ent->ret = verify_signature(ent); - else - ent->ret = 0; - if (vec & MLX5_TRIGGERED_CMD_COMP) - ent->status = MLX5_DRIVER_STATUS_ABORTED; - else - ent->status = ent->lay->status_own >> 1; + + ent->status = ent->lay->status_own >> 1; mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", ent->ret, deliv_status_to_str(ent->status), ent->status); @@ -1639,21 +1696,18 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force callback = ent->callback; context = ent->context; - err = ent->ret; - if (!err) { + err = ent->ret ? : ent->status; + if (err > 0) /* Failed in FW, command didn't execute */ + err = deliv_status_to_err(err); + + if (!err) err = mlx5_copy_from_msg(ent->uout, ent->out, ent->uout_size); - err = err ? err : mlx5_cmd_check(dev, - ent->in->first.data, - ent->uout); - } - mlx5_free_cmd_msg(dev, ent->out); free_msg(dev, ent->in); - err = err ? err : ent->status; /* final consumer is done, release ent */ cmd_ent_put(ent); callback(err, context); @@ -1720,31 +1774,6 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev) up(&cmd->sem); } -static int status_to_err(u8 status) -{ - switch (status) { - case MLX5_CMD_DELIVERY_STAT_OK: - case MLX5_DRIVER_STATUS_ABORTED: - return 0; - case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: - case MLX5_CMD_DELIVERY_STAT_TOK_ERR: - return -EBADR; - case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: - case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: - case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: - return -EFAULT; /* Bad address */ - case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: - case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: - case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: - case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: - return -ENOMSG; - case MLX5_CMD_DELIVERY_STAT_FW_ERR: - return -EIO; - default: - return -EINVAL; - } -} - static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, gfp_t gfp) { @@ -1788,27 +1817,23 @@ static int is_manage_pages(void *in) return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES; } +/* Notes: + * 1. Callback functions may not sleep + * 2. Page queue commands do not support asynchrous completion + */ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size, mlx5_cmd_cbk_t callback, void *context, bool force_polling) { - struct mlx5_cmd_msg *inb; - struct mlx5_cmd_msg *outb; + u16 opcode = MLX5_GET(mbox_in, in, opcode); + struct mlx5_cmd_msg *inb, *outb; int pages_queue; gfp_t gfp; - int err; - u8 status = 0; - u32 drv_synd; - u16 opcode; u8 token; + int err; - opcode = MLX5_GET(mbox_in, in, opcode); - if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) { - err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); - MLX5_SET(mbox_out, out, status, status); - MLX5_SET(mbox_out, out, syndrome, drv_synd); - return err; - } + if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) + return -ENXIO; pages_queue = is_manage_pages(in); gfp = callback ? GFP_ATOMIC : GFP_KERNEL; @@ -1834,39 +1859,133 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, } err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, - pages_queue, &status, token, force_polling); + pages_queue, token, force_polling); + if (callback) + return err; + + if (err > 0) /* Failed in FW, command didn't execute */ + err = deliv_status_to_err(err); + if (err) goto out_out; - mlx5_core_dbg(dev, "err %d, status %d\n", err, status); - if (status) { - err = status_to_err(status); - goto out_out; + /* command completed by FW */ + err = mlx5_copy_from_msg(out, outb, out_size); +out_out: + mlx5_free_cmd_msg(dev, outb); +out_in: + free_msg(dev, inb); + return err; +} + +static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, int err) +{ + struct mlx5_cmd_stats *stats; + + if (!err) + return; + + stats = &dev->cmd.stats[opcode]; + spin_lock_irq(&stats->lock); + stats->failed++; + if (err < 0) + stats->last_failed_errno = -err; + if (err == -EREMOTEIO) { + stats->failed_mbox_status++; + stats->last_failed_mbox_status = status; } + spin_unlock_irq(&stats->lock); +} - if (!callback) - err = mlx5_copy_from_msg(out, outb, out_size); +/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */ +static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out) +{ + u8 status = MLX5_GET(mbox_out, out, status); -out_out: - if (!callback) - mlx5_free_cmd_msg(dev, outb); + if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */ + err = -EIO; -out_in: - if (!callback) - free_msg(dev, inb); + if (!err && status != MLX5_CMD_STAT_OK) + err = -EREMOTEIO; + + cmd_status_log(dev, opcode, status, err); return err; } +/** + * mlx5_cmd_do - Executes a fw command, wait for completion. + * Unlike mlx5_cmd_exec, this function will not translate or intercept + * outbox.status and will return -EREMOTEIO when + * outbox.status != MLX5_CMD_STAT_OK + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: + * -EREMOTEIO : Command executed by FW, outbox.status != MLX5_CMD_STAT_OK. + * Caller must check FW outbox status. + * 0 : Command execution successful, outbox.status == MLX5_CMD_STAT_OK. + * < 0 : Command execution couldn't be performed by firmware or driver + */ +int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) +{ + int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); + u16 opcode = MLX5_GET(mbox_in, in, opcode); + + err = cmd_status_err(dev, err, opcode, out); + return err; +} +EXPORT_SYMBOL(mlx5_cmd_do); + +/** + * mlx5_cmd_exec - Executes a fw command, wait for completion + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: 0 if no error, FW command execution was successful + * and outbox status is ok. + */ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) { - int err; + int err = mlx5_cmd_do(dev, in, in_size, out, out_size); - err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); - return err ? : mlx5_cmd_check(dev, in, out); + return mlx5_cmd_check(dev, err, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec); +/** + * mlx5_cmd_exec_polling - Executes a fw command, poll for completion + * Needed for driver force teardown, when command completion EQ + * will not be available to complete the command + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: 0 if no error, FW command execution was successful + * and outbox status is ok. + */ +int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, + void *out, int out_size) +{ + int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); + u16 opcode = MLX5_GET(mbox_in, in, opcode); + + err = cmd_status_err(dev, err, opcode, out); + return mlx5_cmd_check(dev, err, in, out); +} +EXPORT_SYMBOL(mlx5_cmd_exec_polling); + void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, struct mlx5_async_ctx *ctx) { @@ -1895,8 +2014,10 @@ EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); static void mlx5_cmd_exec_cb_handler(int status, void *_work) { struct mlx5_async_work *work = _work; - struct mlx5_async_ctx *ctx = work->ctx; + struct mlx5_async_ctx *ctx; + ctx = work->ctx; + status = cmd_status_err(ctx->dev, status, work->opcode, work->out); work->user_callback(status, work); if (atomic_dec_and_test(&ctx->num_inflight)) wake_up(&ctx->wait); @@ -1910,6 +2031,8 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, work->ctx = ctx; work->user_callback = callback; + work->opcode = MLX5_GET(mbox_in, in, opcode); + work->out = out; if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) return -EIO; ret = cmd_exec(ctx->dev, in, in_size, out, out_size, @@ -1921,17 +2044,6 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, } EXPORT_SYMBOL(mlx5_cmd_exec_cb); -int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size) -{ - int err; - - err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); - - return err ? : mlx5_cmd_check(dev, in, out); -} -EXPORT_SYMBOL(mlx5_cmd_exec_polling); - static void destroy_msg_cache(struct mlx5_core_dev *dev) { struct cmd_msg_cache *ch; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 5371ad0a12eb..15a74966be7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -86,8 +86,9 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, spin_unlock_irqrestore(&tasklet_ctx->lock, flags); } -int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - u32 *in, int inlen, u32 *out, int outlen) +/* Callers must verify outbox status in case of err */ +int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen, u32 *out, int outlen) { int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn_or_apu_element); @@ -101,7 +102,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, memset(out, 0, outlen); MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + err = mlx5_cmd_do(dev, in, inlen, out, outlen); if (err) return err; @@ -148,6 +149,16 @@ err_cmd: mlx5_cmd_exec_in(dev, destroy_cq, din); return err; } +EXPORT_SYMBOL(mlx5_create_cq); + +/* oubox is checked and err val is normalized */ +int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen, u32 *out, int outlen) +{ + int err = mlx5_create_cq(dev, cq, in, inlen, out, outlen); + + return mlx5_cmd_check(dev, err, in, out); +} EXPORT_SYMBOL(mlx5_core_create_cq); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 10d195042ab5..d69bac93a83b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -99,26 +99,32 @@ void mlx5_unregister_debugfs(void) debugfs_remove(mlx5_debugfs_root); } +struct dentry *mlx5_debugfs_get_dev_root(struct mlx5_core_dev *dev) +{ + return dev->priv.dbg.dbg_root; +} +EXPORT_SYMBOL(mlx5_debugfs_get_dev_root); + void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev) { - dev->priv.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg_root); + dev->priv.dbg.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg.dbg_root); } EXPORT_SYMBOL(mlx5_qp_debugfs_init); void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev) { - debugfs_remove_recursive(dev->priv.qp_debugfs); + debugfs_remove_recursive(dev->priv.dbg.qp_debugfs); } EXPORT_SYMBOL(mlx5_qp_debugfs_cleanup); void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev) { - dev->priv.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg_root); + dev->priv.dbg.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg.dbg_root); } void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev) { - debugfs_remove_recursive(dev->priv.eq_debugfs); + debugfs_remove_recursive(dev->priv.dbg.eq_debugfs); } static ssize_t average_read(struct file *filp, char __user *buf, size_t count, @@ -168,8 +174,8 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) const char *namep; int i; - cmd = &dev->priv.cmdif_debugfs; - *cmd = debugfs_create_dir("commands", dev->priv.dbg_root); + cmd = &dev->priv.dbg.cmdif_debugfs; + *cmd = debugfs_create_dir("commands", dev->priv.dbg.dbg_root); for (i = 0; i < MLX5_CMD_OP_MAX; i++) { stats = &dev->cmd.stats[i]; @@ -180,23 +186,51 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) debugfs_create_file("average", 0400, stats->root, stats, &stats_fops); debugfs_create_u64("n", 0400, stats->root, &stats->n); + debugfs_create_u64("failed", 0400, stats->root, &stats->failed); + debugfs_create_u64("failed_mbox_status", 0400, stats->root, + &stats->failed_mbox_status); + debugfs_create_u32("last_failed_errno", 0400, stats->root, + &stats->last_failed_errno); + debugfs_create_u8("last_failed_mbox_status", 0400, stats->root, + &stats->last_failed_mbox_status); } } } void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev) { - debugfs_remove_recursive(dev->priv.cmdif_debugfs); + debugfs_remove_recursive(dev->priv.dbg.cmdif_debugfs); } void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev) { - dev->priv.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg_root); + dev->priv.dbg.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg.dbg_root); } void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev) { - debugfs_remove_recursive(dev->priv.cq_debugfs); + debugfs_remove_recursive(dev->priv.dbg.cq_debugfs); +} + +void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev) +{ + struct dentry *pages; + + dev->priv.dbg.pages_debugfs = debugfs_create_dir("pages", dev->priv.dbg.dbg_root); + pages = dev->priv.dbg.pages_debugfs; + + debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages); + debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.vfs_pages); + debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.host_pf_pages); + debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed); + debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped); + debugfs_create_u32("fw_pages_reclaim_discard", 0400, pages, + &dev->priv.reclaim_pages_discard); +} + +void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + debugfs_remove_recursive(dev->priv.dbg.pages_debugfs); } static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, @@ -441,7 +475,7 @@ int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) if (!mlx5_debugfs_root) return 0; - err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.qp_debugfs, + err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.dbg.qp_debugfs, &qp->dbg, qp->qpn, qp_fields, ARRAY_SIZE(qp_fields), qp); if (err) @@ -468,7 +502,7 @@ int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq) if (!mlx5_debugfs_root) return 0; - err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.eq_debugfs, + err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.dbg.eq_debugfs, &eq->dbg, eq->eqn, eq_fields, ARRAY_SIZE(eq_fields), eq); if (err) @@ -493,7 +527,7 @@ int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) if (!mlx5_debugfs_root) return 0; - err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.cq_debugfs, + err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.dbg.cq_debugfs, &cq->dbg, cq->cqn, cq_fields, ARRAY_SIZE(cq_fields), cq); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index d1093bb2d436..057dde6f4417 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -100,15 +100,11 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli } net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE); - err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive); + err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive, extack); if (err) - goto out; + return err; - err = mlx5_fw_reset_wait_reset_done(dev); -out: - if (err) - NL_SET_ERR_MSG_MOD(extack, "FW activate command failed"); - return err; + return mlx5_fw_reset_wait_reset_done(dev); } static int mlx5_devlink_trigger_fw_live_patch(struct devlink *devlink, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c14e06ca64d8..2704c7537481 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -59,6 +59,7 @@ #include "lib/hv_vhca.h" #include "lib/clock.h" #include "en/rx_res.h" +#include "en/selq.h" extern const struct net_device_ops mlx5e_netdev_ops; struct page_pool; @@ -172,8 +173,9 @@ struct page_pool; #define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\ ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT) -#define MLX5E_MAX_KLM_PER_WQE \ - MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE) +#define MLX5E_MAX_KLM_PER_WQE(mdev) \ + MLX5E_KLM_ENTRIES_PER_WQE(mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev)) \ + << MLX5_MKEY_BSF_OCTO_SIZE) #define MLX5E_MSG_LEVEL NETIF_MSG_LINK @@ -221,6 +223,32 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS); } +/* The maximum WQE size can be retrieved by max_wqe_sz_sq in + * bytes units. Driver hardens the limitation to 1KB (16 + * WQEBBs), unless firmware capability is stricter. + */ +static inline u16 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev) +{ + return min_t(u16, MLX5_SEND_WQE_MAX_WQEBBS, + MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB); +} + +static inline u16 mlx5e_get_sw_max_sq_mpw_wqebbs(u16 max_sq_wqebbs) +{ +/* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS. + * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16, + * see mlx5e_get_max_sq_wqebbs(), the multiplication (16 * 4 == 64) + * overflows the 6-bit DS field of Ctrl Segment. Use a bound lower + * than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be + * cache-aligned. + */ +#if L1_CACHE_BYTES < 128 + return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1); +#else + return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 2); +#endif +} + struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; @@ -427,12 +455,12 @@ struct mlx5e_txqsq { struct netdev_queue *txq; u32 sqn; u16 stop_room; + u16 max_sq_mpw_wqebbs; u8 min_inline_mode; struct device *pdev; __be32 mkey_be; unsigned long state; unsigned int hw_mtu; - struct hwtstamp_config *tstamp; struct mlx5_clock *clock; struct net_device *netdev; struct mlx5_core_dev *mdev; @@ -446,6 +474,7 @@ struct mlx5e_txqsq { struct work_struct recover_work; struct mlx5e_ptpsq *ptpsq; cqe_ts_to_ns ptp_cyc2time; + u16 max_sq_wqebbs; } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { @@ -540,6 +569,8 @@ struct mlx5e_xdpsq { u32 sqn; struct device *pdev; __be32 mkey_be; + u16 stop_room; + u16 max_sq_mpw_wqebbs; u8 min_inline_mode; unsigned long state; unsigned int hw_mtu; @@ -547,6 +578,7 @@ struct mlx5e_xdpsq { /* control path */ struct mlx5_wq_ctrl wq_ctrl; struct mlx5e_channel *channel; + u16 max_sq_wqebbs; } ____cacheline_aligned_in_smp; struct mlx5e_ktls_resync_resp; @@ -575,6 +607,7 @@ struct mlx5e_icosq { /* control path */ struct mlx5_wq_ctrl wq_ctrl; struct mlx5e_channel *channel; + u16 max_sq_wqebbs; struct work_struct recover_work; } ____cacheline_aligned_in_smp; @@ -681,6 +714,7 @@ struct mlx5e_rq { u8 umr_in_progress; u8 umr_last_bulk; u8 umr_completed; + u8 min_wqe_bulk; struct mlx5e_shampo_hd *shampo; } mpwqe; }; @@ -876,9 +910,8 @@ struct mlx5e_trap; struct mlx5e_priv { /* priv data path fields - start */ + struct mlx5e_selq selq; struct mlx5e_txqsq **txq2sq; - int **channel_tc2realtxq; - int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC]; #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx_dp dcbx_dp; #endif @@ -921,7 +954,6 @@ struct mlx5e_priv { u16 drop_rq_q_counter; struct notifier_block events_nb; struct notifier_block blocking_events_nb; - int num_tc_x_num_ch; struct udp_tunnel_nic_info nic_info; #ifdef CONFIG_MLX5_CORE_EN_DCB diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 66180ffb4606..0bd8698f7226 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -178,6 +178,12 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); } +u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz) +{ +#define UMR_WQE_BULK (2) + return min_t(unsigned int, UMR_WQE_BULK, wq_sz / 2 - 1); +} + u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) @@ -196,13 +202,13 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par u16 stop_room; stop_room = mlx5e_tls_get_stop_room(mdev, params); - stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + stop_room += mlx5e_stop_room_for_max_wqe(mdev); if (is_mpwqe) /* A MPWQE can take up to the maximum-sized WQE + all the normal * stop room can be taken if a new packet breaks the active * MPWQE session and allocates its WQEs right away. */ - stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + stop_room += mlx5e_stop_room_for_max_wqe(mdev); return stop_room; } @@ -359,12 +365,13 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, { /* Prefer Striding RQ, unless any of the following holds: * - Striding RQ configuration is not possible/supported. - * - Slow PCI heuristic. + * - CQE compression is ON, and stride_index mini_cqe layout is not supported. * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. * * No XSK params: checking the availability of striding RQ in general. */ - if (!slow_pci_heuristic(mdev) && + if ((!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) || + MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) && mlx5e_striding_rq_possible(mdev, params) && (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || !mlx5e_rx_is_linear_skb(params, NULL))) @@ -717,7 +724,7 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); u32 wqebbs; - max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE; + max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev); max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param); max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr; rest = max_hd_per_wqe % max_klm_per_umr; @@ -774,10 +781,10 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); mlx5e_build_sq_param_common(mdev, param); - param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */ + param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */ param->is_tls = mlx5e_accel_is_ktls_rx(mdev); if (param->is_tls) - param->stop_room += mlx5e_stop_room_for_wqe(1); /* for TLS RX resync NOP */ + param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); MLX5_SET(wq, wq, log_wq_sz, log_wq_size); mlx5e_build_ico_cq_param(mdev, log_wq_size, ¶m->cqp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 433e6967692d..47a368112e31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -129,6 +129,7 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz); u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 82baafd3c00c..335b20b6383b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -195,7 +195,6 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, int node; sq->pdev = c->pdev; - sq->tstamp = c->tstamp; sq->clock = &mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; @@ -449,7 +448,7 @@ static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev, wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - param->stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + param->stop_room = mlx5e_stop_room_for_max_wqe(mdev); mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index c1e07496c89c..9db677e9ca9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -50,7 +50,6 @@ static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv) struct mlx5e_qos_node { struct hlist_node hnode; - struct rcu_head rcu; struct mlx5e_qos_node *parent; u64 rate; u32 bw_share; @@ -132,7 +131,11 @@ static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node __clear_bit(node->qid, priv->htb.qos_used_qids); mlx5e_update_tx_netdev_queues(priv); } - kfree_rcu(node, rcu); + /* Make sure this qid is no longer selected by mlx5e_select_queue, so + * that mlx5e_reactivate_qos_sq can safely restart the netdev TX queue. + */ + synchronize_net(); + kfree(node); } /* TX datapath API */ @@ -273,10 +276,18 @@ err_free_sq: static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node) { struct mlx5e_txqsq *sq; + u16 qid; sq = mlx5e_get_qos_sq(priv, node->qid); - WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq); + qid = mlx5e_qid_from_qos(&priv->channels, node->qid); + + /* If it's a new queue, it will be marked as started at this point. + * Stop it before updating txq2sq. + */ + mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid)); + + priv->txq2sq[qid] = sq; /* Make the change to txq2sq visible before the queue is started. * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, @@ -299,8 +310,13 @@ static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid); mlx5e_deactivate_txqsq(sq); - /* The queue is disabled, no synchronization with datapath is needed. */ priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL; + + /* Make the change to txq2sq visible before the queue is started again. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); } static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid) @@ -485,9 +501,11 @@ int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, opened = test_bit(MLX5E_STATE_OPENED, &priv->state); if (opened) { + mlx5e_selq_prepare(&priv->selq, &priv->channels.params, true); + err = mlx5e_qos_alloc_queues(priv, &priv->channels); if (err) - return err; + goto err_cancel_selq; } root = mlx5e_sw_node_create_root(priv); @@ -508,6 +526,9 @@ int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, */ smp_store_release(&priv->htb.maj_id, htb_maj_id); + if (opened) + mlx5e_selq_apply(&priv->selq); + return 0; err_sw_node_delete: @@ -516,6 +537,8 @@ err_sw_node_delete: err_free_queues: if (opened) mlx5e_qos_close_all_queues(&priv->channels); +err_cancel_selq: + mlx5e_selq_cancel(&priv->selq); return err; } @@ -526,8 +549,15 @@ int mlx5e_htb_root_del(struct mlx5e_priv *priv) qos_dbg(priv->mdev, "TC_HTB_DESTROY\n"); + /* Wait until real_num_tx_queues is updated for mlx5e_select_queue, + * so that we can safely switch to its non-HTB non-PTP fastpath. + */ + synchronize_net(); + + mlx5e_selq_prepare(&priv->selq, &priv->channels.params, false); + mlx5e_selq_apply(&priv->selq); + WRITE_ONCE(priv->htb.maj_id, 0); - synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */ root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT); if (!root) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 0991345c4ae5..86fa0bdbee36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -263,14 +263,14 @@ int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) INIT_LIST_HEAD(&uplink_priv->unready_flows); /* init shared tc flow table */ - err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); + err = mlx5e_tc_esw_init(uplink_priv); return err; } void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) { /* delete shared tc flow table */ - mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); + mlx5e_tc_esw_cleanup(&rpriv->uplink_priv); mutex_destroy(&rpriv->uplink_priv.unready_flows_lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c index c1cdd8c2e37a..7f93426b88b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -442,7 +442,7 @@ int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, goto inner_tir; err = mlx5e_tir_modify(tir, builder); if (err) { - mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of indirect TIR %#x for traffic type %d: err = %d\n", mlx5e_tir_get_tirn(tir), tt, err); if (!final_err) final_err = err; @@ -457,7 +457,7 @@ inner_tir: continue; err = mlx5e_tir_modify(tir, builder); if (err) { - mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of inner indirect TIR %#x for traffic type %d: err = %d\n", mlx5e_tir_get_tirn(tir), tt, err); if (!final_err) final_err = err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c new file mode 100644 index 000000000000..d98a277eb7f8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "selq.h" +#include <linux/slab.h> +#include <linux/netdevice.h> +#include <linux/rcupdate.h> +#include "en.h" +#include "en/ptp.h" + +struct mlx5e_selq_params { + unsigned int num_regular_queues; + unsigned int num_channels; + unsigned int num_tcs; + union { + u8 is_special_queues; + struct { + bool is_htb : 1; + bool is_ptp : 1; + }; + }; +}; + +int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock) +{ + struct mlx5e_selq_params *init_params; + + selq->state_lock = state_lock; + + selq->standby = kvzalloc(sizeof(*selq->standby), GFP_KERNEL); + if (!selq->standby) + return -ENOMEM; + + init_params = kvzalloc(sizeof(*selq->active), GFP_KERNEL); + if (!init_params) { + kvfree(selq->standby); + selq->standby = NULL; + return -ENOMEM; + } + /* Assign dummy values, so that mlx5e_select_queue won't crash. */ + *init_params = (struct mlx5e_selq_params) { + .num_regular_queues = 1, + .num_channels = 1, + .num_tcs = 1, + .is_htb = false, + .is_ptp = false, + }; + rcu_assign_pointer(selq->active, init_params); + + return 0; +} + +void mlx5e_selq_cleanup(struct mlx5e_selq *selq) +{ + WARN_ON_ONCE(selq->is_prepared); + + kvfree(selq->standby); + selq->standby = NULL; + selq->is_prepared = true; + + mlx5e_selq_apply(selq); + + kvfree(selq->standby); + selq->standby = NULL; +} + +void mlx5e_selq_prepare(struct mlx5e_selq *selq, struct mlx5e_params *params, bool htb) +{ + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(selq->is_prepared); + + selq->is_prepared = true; + + selq->standby->num_channels = params->num_channels; + selq->standby->num_tcs = mlx5e_get_dcb_num_tc(params); + selq->standby->num_regular_queues = + selq->standby->num_channels * selq->standby->num_tcs; + selq->standby->is_htb = htb; + selq->standby->is_ptp = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS); +} + +void mlx5e_selq_apply(struct mlx5e_selq *selq) +{ + struct mlx5e_selq_params *old_params; + + WARN_ON_ONCE(!selq->is_prepared); + + selq->is_prepared = false; + + old_params = rcu_replace_pointer(selq->active, selq->standby, + lockdep_is_held(selq->state_lock)); + synchronize_net(); /* Wait until ndo_select_queue starts emitting correct values. */ + selq->standby = old_params; +} + +void mlx5e_selq_cancel(struct mlx5e_selq *selq) +{ + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(!selq->is_prepared); + + selq->is_prepared = false; +} + +#ifdef CONFIG_MLX5_CORE_EN_DCB +static int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ + int dscp_cp = 0; + + if (skb->protocol == htons(ETH_P_IP)) + dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; + else if (skb->protocol == htons(ETH_P_IPV6)) + dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; + + return priv->dcbx_dp.dscp2prio[dscp_cp]; +} +#endif + +static int mlx5e_get_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (READ_ONCE(priv->dcbx_dp.trust_state) == MLX5_QPTS_TRUST_DSCP) + return mlx5e_get_dscp_up(priv, skb); +#endif + if (skb_vlan_tag_present(skb)) + return skb_vlan_tag_get_prio(skb); + return 0; +} + +static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb, + struct mlx5e_selq_params *selq) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int up; + + up = selq->num_tcs > 1 ? mlx5e_get_up(priv, skb) : 0; + + return selq->num_regular_queues + up; +} + +static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb) +{ + u16 classid; + + /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */ + if ((TC_H_MAJ(skb->priority) >> 16) == smp_load_acquire(&priv->htb.maj_id)) + classid = TC_H_MIN(skb->priority); + else + classid = READ_ONCE(priv->htb.defcls); + + if (!classid) + return 0; + + return mlx5e_get_txq_by_classid(priv, classid); +} + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_selq_params *selq; + int txq_ix, up; + + selq = rcu_dereference_bh(priv->selq.active); + + /* This is a workaround needed only for the mlx5e_netdev_change_profile + * flow that zeroes out the whole priv without unregistering the netdev + * and without preventing ndo_select_queue from being called. + */ + if (unlikely(!selq)) + return 0; + + if (likely(!selq->is_special_queues)) { + /* No special queues, netdev_pick_tx returns one of the regular ones. */ + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + if (selq->num_tcs <= 1) + return txq_ix; + + up = mlx5e_get_up(priv, skb); + + /* Normalize any picked txq_ix to [0, num_channels), + * So we can return a txq_ix that matches the channel and + * packet UP. + */ + return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) + + up * selq->num_channels; + } + + if (unlikely(selq->is_htb)) { + /* num_tcs == 1, shortcut for PTP */ + + txq_ix = mlx5e_select_htb_queue(priv, skb); + if (txq_ix > 0) + return txq_ix; + + if (unlikely(selq->is_ptp && mlx5e_use_ptpsq(skb))) + return selq->num_channels; + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. + * If they are selected, switch to regular queues. + * Driver to select these queues only at mlx5e_select_ptpsq() + * and mlx5e_select_htb_queue(). + */ + return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels); + } + + /* PTP is enabled */ + + if (mlx5e_use_ptpsq(skb)) + return mlx5e_select_ptpsq(dev, skb, selq); + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + /* Normalize any picked txq_ix to [0, num_channels). Queues in range + * [0, num_regular_queues) will be mapped to the corresponding channel + * index, so that we can apply the packet's UP (if num_tcs > 1). + * If netdev_pick_tx() picks ptp_channel, switch to a regular queue, + * because driver should select the PTP only at mlx5e_select_ptpsq(). + */ + txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels); + + if (selq->num_tcs <= 1) + return txq_ix; + + up = mlx5e_get_up(priv, skb); + + return txq_ix + up * selq->num_channels; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h new file mode 100644 index 000000000000..6c070141d8f1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_SELQ_H__ +#define __MLX5_EN_SELQ_H__ + +#include <linux/kernel.h> + +struct mlx5e_selq_params; + +struct mlx5e_selq { + struct mlx5e_selq_params __rcu *active; + struct mlx5e_selq_params *standby; + struct mutex *state_lock; /* points to priv->state_lock */ + bool is_prepared; +}; + +struct mlx5e_params; +struct net_device; +struct sk_buff; + +int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock); +void mlx5e_selq_cleanup(struct mlx5e_selq *selq); +void mlx5e_selq_prepare(struct mlx5e_selq *selq, struct mlx5e_params *params, bool htb); +void mlx5e_selq_apply(struct mlx5e_selq *selq); +void mlx5e_selq_cancel(struct mlx5e_selq *selq); + +static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels) +{ + while (unlikely(txq >= num_channels)) + txq -= num_channels; + return txq; +} + +static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels) +{ + if (unlikely(txq >= num_channels)) { + if (unlikely(txq >= num_channels << 3)) + txq %= num_channels; + else + do + txq -= num_channels; + while (txq >= num_channels); + } + return txq; +} + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev); + +#endif /* __MLX5_EN_SELQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c index b0de6b999675..21aab96357b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -18,9 +19,8 @@ tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->flags |= MLX5_ESW_ATTR_FLAG_ACCEPT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flags |= MLX5_ATTR_FLAG_ACCEPT; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c index e600924e30ea..cb8f7593a00c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -2,6 +2,7 @@ // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. #include "act.h" +#include "en/tc/post_act.h" #include "en/tc_priv.h" #include "mlx5_core.h" @@ -101,3 +102,75 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, parse_state->num_actions = flow_action->num_entries; parse_state->extack = extack; } + +void +mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action, + struct mlx5e_tc_flow_action *flow_action_reorder) +{ + struct flow_action_entry *act; + int i, j = 0; + + flow_action_for_each(i, act, flow_action) { + /* Add CT action to be first. */ + if (act->id == FLOW_ACTION_CT) + flow_action_reorder->entries[j++] = act; + } + + flow_action_for_each(i, act, flow_action) { + if (act->id == FLOW_ACTION_CT) + continue; + flow_action_reorder->entries[j++] = act; + } +} + +int +mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action, + struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type) +{ + struct flow_action_entry *act; + struct mlx5e_tc_act *tc_act; + struct mlx5e_priv *priv; + int err = 0, i; + + priv = parse_state->flow->priv; + + flow_action_for_each(i, act, flow_action) { + tc_act = mlx5e_tc_act_get(act->id, ns_type); + if (!tc_act || !tc_act->post_parse || + !tc_act->can_offload(parse_state, act, i, attr)) + continue; + + err = tc_act->post_parse(parse_state, priv, attr); + if (err) + goto out; + } + +out: + return err; +} + +int +mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5_flow_attr *next_attr) +{ + struct mlx5_core_dev *mdev = flow->priv->mdev; + struct mlx5e_tc_mod_hdr_acts *mod_acts; + int err; + + mod_acts = &attr->parse_attr->mod_hdr_acts; + + /* Set handle on current post act rule to next post act rule. */ + err = mlx5e_tc_post_act_set_handle(mdev, next_attr->post_act_handle, mod_acts); + if (err) { + mlx5_core_warn(mdev, "Failed setting post action handle"); + return err; + } + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index 9cc844bd00f5..94a7cf38d6b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -16,6 +16,8 @@ struct mlx5e_tc_act_parse_state { unsigned int num_actions; struct mlx5e_tc_flow *flow; struct netlink_ext_ack *extack; + u32 actions; + bool ct; bool ct_clear; bool encap; bool decap; @@ -23,7 +25,6 @@ struct mlx5e_tc_act_parse_state { bool ptype_host; const struct ip_tunnel_info *tun_info; struct mlx5e_mpls_info mpls_info; - struct pedit_headers_action hdrs[__PEDIT_CMD_MAX]; int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; int if_count; struct mlx5_tc_ct_priv *ct_priv; @@ -32,7 +33,8 @@ struct mlx5e_tc_act_parse_state { struct mlx5e_tc_act { bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index); + int act_index, + struct mlx5_flow_attr *attr); int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -42,6 +44,15 @@ struct mlx5e_tc_act { int (*post_parse)(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr); + + bool (*is_multi_table_act)(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr); +}; + +struct mlx5e_tc_flow_action { + unsigned int num_entries; + struct flow_action_entry **entries; }; extern struct mlx5e_tc_act mlx5e_tc_act_drop; @@ -74,4 +85,19 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, struct flow_action *flow_action, struct netlink_ext_ack *extack); +void +mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action, + struct mlx5e_tc_flow_action *flow_action_reorder); + +int +mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action, + struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type); + +int +mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5_flow_attr *next_attr); + #endif /* __MLX5_EN_TC_ACT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c index 29920ef0180a..c0f08ae6a57f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c @@ -38,11 +38,12 @@ csum_offload_supported(struct mlx5e_priv *priv, static bool tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct mlx5e_tc_flow *flow = parse_state->flow; - return csum_offload_supported(flow->priv, flow->attr->action, + return csum_offload_supported(flow->priv, attr->action, act->csum_flags, parse_state->extack); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c index 58cc33f1363d..b9d38fe807df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -8,13 +8,14 @@ static bool tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { + bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; struct netlink_ext_ack *extack = parse_state->extack; - if (flow_flag_test(parse_state->flow, SAMPLE)) { - NL_SET_ERR_MSG_MOD(extack, - "Sample action with connection tracking is not supported"); + if (parse_state->ct && !clear_action) { + NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supported"); return false; } @@ -40,18 +41,34 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, if (err) return err; - flow_flag_set(parse_state->flow, CT); if (mlx5e_is_eswitch_flow(parse_state->flow)) attr->esw_attr->split_count = attr->esw_attr->out_count; + if (!clear_action) { + attr->flags |= MLX5_ATTR_FLAG_CT; + flow_flag_set(parse_state->flow, CT); + parse_state->ct = true; + } parse_state->ct_clear = clear_action; return 0; } +static bool +tc_act_is_multi_table_act_ct(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + if (act->ct.action & TCA_CT_ACT_CLEAR) + return false; + + return true; +} + struct mlx5e_tc_act mlx5e_tc_act_ct = { .can_offload = tc_act_can_offload_ct, .parse_action = tc_act_parse_ct, + .is_multi_table_act = tc_act_is_multi_table_act_ct, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c index 2e29a23bed12..dd025a95c439 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -18,8 +19,7 @@ tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c index f44515061228..4726bcb46eec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c @@ -8,6 +8,7 @@ static int validate_goto_chain(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { @@ -32,7 +33,7 @@ validate_goto_chain(struct mlx5e_priv *priv, } if (!mlx5_chains_backwards_supported(chains) && - dest_chain <= flow->attr->chain) { + dest_chain <= attr->chain) { NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported"); return -EOPNOTSUPP; } @@ -43,8 +44,8 @@ validate_goto_chain(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (flow->attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | - MLX5_FLOW_CONTEXT_ACTION_DECAP) && + if (attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_DECAP) && !reformat_and_fwd) { NL_SET_ERR_MSG_MOD(extack, "Goto chain is not allowed if action has reformat or decap"); @@ -57,12 +58,13 @@ validate_goto_chain(struct mlx5e_priv *priv, static bool tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; struct mlx5e_tc_flow *flow = parse_state->flow; - if (validate_goto_chain(flow->priv, flow, act, extack)) + if (validate_goto_chain(flow->priv, flow, attr, act, extack)) return false; return true; @@ -74,8 +76,7 @@ tc_act_parse_goto(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; attr->dest_chain = act->chain_index; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c index d775c3d9edf3..e8d227595b3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) { NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index 2e615e0ba972..05a42fb4ba97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -99,7 +99,8 @@ get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev) static bool tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; struct mlx5e_tc_flow *flow = parse_state->flow; @@ -108,8 +109,8 @@ tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv = flow->priv; struct mlx5_esw_flow_attr *esw_attr; - parse_attr = flow->attr->parse_attr; - esw_attr = flow->attr->esw_attr; + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; if (!out_dev) { /* out_dev is NULL when filters with @@ -301,8 +302,7 @@ tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, if (err) return err; - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c index 2c74567b6d25..90b4c1b34776 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; struct mlx5e_tc_flow *flow = parse_state->flow; @@ -39,8 +40,7 @@ tc_act_parse_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state, { attr->parse_attr->mirred_ifindex[0] = act->dev->ifindex; flow_flag_set(parse_state->flow, HAIRPIN); - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c index 89ca88c78840..96a80e03d129 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c @@ -8,7 +8,8 @@ static bool tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; struct mlx5e_priv *priv = parse_state->flow->priv; @@ -47,13 +48,13 @@ tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, static bool tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; - struct mlx5e_tc_flow *flow = parse_state->flow; struct net_device *filter_dev; - filter_dev = flow->attr->parse_attr->filter_dev; + filter_dev = attr->parse_attr->filter_dev; /* we only support mpls pop if it is the first action * and the filter net device is bareudp. Subsequent diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c index 79addbbef087..39f8f71bed9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c @@ -46,9 +46,9 @@ static int parse_pedit_to_modify_hdr(struct mlx5e_priv *priv, const struct flow_action_entry *act, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { + struct pedit_headers_action *hdrs = parse_attr->hdrs; u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; u8 htype = act->mangle.htype; int err = -EOPNOTSUPP; @@ -110,20 +110,20 @@ int mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { if (flow && flow_flag_test(flow, L3_TO_L2_DECAP)) return parse_pedit_to_reformat(act, parse_attr, extack); - return parse_pedit_to_modify_hdr(priv, act, namespace, parse_attr, hdrs, extack); + return parse_pedit_to_modify_hdr(priv, act, namespace, parse_attr, extack); } static bool tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -141,8 +141,7 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state, ns_type = mlx5e_get_flow_namespace(flow); - err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, - attr->parse_attr, parse_state->hdrs, + err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr, flow, parse_state->extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h index da8ab03af58f..258f030a2dc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h @@ -25,7 +25,6 @@ int mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c index 0819110193dc..6454b031ff7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c index 1c32e24e528d..ad09a8a5f36e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c @@ -7,16 +7,16 @@ static bool tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; - struct mlx5e_tc_flow *flow = parse_state->flow; struct mlx5e_tc_flow_parse_attr *parse_attr; struct net_device *out_dev = act->dev; struct mlx5_esw_flow_attr *esw_attr; - parse_attr = flow->attr->parse_attr; - esw_attr = flow->attr->esw_attr; + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; if (!out_dev) return false; @@ -58,8 +58,7 @@ tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, struct net_device *out_dev = act->dev; int err; - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex, MLX5E_TC_INT_PORT_INGRESS, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c index 6699bdf5cf01..2c0196431302 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c @@ -4,17 +4,21 @@ #include <net/psample.h> #include "act.h" #include "en/tc_priv.h" +#include "en/tc/act/sample.h" static bool tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; + bool ct_nat; - if (flow_flag_test(parse_state->flow, CT)) { - NL_SET_ERR_MSG_MOD(extack, - "Sample action with connection tracking is not supported"); + ct_nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; + + if (flow_flag_test(parse_state->flow, CT) && ct_nat) { + NL_SET_ERR_MSG_MOD(extack, "Sample action with CT NAT is not supported"); return false; } @@ -27,11 +31,7 @@ tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - struct mlx5e_sample_attr *sample_attr; - - sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL); - if (!sample_attr) - return -ENOMEM; + struct mlx5e_sample_attr *sample_attr = &attr->sample_attr; sample_attr->rate = act->sample.rate; sample_attr->group_num = act->sample.psample_group->group_num; @@ -39,13 +39,33 @@ tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state, if (act->sample.truncate) sample_attr->trunc_size = act->sample.trunc_size; - attr->sample_attr = sample_attr; + attr->flags |= MLX5_ATTR_FLAG_SAMPLE; flow_flag_set(parse_state->flow, SAMPLE); return 0; } +bool +mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev, + struct mlx5_flow_attr *attr) +{ + if (MLX5_CAP_GEN(mdev, reg_c_preserve) || + attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + return true; + + return false; +} + +static bool +tc_act_is_multi_table_act_sample(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + return mlx5e_tc_act_sample_is_multi_table(priv->mdev, attr); +} + struct mlx5e_tc_act mlx5e_tc_act_sample = { .can_offload = tc_act_can_offload_sample, .parse_action = tc_act_parse_sample, + .is_multi_table_act = tc_act_is_multi_table_act_sample, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h new file mode 100644 index 000000000000..3efb3a15c5d2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_SAMPLE_H__ +#define __MLX5_EN_TC_ACT_SAMPLE_H__ + +#include <net/flow_offload.h> +#include "en/tc_priv.h" + +bool +mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev, + struct mlx5_flow_attr *attr); + +#endif /* __MLX5_EN_TC_ACT_SAMPLE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c index 046b64c2cec4..a7d9eab19e4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c @@ -7,7 +7,8 @@ static bool tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { struct netlink_ext_ack *extack = parse_state->extack; @@ -25,9 +26,8 @@ tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c index 6f4a2cf46afd..b4fa2de9711d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c @@ -8,7 +8,8 @@ static bool tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { if (!act->tunnel) { NL_SET_ERR_MSG_MOD(parse_state->extack, @@ -34,7 +35,8 @@ tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, static bool tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c index 70fc0c2d8813..6378b7558ba2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -9,7 +9,6 @@ static int add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, u32 *action, struct netlink_ext_ack *extack) { const struct flow_action_entry prio_tag_act = { @@ -26,7 +25,7 @@ add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, }; return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, - &prio_tag_act, parse_attr, hdrs, action, + &prio_tag_act, parse_attr, action, extack); } @@ -151,7 +150,8 @@ mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, static bool tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -170,8 +170,8 @@ tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, /* Replace vlan pop+push with vlan modify */ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act, - attr->parse_attr, parse_state->hdrs, - &attr->action, parse_state->extack); + attr->parse_attr, &attr->action, + parse_state->extack); } else { err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action, parse_state->extack); @@ -191,7 +191,6 @@ tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5_flow_attr *attr) { struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; - struct pedit_headers_action *hdrs = parse_state->hdrs; struct netlink_ext_ack *extack = parse_state->extack; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int err; @@ -202,7 +201,7 @@ tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, * tag rewrite. */ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; - err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs, + err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, &attr->action, extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h index 3d62f13ab61f..2fa58c6f44eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h @@ -24,7 +24,6 @@ int mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, const struct flow_action_entry *act, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, u32 *action, struct netlink_ext_ack *extack); #endif /* __MLX5_EN_TC_ACT_VLAN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c index 63e36e7f53e3..28444d4ffd73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c @@ -12,7 +12,6 @@ int mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, const struct flow_action_entry *act, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, u32 *action, struct netlink_ext_ack *extack) { u16 mask16 = VLAN_VID_MASK; @@ -44,7 +43,7 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, return -EOPNOTSUPP; } - err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr, hdrs, + err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr, NULL, extack); *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; @@ -54,7 +53,8 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, static bool tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, - int act_index) + int act_index, + struct mlx5_flow_attr *attr) { return true; } @@ -69,8 +69,7 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, int err; ns_type = mlx5e_get_flow_namespace(parse_state->flow); - err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, - attr->parse_attr, parse_state->hdrs, + err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, attr->parse_attr, &attr->action, parse_state->extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 31b4e39be2d3..dea137dd744b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +#include "en/tc_priv.h" #include "en_tc.h" #include "post_act.h" #include "mlx5_core.h" @@ -75,21 +76,47 @@ mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act) kfree(post_act); } +int +mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle) +{ + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Post action rule matches on fte_id and executes original rule's tc rule action */ + mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, handle->id, MLX5_POST_ACTION_MASK); + + handle->rule = mlx5e_tc_rule_offload(post_act->priv, spec, handle->attr); + if (IS_ERR(handle->rule)) { + err = PTR_ERR(handle->rule); + netdev_warn(post_act->priv->netdev, "Failed to add post action rule"); + goto err_rule; + } + + kvfree(spec); + return 0; + +err_rule: + kvfree(spec); + return err; +} + struct mlx5e_post_act_handle * mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr) { u32 attr_sz = ns_to_attr_sz(post_act->ns_type); - struct mlx5e_post_act_handle *handle = NULL; - struct mlx5_flow_attr *post_attr = NULL; - struct mlx5_flow_spec *spec = NULL; + struct mlx5e_post_act_handle *handle; + struct mlx5_flow_attr *post_attr; int err; handle = kzalloc(sizeof(*handle), GFP_KERNEL); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); post_attr = mlx5_alloc_flow_attr(post_act->ns_type); - if (!handle || !spec || !post_attr) { + if (!handle || !post_attr) { kfree(post_attr); - kvfree(spec); kfree(handle); return ERR_PTR(-ENOMEM); } @@ -100,7 +127,7 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at post_attr->ft = post_act->ft; post_attr->inner_match_level = MLX5_MATCH_NONE; post_attr->outer_match_level = MLX5_MATCH_NONE; - post_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); + post_attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_DECAP; handle->ns_type = post_act->ns_type; /* Splits were handled before post action */ @@ -112,36 +139,29 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at if (err) goto err_xarray; - /* Post action rule matches on fte_id and executes original rule's - * tc rule action - */ - mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, - handle->id, MLX5_POST_ACTION_MASK); - - handle->rule = mlx5_tc_rule_insert(post_act->priv, spec, post_attr); - if (IS_ERR(handle->rule)) { - err = PTR_ERR(handle->rule); - netdev_warn(post_act->priv->netdev, "Failed to add post action rule"); - goto err_rule; - } handle->attr = post_attr; - kvfree(spec); return handle; -err_rule: - xa_erase(&post_act->ids, handle->id); err_xarray: kfree(post_attr); - kvfree(spec); kfree(handle); return ERR_PTR(err); } void +mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle) +{ + mlx5e_tc_rule_unoffload(post_act->priv, handle->rule, handle->attr); + handle->rule = NULL; +} + +void mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle) { - mlx5_tc_rule_delete(post_act->priv, handle->rule, handle->attr); + if (!IS_ERR_OR_NULL(handle->rule)) + mlx5e_tc_post_act_unoffload(post_act, handle); xa_erase(&post_act->ids, handle->id); kfree(handle->attr); kfree(handle); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h index b530ec1981a5..f476774c0b75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h @@ -24,6 +24,14 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at void mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle); +int +mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle); + +void +mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle); + struct mlx5_flow_table * mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c index ff4b4f8a5a9d..fd4504518578 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -5,6 +5,7 @@ #include <net/psample.h> #include "en/mapping.h" #include "en/tc/post_act.h" +#include "en/tc/act/sample.h" #include "en/mod_hdr.h" #include "sample.h" #include "eswitch.h" @@ -46,14 +47,12 @@ struct mlx5e_sample_flow { struct mlx5_flow_handle *pre_rule; struct mlx5_flow_attr *post_attr; struct mlx5_flow_handle *post_rule; - struct mlx5e_post_act_handle *post_act_handle; }; struct mlx5e_sample_restore { struct hlist_node hlist; struct mlx5_modify_hdr *modify_hdr; struct mlx5_flow_handle *rule; - struct mlx5e_post_act_handle *post_act_handle; u32 obj_id; int count; }; @@ -231,69 +230,46 @@ sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler) */ static struct mlx5_modify_hdr * sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id, - struct mlx5e_post_act_handle *handle) + struct mlx5e_tc_mod_hdr_acts *mod_acts) { - struct mlx5e_tc_mod_hdr_acts mod_acts = {}; struct mlx5_modify_hdr *modify_hdr; int err; - err = mlx5e_tc_match_to_reg_set(mdev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + err = mlx5e_tc_match_to_reg_set(mdev, mod_acts, MLX5_FLOW_NAMESPACE_FDB, CHAIN_TO_REG, obj_id); if (err) goto err_set_regc0; - if (handle) { - err = mlx5e_tc_post_act_set_handle(mdev, handle, &mod_acts); - if (err) - goto err_post_act; - } - modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB, - mod_acts.num_actions, - mod_acts.actions); + mod_acts->num_actions, + mod_acts->actions); if (IS_ERR(modify_hdr)) { err = PTR_ERR(modify_hdr); goto err_modify_hdr; } - mlx5e_mod_hdr_dealloc(&mod_acts); + mlx5e_mod_hdr_dealloc(mod_acts); return modify_hdr; err_modify_hdr: -err_post_act: - mlx5e_mod_hdr_dealloc(&mod_acts); + mlx5e_mod_hdr_dealloc(mod_acts); err_set_regc0: return ERR_PTR(err); } -static u32 -restore_hash(u32 obj_id, struct mlx5e_post_act_handle *post_act_handle) -{ - return jhash_2words(obj_id, hash32_ptr(post_act_handle), 0); -} - -static bool -restore_equal(struct mlx5e_sample_restore *restore, u32 obj_id, - struct mlx5e_post_act_handle *post_act_handle) -{ - return restore->obj_id == obj_id && restore->post_act_handle == post_act_handle; -} - static struct mlx5e_sample_restore * sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id, - struct mlx5e_post_act_handle *post_act_handle) + struct mlx5e_tc_mod_hdr_acts *mod_acts) { struct mlx5_eswitch *esw = tc_psample->esw; struct mlx5_core_dev *mdev = esw->dev; struct mlx5e_sample_restore *restore; struct mlx5_modify_hdr *modify_hdr; - u32 hash_key; int err; mutex_lock(&tc_psample->restore_lock); - hash_key = restore_hash(obj_id, post_act_handle); - hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, hash_key) - if (restore_equal(restore, obj_id, post_act_handle)) + hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, obj_id) + if (restore->obj_id == obj_id) goto add_ref; restore = kzalloc(sizeof(*restore), GFP_KERNEL); @@ -302,9 +278,8 @@ sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id, goto err_alloc; } restore->obj_id = obj_id; - restore->post_act_handle = post_act_handle; - modify_hdr = sample_modify_hdr_get(mdev, obj_id, post_act_handle); + modify_hdr = sample_modify_hdr_get(mdev, obj_id, mod_acts); if (IS_ERR(modify_hdr)) { err = PTR_ERR(modify_hdr); goto err_modify_hdr; @@ -317,7 +292,7 @@ sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id, goto err_restore; } - hash_add(tc_psample->restore_hashtbl, &restore->hlist, hash_key); + hash_add(tc_psample->restore_hashtbl, &restore->hlist, obj_id); add_ref: restore->count++; mutex_unlock(&tc_psample->restore_lock); @@ -403,7 +378,7 @@ add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, post_attr->chain = 0; post_attr->prio = 0; post_attr->ft = default_tbl; - post_attr->flags = MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + post_attr->flags = MLX5_ATTR_FLAG_NO_IN_PORT; /* When offloading sample and encap action, if there is no valid * neigh data struct, a slow path rule is offloaded first. Source @@ -492,16 +467,16 @@ del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, struct mlx5_flow_handle * mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - u32 tunnel_id) + struct mlx5_flow_attr *attr) { - struct mlx5e_post_act_handle *post_act_handle = NULL; struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; struct mlx5_esw_flow_attr *pre_esw_attr; struct mlx5_mapped_obj restore_obj = {}; + struct mlx5e_tc_mod_hdr_acts *mod_acts; struct mlx5e_sample_flow *sample_flow; struct mlx5e_sample_attr *sample_attr; struct mlx5_flow_attr *pre_attr; + u32 tunnel_id = attr->tunnel_id; struct mlx5_eswitch *esw; u32 default_tbl_id; u32 obj_id; @@ -513,7 +488,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL); if (!sample_flow) return ERR_PTR(-ENOMEM); - sample_attr = attr->sample_attr; + sample_attr = &attr->sample_attr; sample_attr->sample_flow = sample_flow; /* For NICs with reg_c_preserve support or decap action, use @@ -522,18 +497,11 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, * original flow table. */ esw = tc_psample->esw; - if (MLX5_CAP_GEN(esw->dev, reg_c_preserve) || - attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + if (mlx5e_tc_act_sample_is_multi_table(esw->dev, attr)) { struct mlx5_flow_table *ft; ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act); default_tbl_id = ft->id; - post_act_handle = mlx5e_tc_post_act_add(tc_psample->post_act, attr); - if (IS_ERR(post_act_handle)) { - err = PTR_ERR(post_act_handle); - goto err_post_act; - } - sample_flow->post_act_handle = post_act_handle; } else { err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id); if (err) @@ -546,6 +514,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, err = PTR_ERR(sample_flow->sampler); goto err_sampler; } + sample_attr->sampler_id = sample_flow->sampler->sampler_id; /* Create an id mapping reg_c0 value to sample object. */ restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE; @@ -559,7 +528,8 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, sample_attr->restore_obj_id = obj_id; /* Create sample restore context. */ - sample_flow->restore = sample_restore_get(tc_psample, obj_id, post_act_handle); + mod_acts = &attr->parse_attr->mod_hdr_acts; + sample_flow->restore = sample_restore_get(tc_psample, obj_id, mod_acts); if (IS_ERR(sample_flow->restore)) { err = PTR_ERR(sample_flow->restore); goto err_sample_restore; @@ -580,13 +550,13 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, if (tunnel_id) pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; pre_attr->modify_hdr = sample_flow->restore->modify_hdr; - pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE; + pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE; pre_attr->inner_match_level = attr->inner_match_level; pre_attr->outer_match_level = attr->outer_match_level; pre_attr->chain = attr->chain; pre_attr->prio = attr->prio; - pre_attr->sample_attr = attr->sample_attr; - sample_attr->sampler_id = sample_flow->sampler->sampler_id; + pre_attr->ft = attr->ft; + pre_attr->sample_attr = *sample_attr; pre_esw_attr = pre_attr->esw_attr; pre_esw_attr->in_mdev = esw_attr->in_mdev; pre_esw_attr->in_rep = esw_attr->in_rep; @@ -611,9 +581,6 @@ err_sampler: if (sample_flow->post_rule) del_post_rule(esw, sample_flow, attr); err_post_rule: - if (post_act_handle) - mlx5e_tc_post_act_del(tc_psample->post_act, post_act_handle); -err_post_act: kfree(sample_flow); return ERR_PTR(err); } @@ -633,15 +600,13 @@ mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, * will hit fw syndromes. */ esw = tc_psample->esw; - sample_flow = attr->sample_attr->sample_flow; + sample_flow = attr->sample_attr.sample_flow; mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr); sample_restore_put(tc_psample, sample_flow->restore); - mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id); + mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id); sampler_put(tc_psample, sample_flow->sampler); - if (sample_flow->post_act_handle) - mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle); - else + if (sample_flow->post_rule) del_post_rule(esw, sample_flow, attr); kfree(sample_flow->pre_attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h index 9ef8a49d7801..a569367eae4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h @@ -26,8 +26,7 @@ void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj struct mlx5_flow_handle * mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - u32 tunnel_id); + struct mlx5_flow_attr *attr); void mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv, @@ -45,8 +44,7 @@ mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample); static inline struct mlx5_flow_handle * mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr, - u32 tunnel_id) + struct mlx5_flow_attr *attr) { return ERR_PTR(-EOPNOTSUPP); } static inline void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 4a0d38d219ed..875e77af0ae6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -18,6 +18,7 @@ #include "lib/fs_chains.h" #include "en/tc_ct.h" +#include "en/tc_priv.h" #include "en/mod_hdr.h" #include "en/mapping.h" #include "en/tc/post_act.h" @@ -68,7 +69,6 @@ struct mlx5_tc_ct_priv { struct mlx5_ct_flow { struct mlx5_flow_attr *pre_ct_attr; struct mlx5_flow_handle *pre_ct_rule; - struct mlx5e_post_act_handle *post_act_handle; struct mlx5_ct_ft *ft; u32 chain_mapping; }; @@ -809,7 +809,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct; attr->outer_match_level = MLX5_MATCH_L4; attr->counter = entry->counter->counter; - attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) attr->esw_attr->in_mdev = priv->mdev; @@ -1756,7 +1756,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) /* We translate the tc filter with CT action to the following HW model: * * +---------------------+ - * + ft prio (tc chain) + + * + ft prio (tc chain) + * + original match + * +---------------------+ * | set chain miss mapping @@ -1766,7 +1766,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * v * +---------------------+ * + pre_ct/pre_ct_nat + if matches +-------------------------+ - * + zone+nat match +---------------->+ post_act (see below) + + * + zone+nat match +---------------->+ post_act (see below) + * +---------------------+ set zone +-------------------------+ * | set zone * v @@ -1781,21 +1781,19 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * | do nat (if needed) * v * +--------------+ - * + post_act + original filter actions + * + post_act + original filter actions * + fte_id match +------------------------> * +--------------+ */ static struct mlx5_flow_handle * __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *orig_spec, struct mlx5_flow_attr *attr) { bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + struct mlx5e_tc_mod_hdr_acts *pre_mod_acts; u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); - struct mlx5e_post_act_handle *handle; struct mlx5_flow_attr *pre_ct_attr; struct mlx5_modify_hdr *mod_hdr; struct mlx5_ct_flow *ct_flow; @@ -1818,14 +1816,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } ct_flow->ft = ft; - handle = mlx5e_tc_post_act_add(ct_priv->post_act, attr); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - ct_dbg("Failed to allocate post action handle"); - goto err_post_act_handle; - } - ct_flow->post_act_handle = handle; - /* Base flow attributes of both rules on original rule attribute */ ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); if (!ct_flow->pre_ct_attr) { @@ -1835,6 +1825,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, pre_ct_attr = ct_flow->pre_ct_attr; memcpy(pre_ct_attr, attr, attr_sz); + pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts; /* Modify the original rule's action to fwd and modify, leave decap */ pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; @@ -1853,30 +1844,22 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } ct_flow->chain_mapping = chain_mapping; - err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type, + err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type, CHAIN_TO_REG, chain_mapping); if (err) { ct_dbg("Failed to set chain register mapping"); goto err_mapping; } - err = mlx5e_tc_post_act_set_handle(priv->mdev, handle, &pre_mod_acts); - if (err) { - ct_dbg("Failed to set post action handle"); - goto err_mapping; - } - /* If original flow is decap, we do it before going into ct table * so add a rewrite for the tunnel match_id. */ if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && attr->chain == 0) { - u32 tun_id = mlx5e_tc_get_flow_tun_id(flow); - - err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, + err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type, TUNNEL_TO_REG, - tun_id); + attr->tunnel_id); if (err) { ct_dbg("Failed to set tunnel register mapping"); goto err_mapping; @@ -1884,8 +1867,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, - pre_mod_acts.num_actions, - pre_mod_acts.actions); + pre_mod_acts->num_actions, + pre_mod_acts->actions); if (IS_ERR(mod_hdr)) { err = PTR_ERR(mod_hdr); ct_dbg("Failed to create pre ct mod hdr"); @@ -1905,20 +1888,18 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } attr->ct_attr.ct_flow = ct_flow; - mlx5e_mod_hdr_dealloc(&pre_mod_acts); + mlx5e_mod_hdr_dealloc(pre_mod_acts); return ct_flow->pre_ct_rule; err_insert_orig: mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); err_mapping: - mlx5e_mod_hdr_dealloc(&pre_mod_acts); + mlx5e_mod_hdr_dealloc(pre_mod_acts); mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); err_get_chain: kfree(ct_flow->pre_ct_attr); err_alloc_pre: - mlx5e_tc_post_act_del(ct_priv->post_act, handle); -err_post_act_handle: mlx5_tc_ct_del_ft_cb(ct_priv, ft); err_ft: kfree(ct_flow); @@ -1926,87 +1907,19 @@ err_ft: return ERR_PTR(err); } -static struct mlx5_flow_handle * -__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5_flow_spec *orig_spec, - struct mlx5_flow_attr *attr, - struct mlx5e_tc_mod_hdr_acts *mod_acts) -{ - struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); - struct mlx5_flow_attr *pre_ct_attr; - struct mlx5_modify_hdr *mod_hdr; - struct mlx5_flow_handle *rule; - struct mlx5_ct_flow *ct_flow; - int err; - - ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); - if (!ct_flow) - return ERR_PTR(-ENOMEM); - - /* Base esw attributes on original rule attribute */ - pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); - if (!pre_ct_attr) { - err = -ENOMEM; - goto err_attr; - } - - memcpy(pre_ct_attr, attr, attr_sz); - - mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, - mod_acts->num_actions, - mod_acts->actions); - if (IS_ERR(mod_hdr)) { - err = PTR_ERR(mod_hdr); - ct_dbg("Failed to add create ct clear mod hdr"); - goto err_mod_hdr; - } - - pre_ct_attr->modify_hdr = mod_hdr; - - rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - ct_dbg("Failed to add ct clear rule"); - goto err_insert; - } - - attr->ct_attr.ct_flow = ct_flow; - ct_flow->pre_ct_attr = pre_ct_attr; - ct_flow->pre_ct_rule = rule; - return rule; - -err_insert: - mlx5_modify_header_dealloc(priv->mdev, mod_hdr); -err_mod_hdr: - netdev_warn(priv->netdev, - "Failed to offload ct clear flow, err %d\n", err); - kfree(pre_ct_attr); -err_attr: - kfree(ct_flow); - - return ERR_PTR(err); -} - struct mlx5_flow_handle * mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) { - bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; struct mlx5_flow_handle *rule; if (!priv) return ERR_PTR(-EOPNOTSUPP); mutex_lock(&priv->control_lock); - - if (clear_action) - rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts); - else - rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr); + rule = __mlx5_tc_ct_flow_offload(priv, spec, attr); mutex_unlock(&priv->control_lock); return rule; @@ -2014,21 +1927,17 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, static void __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5e_tc_flow *flow, - struct mlx5_ct_flow *ct_flow) + struct mlx5_ct_flow *ct_flow, + struct mlx5_flow_attr *attr) { struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr; struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); - mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, - pre_ct_attr); + mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr); mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); - if (ct_flow->post_act_handle) { - mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); - mlx5e_tc_post_act_del(ct_priv->post_act, ct_flow->post_act_handle); - mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); - } + mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); + mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); kfree(ct_flow->pre_ct_attr); kfree(ct_flow); @@ -2036,7 +1945,6 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) { struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; @@ -2048,7 +1956,7 @@ mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, return; mutex_lock(&priv->control_lock); - __mlx5_tc_ct_delete_flow(priv, flow, ct_flow); + __mlx5_tc_ct_delete_flow(priv, ct_flow, attr); mutex_unlock(&priv->control_lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 99662af1e41a..2b21c7b97a52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -116,13 +116,11 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_handle * mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr); bool @@ -183,7 +181,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, static inline struct mlx5_flow_handle * mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) @@ -193,7 +190,6 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, static inline void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, - struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) { } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index 70b40ae384e4..03c953dacb09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -38,6 +38,7 @@ struct mlx5e_tc_flow_parse_attr { struct mlx5e_mpls_info mpls_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; + struct pedit_headers_action hdrs[__PEDIT_CMD_MAX]; struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; struct ethhdr eth; @@ -108,10 +109,20 @@ struct mlx5e_tc_flow { struct rcu_head rcu_head; struct completion init_done; struct completion del_hw_done; - int tunnel_id; /* the mapped tunnel id of this flow */ struct mlx5_flow_attr *attr; + struct list_head attrs; }; +struct mlx5_flow_handle * +mlx5e_tc_rule_offload(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); + +void +mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer); struct mlx5_flow_handle * @@ -120,6 +131,12 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr); +struct mlx5_flow_attr * +mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow); + +void mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow); +int mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow); + bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow); bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow); bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow); @@ -174,6 +191,7 @@ struct mlx5_flow_handle * mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec); + void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index d39d0dae22fc..5105c8018d37 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -173,19 +173,29 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, list_for_each_entry(flow, flow_list, tmp_list) { if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW)) continue; - attr = flow->attr; - esw_attr = attr->esw_attr; - spec = &attr->parse_attr->spec; + spec = &flow->attr->parse_attr->spec; + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat; esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; /* Do not offload flows with unresolved neighbors */ if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) continue; + + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n", + err); + continue; + } + /* update from slow path rule to encap rule */ - rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr); if (IS_ERR(rule)) { + mlx5e_tc_unoffload_flow_post_acts(flow); err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", err); @@ -214,12 +224,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, list_for_each_entry(flow, flow_list, tmp_list) { if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW)) continue; - attr = flow->attr; - esw_attr = attr->esw_attr; - spec = &attr->parse_attr->spec; + spec = &flow->attr->parse_attr->spec; /* update from encap rule to slow path rule */ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; /* mark the flow's encap dest as non-valid */ esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; @@ -230,7 +241,8 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, continue; } - mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); + mlx5e_tc_unoffload_flow_post_acts(flow); flow->rule[0] = rule; /* was unset when fast path rule removed */ flow_flag_set(flow, OFFLOADED); @@ -488,12 +500,17 @@ static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, int out_index); void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index) + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + int out_index) { struct mlx5e_encap_entry *e = flow->encaps[out_index].e; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - if (flow->attr->esw_attr->dests[out_index].flags & + if (!mlx5e_is_eswitch_flow(flow)) + return; + + if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) mlx5e_detach_encap_route(priv, flow, out_index); @@ -733,6 +750,7 @@ static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv) static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct mlx5e_encap_entry *e, bool new_encap_entry, unsigned long tbl_time_before, @@ -740,6 +758,7 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, @@ -748,7 +767,6 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; const struct mlx5e_mpls_info *mpls_info; unsigned long tbl_time_before = 0; @@ -837,8 +855,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, e->compl_result = 1; attach_flow: - err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before, - out_index); + err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created, + tbl_time_before, out_index); if (err) goto out_err; @@ -1201,6 +1219,7 @@ out: static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct mlx5e_encap_entry *e, bool new_encap_entry, unsigned long tbl_time_before, @@ -1209,7 +1228,6 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; unsigned long tbl_time_after = tbl_time_before; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; struct mlx5_esw_flow_attr *esw_attr; struct mlx5e_route_entry *r; @@ -1360,17 +1378,19 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, list_for_each_entry(flow, encap_flows, tmp_list) { struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; struct mlx5_flow_spec *spec; if (flow_flag_test(flow, FAILED)) continue; + spec = &flow->attr->parse_attr->spec; + + attr = mlx5e_tc_get_encap_attr(flow); esw_attr = attr->esw_attr; parse_attr = attr->parse_attr; - spec = &parse_attr->spec; err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts, e->out_dev, e->route_dev_ifindex, @@ -1380,7 +1400,7 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, continue; } - err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); + err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr); if (err) { mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d", err); @@ -1392,9 +1412,18 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) goto offload_to_slow_path; + + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n", + err); + goto offload_to_slow_path; + } + /* update from slow path rule to encap rule */ - rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr); if (IS_ERR(rule)) { + mlx5e_tc_unoffload_flow_post_acts(flow); err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h index 3391504d9a08..d542b8476491 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h @@ -7,15 +7,19 @@ #include "tc_priv.h" void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index); + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + int out_index); int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, struct net_device **encap_dev, bool *encap_valid); + int mlx5e_attach_decap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index b789af07829c..210d23bf3701 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -9,19 +9,6 @@ #define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) -/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS - * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. - * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a - * full-session WQE be cache-aligned. - */ -#if L1_CACHE_BYTES < 128 -#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) -#else -#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) -#endif - -#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) - #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND) @@ -68,8 +55,6 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq); void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq); /* TX */ -u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev); netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); @@ -308,9 +293,9 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more); void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq); -static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session) +static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) { - return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS; + return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS; } static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) @@ -431,10 +416,10 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, } } -static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size) -{ - BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS); +#define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1) +static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size) +{ /* A WQE must not cross the page boundary, hence two conditions: * 1. Its size must not exceed the page size. * 2. If the WQE size is X, and the space remaining in a page is less @@ -443,18 +428,28 @@ static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size) * stop room of X-1 + X. * WQE size is also limited by the hardware limit. */ + WARN_ONCE(wqe_size > mlx5e_get_max_sq_wqebbs(mdev), + "wqe_size %u is greater than max SQ WQEBBs %u", + wqe_size, mlx5e_get_max_sq_wqebbs(mdev)); - if (__builtin_constant_p(wqe_size)) - BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS); - else - WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS); - return wqe_size * 2 - 1; + return MLX5E_STOP_ROOM(wqe_size); +} + +static inline u16 mlx5e_stop_room_for_max_wqe(struct mlx5_core_dev *mdev) +{ + return MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev)); } static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size) { - u16 room = sq->reserved_room + mlx5e_stop_room_for_wqe(wqe_size); + u16 room = sq->reserved_room; + + WARN_ONCE(wqe_size > sq->max_sq_wqebbs, + "wqe_size %u is greater than max SQ WQEBBs %u", + wqe_size, sq->max_sq_wqebbs); + + room += MLX5E_STOP_ROOM(wqe_size); return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 56e10c84a706..a7f020399370 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -199,7 +199,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) struct mlx5e_tx_wqe *wqe; u16 pi; - pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS); + pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs); wqe = MLX5E_TX_FETCH_WQE(sq, pi); net_prefetchw(wqe->data); @@ -245,10 +245,8 @@ enum { INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) { if (unlikely(!sq->mpwqe.wqe)) { - const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - stop_room))) { + sq->stop_room))) { /* SQ is full, ring doorbell */ mlx5e_xmit_xdp_doorbell(sq); sq->stats->full++; @@ -288,7 +286,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); - if (unlikely(mlx5e_xdp_mpqwe_is_full(session))) + if (unlikely(mlx5e_xdp_mpqwe_is_full(session, sq->max_sq_mpw_wqebbs))) mlx5e_xdp_mpwqe_complete(sq); mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 8d991c3b7a50..c62f11d7ef6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -123,12 +123,13 @@ static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur) return cur; } -static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session) +static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) { if (session->inline_on) return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > - MLX5E_TX_MPW_MAX_NUM_DS; - return mlx5e_tx_mpwqe_is_full(session); + max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS; + + return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs); } struct mlx5e_xdp_wqe_info { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index d964665eaa63..62cde3e87c2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -139,15 +139,6 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, return true; } -static inline bool mlx5e_accel_tx_is_ipsec_flow(struct mlx5e_accel_tx_state *state) -{ -#ifdef CONFIG_MLX5_EN_IPSEC - return mlx5e_ipsec_is_tx_flow(&state->ipsec); -#else - return false; -#endif -} - static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq, struct mlx5e_accel_tx_state *state) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 9ad3459fb63a..aaf11c66bf4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -32,9 +32,9 @@ u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *pa num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE); - stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS); - stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS); - stop_room += num_dumps * mlx5e_stop_room_for_wqe(MLX5E_KTLS_DUMP_WQEBBS); + stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS); + stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS); + stop_room += num_dumps * mlx5e_stop_room_for_wqe(mdev, MLX5E_KTLS_DUMP_WQEBBS); return stop_room; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index 7a700f913582..a05580cea481 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -386,5 +386,5 @@ u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par /* FPGA */ /* Resync SKB. */ - return mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + return mlx5e_stop_room_for_max_wqe(mdev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index a4c8d8d00d5a..d659fe07d464 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1142,7 +1142,7 @@ static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context) err = mlx5_set_trust_state(priv->mdev, *trust_state); if (err) return err; - priv->dcbx_dp.trust_state = *trust_state; + WRITE_ONCE(priv->dcbx_dp.trust_state, *trust_state); return 0; } @@ -1187,16 +1187,18 @@ static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio) static int mlx5e_trust_initialize(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u8 trust_state; int err; - priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP; - - if (!MLX5_DSCP_SUPPORTED(mdev)) + if (!MLX5_DSCP_SUPPORTED(mdev)) { + WRITE_ONCE(priv->dcbx_dp.trust_state, MLX5_QPTS_TRUST_PCP); return 0; + } - err = mlx5_query_trust_state(priv->mdev, &priv->dcbx_dp.trust_state); + err = mlx5_query_trust_state(priv->mdev, &trust_state); if (err) return err; + WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state); mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params, priv->dcbx_dp.trust_state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3667f5ef5990..91b90bbb2b28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -72,12 +72,13 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { - bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && - MLX5_CAP_GEN(mdev, umr_ptr_rlky) && - MLX5_CAP_ETH(mdev, reg_umr_sq); - u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq); - bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap; + bool striding_rq_umr, inline_umr; + u16 max_wqe_sz_cap; + striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); + max_wqe_sz_cap = mlx5e_get_max_sq_wqebbs(mdev) * MLX5_SEND_WQE_BB; + inline_umr = max_wqe_sz_cap >= MLX5E_UMR_WQE_INLINE_SZ; if (!striding_rq_umr) return false; if (!inline_umr) { @@ -594,6 +595,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); + rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz); rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz); @@ -1164,6 +1166,9 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, is_redirect ? &c->priv->channel_stats[c->ix]->xdpsq : &c->priv->channel_stats[c->ix]->rq_xdpsq; + sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); + sq->stop_room = MLX5E_STOP_ROOM(sq->max_sq_wqebbs); + sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs); param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1238,6 +1243,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, sq->channel = c; sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->reserved_room = param->stop_room; + sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1313,7 +1319,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int err; sq->pdev = c->pdev; - sq->tstamp = c->tstamp; sq->clock = &mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; @@ -1324,6 +1329,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); + sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs); INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); @@ -2677,39 +2684,41 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) struct mlx5e_txqsq *sq = &c->sq[tc]; priv->txq2sq[sq->txq_ix] = sq; - priv->channel_tc2realtxq[i][tc] = i + tc * ch; } } if (!priv->channels.ptp) - return; + goto out; if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state)) - return; + goto out; for (tc = 0; tc < num_tc; tc++) { struct mlx5e_ptp *c = priv->channels.ptp; struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq; priv->txq2sq[sq->txq_ix] = sq; - priv->port_ptp_tc2realtxq[tc] = priv->num_tc_x_num_ch + tc; } -} -static void mlx5e_update_num_tc_x_num_ch(struct mlx5e_priv *priv) -{ - /* Sync with mlx5e_select_queue. */ - WRITE_ONCE(priv->num_tc_x_num_ch, - mlx5e_get_dcb_num_tc(&priv->channels.params) * priv->channels.num); +out: + /* Make the change to txq2sq visible before the queue is started. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); } void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { - mlx5e_update_num_tc_x_num_ch(priv); mlx5e_build_txq_maps(priv); mlx5e_activate_channels(&priv->channels); mlx5e_qos_activate_queues(priv); mlx5e_xdp_tx_enable(priv); + + /* dev_watchdog() wants all TX queues to be started when the carrier is + * OK, including the ones in range real_num_tx_queues..num_tx_queues-1. + * Make it happy to avoid TX timeout false alarms. + */ netif_tx_start_all_queues(priv->netdev); if (mlx5e_is_vport_rep(priv)) @@ -2729,11 +2738,13 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) if (mlx5e_is_vport_rep(priv)) mlx5e_remove_sqs_fwd_rules(priv); - /* FIXME: This is a W/A only for tx timeout watch dog false alarm when - * polling for inactive tx queues. + /* The results of ndo_select_queue are unreliable, while netdev config + * is being changed (real_num_tx_queues, num_tc). Stop all queues to + * prevent ndo_start_xmit from being called, so that it can assume that + * the selected queue is always valid. */ - netif_tx_stop_all_queues(priv->netdev); netif_tx_disable(priv->netdev); + mlx5e_xdp_tx_disable(priv); mlx5e_deactivate_channels(&priv->channels); } @@ -2793,6 +2804,7 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, mlx5e_close_channels(&old_chs); priv->profile->update_rx(priv); + mlx5e_selq_apply(&priv->selq); out: mlx5e_activate_priv_channels(priv); @@ -2816,13 +2828,24 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv, return mlx5e_switch_priv_params(priv, params, preactivate, context); new_chs.params = *params; + + mlx5e_selq_prepare(&priv->selq, &new_chs.params, !!priv->htb.maj_id); + err = mlx5e_open_channels(priv, &new_chs); if (err) - return err; + goto err_cancel_selq; + err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context); if (err) - mlx5e_close_channels(&new_chs); + goto err_close; + + return 0; + +err_close: + mlx5e_close_channels(&new_chs); +err_cancel_selq: + mlx5e_selq_cancel(&priv->selq); return err; } @@ -2862,6 +2885,8 @@ int mlx5e_open_locked(struct net_device *netdev) struct mlx5e_priv *priv = netdev_priv(netdev); int err; + mlx5e_selq_prepare(&priv->selq, &priv->channels.params, !!priv->htb.maj_id); + set_bit(MLX5E_STATE_OPENED, &priv->state); err = mlx5e_open_channels(priv, &priv->channels); @@ -2869,6 +2894,7 @@ int mlx5e_open_locked(struct net_device *netdev) goto err_clear_state_opened_flag; priv->profile->update_rx(priv); + mlx5e_selq_apply(&priv->selq); mlx5e_activate_priv_channels(priv); mlx5e_apply_traps(priv, true); if (priv->profile->update_carrier) @@ -2879,6 +2905,7 @@ int mlx5e_open_locked(struct net_device *netdev) err_clear_state_opened_flag: clear_bit(MLX5E_STATE_OPENED, &priv->state); + mlx5e_selq_cancel(&priv->selq); return err; } @@ -4636,11 +4663,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 priv->max_nch); mlx5e_params_mqprio_reset(params); - /* Set an initial non-zero value, so that mlx5e_select_queue won't - * divide by zero if called before first activating channels. - */ - priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc; - /* SQ */ params->log_sq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : @@ -5193,7 +5215,8 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, struct net_device *netdev, struct mlx5_core_dev *mdev) { - int nch, num_txqs, node, i; + int nch, num_txqs, node; + int err; num_txqs = netdev->num_tx_queues; nch = mlx5e_calc_max_nch(mdev, netdev, profile); @@ -5210,6 +5233,11 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, return -ENOMEM; mutex_init(&priv->state_lock); + + err = mlx5e_selq_init(&priv->selq, &priv->state_lock); + if (err) + goto err_free_cpumask; + hash_init(priv->htb.qos_tc2node); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); @@ -5218,7 +5246,7 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, priv->wq = create_singlethread_workqueue("mlx5e"); if (!priv->wq) - goto err_free_cpumask; + goto err_free_selq; priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node); if (!priv->txq2sq) @@ -5228,36 +5256,21 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, if (!priv->tx_rates) goto err_free_txq2sq; - priv->channel_tc2realtxq = - kcalloc_node(nch, sizeof(*priv->channel_tc2realtxq), GFP_KERNEL, node); - if (!priv->channel_tc2realtxq) - goto err_free_tx_rates; - - for (i = 0; i < nch; i++) { - priv->channel_tc2realtxq[i] = - kcalloc_node(profile->max_tc, sizeof(**priv->channel_tc2realtxq), - GFP_KERNEL, node); - if (!priv->channel_tc2realtxq[i]) - goto err_free_channel_tc2realtxq; - } - priv->channel_stats = kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node); if (!priv->channel_stats) - goto err_free_channel_tc2realtxq; + goto err_free_tx_rates; return 0; -err_free_channel_tc2realtxq: - while (--i >= 0) - kfree(priv->channel_tc2realtxq[i]); - kfree(priv->channel_tc2realtxq); err_free_tx_rates: kfree(priv->tx_rates); err_free_txq2sq: kfree(priv->txq2sq); err_destroy_workqueue: destroy_workqueue(priv->wq); +err_free_selq: + mlx5e_selq_cleanup(&priv->selq); err_free_cpumask: free_cpumask_var(priv->scratchpad.cpumask); return -ENOMEM; @@ -5274,12 +5287,12 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) for (i = 0; i < priv->stats_nch; i++) kvfree(priv->channel_stats[i]); kfree(priv->channel_stats); - for (i = 0; i < priv->max_nch; i++) - kfree(priv->channel_tc2realtxq[i]); - kfree(priv->channel_tc2realtxq); kfree(priv->tx_rates); kfree(priv->txq2sq); destroy_workqueue(priv->wq); + mutex_lock(&priv->state_lock); + mlx5e_selq_cleanup(&priv->selq); + mutex_unlock(&priv->state_lock); free_cpumask_var(priv->scratchpad.cpumask); for (i = 0; i < priv->htb.max_qos_sqs; i++) @@ -5345,6 +5358,7 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *prof } netif_carrier_off(netdev); + netif_tx_disable(netdev); dev_net_set(netdev, mlx5_core_net(mdev)); return netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 06d1f46f1688..6b7e7ea6ded2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -55,6 +55,7 @@ #include "diag/en_rep_tracepoint.h" #include "en_accel/ipsec.h" #include "en/tc/int_port.h" +#include "en/ptp.h" #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) @@ -401,13 +402,18 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; + int n, tc, nch, num_sqs = 0; struct mlx5e_channel *c; - int n, tc, num_sqs = 0; int err = -ENOMEM; + bool ptp_sq; u32 *sqs; - sqs = kcalloc(priv->channels.num * mlx5e_get_dcb_num_tc(&priv->channels.params), - sizeof(*sqs), GFP_KERNEL); + ptp_sq = !!(priv->channels.ptp && + MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)); + nch = priv->channels.num + ptp_sq; + + sqs = kcalloc(nch * mlx5e_get_dcb_num_tc(&priv->channels.params), sizeof(*sqs), + GFP_KERNEL); if (!sqs) goto out; @@ -416,6 +422,12 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) for (tc = 0; tc < c->num_tc; tc++) sqs[num_sqs++] = c->sq[tc].sqn; } + if (ptp_sq) { + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + + for (tc = 0; tc < ptp_ch->num_tc; tc++) + sqs[num_sqs++] = ptp_ch->ptpsq[tc].txqsq.sqn; + } err = mlx5e_sqs2vport_start(esw, rep, sqs, num_sqs); kfree(sqs); @@ -632,11 +644,6 @@ static void mlx5e_build_rep_params(struct net_device *netdev) params->mqprio.num_tc = 1; params->tunneled_offload_en = false; - /* Set an initial non-zero value, so that mlx5e_select_queue won't - * divide by zero if called before first activating channels. - */ - priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc; - mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); } @@ -935,15 +942,21 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) return err; } + err = mlx5e_tc_ht_init(&rpriv->tc_ht); + if (err) + goto err_ht_init; + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { err = mlx5e_init_uplink_rep_tx(rpriv); if (err) - goto destroy_tises; + goto err_init_tx; } return 0; -destroy_tises: +err_init_tx: + mlx5e_tc_ht_cleanup(&rpriv->tc_ht); +err_ht_init: mlx5e_destroy_tises(priv); return err; } @@ -963,6 +976,8 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) if (rpriv->rep->vport == MLX5_VPORT_UPLINK) mlx5e_cleanup_uplink_rep_tx(rpriv); + + mlx5e_tc_ht_cleanup(&rpriv->tc_ht); } static void mlx5e_rep_enable(struct mlx5e_priv *priv) @@ -1099,6 +1114,7 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = { &MLX5E_STATS_GRP(ipsec_sw), &MLX5E_STATS_GRP(ipsec_hw), #endif + &MLX5E_STATS_GRP(ptp), }; static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index b3f7520dfd08..adf5cc6a7b8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -64,11 +64,6 @@ struct mlx5e_tc_tun_encap; struct mlx5e_post_act; struct mlx5_rep_uplink_priv { - /* Filters DB - instantiated by the uplink representor and shared by - * the uplink's VFs - */ - struct rhashtable tc_ht; - /* indirect block callbacks are invoked on bind/unbind events * on registered higher level devices (e.g. tunnel devices) * @@ -113,6 +108,7 @@ struct mlx5e_rep_priv { struct list_head vport_sqs_list; struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ struct rtnl_link_stats64 prev_vf_vport_stats; + struct rhashtable tc_ht; }; static inline diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 6530d7bd5045..074a44b281b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -620,7 +620,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) struct mlx5e_icosq *sq = rq->icosq; int i, err, max_klm_entries, len; - max_klm_entries = MLX5E_MAX_KLM_PER_WQE; + max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev); klm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); @@ -960,8 +960,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk)) rq->stats->congst_umr++; -#define UMR_WQE_BULK (2) - if (likely(missing < UMR_WQE_BULK)) + if (likely(missing < rq->mpwqe.min_wqe_bulk)) return false; if (rq->page_pool) @@ -1490,7 +1489,7 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, static inline struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va, u32 frag_size, u16 headroom, - u32 cqe_bcnt) + u32 cqe_bcnt, u32 metasize) { struct sk_buff *skb = build_skb(va, frag_size); @@ -1502,6 +1501,9 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va, skb_reserve(skb, headroom); skb_put(skb, cqe_bcnt); + if (metasize) + skb_metadata_set(skb, metasize); + return skb; } @@ -1509,7 +1511,7 @@ static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom, u32 len, struct xdp_buff *xdp) { xdp_init_buff(xdp, rq->buff.frame0_sz, &rq->xdp_rxq); - xdp_prepare_buff(xdp, va, headroom, len, false); + xdp_prepare_buff(xdp, va, headroom, len, true); } static struct sk_buff * @@ -1522,6 +1524,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct sk_buff *skb; void *va, *data; u32 frag_size; + u32 metasize; va = page_address(di->page) + wi->offset; data = va + rx_headroom; @@ -1538,7 +1541,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, rx_headroom = xdp.data - xdp.data_hard_start; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt); + metasize = xdp.data - xdp.data_meta; + skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); if (unlikely(!skb)) return NULL; @@ -1837,6 +1841,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct sk_buff *skb; void *va, *data; u32 frag_size; + u32 metasize; /* Check packet size. Note LRO doesn't use linear SKB */ if (unlikely(cqe_bcnt > rq->hw_mtu)) { @@ -1862,7 +1867,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, rx_headroom = xdp.data - xdp.data_hard_start; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32); - skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32); + metasize = xdp.data - xdp.data_meta; + skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32, metasize); if (unlikely(!skb)) return NULL; @@ -1893,7 +1899,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, DMA_FROM_DEVICE); prefetchw(hdr); prefetch(data); - skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size); + skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0); if (unlikely(!skb)) return NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 00f1d16db456..336e4d04c5f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -37,6 +37,10 @@ #include "en/ptp.h" #include "en/port.h" +#ifdef CONFIG_PAGE_POOL_STATS +#include <net/page_pool.h> +#endif + static unsigned int stats_grps_num(struct mlx5e_priv *priv) { return !priv->profile->stats_grps_num ? 0 : @@ -183,6 +187,19 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) }, +#ifdef CONFIG_PAGE_POOL_STATS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_fast) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow_high_order) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_empty) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_refill) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_waive) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cached) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cache_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_released_ref) }, +#endif #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) }, @@ -349,6 +366,19 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, s->rx_congst_umr += rq_stats->congst_umr; s->rx_arfs_err += rq_stats->arfs_err; s->rx_recover += rq_stats->recover; +#ifdef CONFIG_PAGE_POOL_STATS + s->rx_pp_alloc_fast += rq_stats->pp_alloc_fast; + s->rx_pp_alloc_slow += rq_stats->pp_alloc_slow; + s->rx_pp_alloc_empty += rq_stats->pp_alloc_empty; + s->rx_pp_alloc_refill += rq_stats->pp_alloc_refill; + s->rx_pp_alloc_waive += rq_stats->pp_alloc_waive; + s->rx_pp_alloc_slow_high_order += rq_stats->pp_alloc_slow_high_order; + s->rx_pp_recycle_cached += rq_stats->pp_recycle_cached; + s->rx_pp_recycle_cache_full += rq_stats->pp_recycle_cache_full; + s->rx_pp_recycle_ring += rq_stats->pp_recycle_ring; + s->rx_pp_recycle_ring_full += rq_stats->pp_recycle_ring_full; + s->rx_pp_recycle_released_ref += rq_stats->pp_recycle_released_ref; +#endif #ifdef CONFIG_MLX5_EN_TLS s->rx_tls_decrypted_packets += rq_stats->tls_decrypted_packets; s->rx_tls_decrypted_bytes += rq_stats->tls_decrypted_bytes; @@ -455,6 +485,35 @@ static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv, } } +#ifdef CONFIG_PAGE_POOL_STATS +static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c) +{ + struct mlx5e_rq_stats *rq_stats = c->rq.stats; + struct page_pool *pool = c->rq.page_pool; + struct page_pool_stats stats = { 0 }; + + if (!page_pool_get_stats(pool, &stats)) + return; + + rq_stats->pp_alloc_fast = stats.alloc_stats.fast; + rq_stats->pp_alloc_slow = stats.alloc_stats.slow; + rq_stats->pp_alloc_slow_high_order = stats.alloc_stats.slow_high_order; + rq_stats->pp_alloc_empty = stats.alloc_stats.empty; + rq_stats->pp_alloc_waive = stats.alloc_stats.waive; + rq_stats->pp_alloc_refill = stats.alloc_stats.refill; + + rq_stats->pp_recycle_cached = stats.recycle_stats.cached; + rq_stats->pp_recycle_cache_full = stats.recycle_stats.cache_full; + rq_stats->pp_recycle_ring = stats.recycle_stats.ring; + rq_stats->pp_recycle_ring_full = stats.recycle_stats.ring_full; + rq_stats->pp_recycle_released_ref = stats.recycle_stats.released_refcnt; +} +#else +static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c) +{ +} +#endif + static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -465,8 +524,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) for (i = 0; i < priv->stats_nch; i++) { struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i]; + int j; + mlx5e_stats_update_stats_rq_page_pool(priv->channels.c[i]); + mlx5e_stats_grp_sw_update_stats_rq_stats(s, &channel_stats->rq); mlx5e_stats_grp_sw_update_stats_xdpsq(s, &channel_stats->rq_xdpsq); mlx5e_stats_grp_sw_update_stats_ch_stats(s, &channel_stats->ch); @@ -1887,6 +1949,19 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) }, +#ifdef CONFIG_PAGE_POOL_STATS + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_fast) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow_high_order) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_empty) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_refill) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_waive) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cached) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cache_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_released_ref) }, +#endif #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) }, @@ -2348,7 +2423,7 @@ MLX5E_DEFINE_STATS_GRP(channels, 0); MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0); MLX5E_DEFINE_STATS_GRP(eth_ext, 0); static MLX5E_DEFINE_STATS_GRP(tls, 0); -static MLX5E_DEFINE_STATS_GRP(ptp, 0); +MLX5E_DEFINE_STATS_GRP(ptp, 0); static MLX5E_DEFINE_STATS_GRP(qos, 0); /* The stats groups order is opposite to the update_stats() order calls */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 2c1ed5b81be6..a7a025d15c14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -205,7 +205,19 @@ struct mlx5e_sw_stats { u64 ch_aff_change; u64 ch_force_irq; u64 ch_eq_rearm; - +#ifdef CONFIG_PAGE_POOL_STATS + u64 rx_pp_alloc_fast; + u64 rx_pp_alloc_slow; + u64 rx_pp_alloc_slow_high_order; + u64 rx_pp_alloc_empty; + u64 rx_pp_alloc_refill; + u64 rx_pp_alloc_waive; + u64 rx_pp_recycle_cached; + u64 rx_pp_recycle_cache_full; + u64 rx_pp_recycle_ring; + u64 rx_pp_recycle_ring_full; + u64 rx_pp_recycle_released_ref; +#endif #ifdef CONFIG_MLX5_EN_TLS u64 tx_tls_encrypted_packets; u64 tx_tls_encrypted_bytes; @@ -352,6 +364,19 @@ struct mlx5e_rq_stats { u64 congst_umr; u64 arfs_err; u64 recover; +#ifdef CONFIG_PAGE_POOL_STATS + u64 pp_alloc_fast; + u64 pp_alloc_slow; + u64 pp_alloc_slow_high_order; + u64 pp_alloc_empty; + u64 pp_alloc_refill; + u64 pp_alloc_waive; + u64 pp_recycle_cached; + u64 pp_recycle_cache_full; + u64 pp_recycle_ring; + u64 pp_recycle_ring_full; + u64 pp_recycle_released_ref; +#endif #ifdef CONFIG_MLX5_EN_TLS u64 tls_decrypted_packets; u64 tls_decrypted_bytes; @@ -459,5 +484,6 @@ extern MLX5E_DECLARE_STATS_GRP(channels); extern MLX5E_DECLARE_STATS_GRP(per_port_buff_congest); extern MLX5E_DECLARE_STATS_GRP(ipsec_hw); extern MLX5E_DECLARE_STATS_GRP(ipsec_sw); +extern MLX5E_DECLARE_STATS_GRP(ptp); #endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b27532a9301e..e3fc15ae7bb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -115,6 +115,7 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { static struct lock_class_key tc_ht_lock_key; static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); +static void free_flow_post_acts(struct mlx5e_tc_flow *flow); void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, @@ -273,6 +274,23 @@ get_sample_priv(struct mlx5e_priv *priv) return NULL; } +static struct mlx5e_post_act * +get_post_action(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->post_act; + } + + return priv->fs.tc.post_act; +} + struct mlx5_flow_handle * mlx5_tc_rule_insert(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, @@ -295,13 +313,62 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv, if (is_mdev_switchdev_mode(priv->mdev)) { mlx5_eswitch_del_offloaded_rule(esw, rule, attr); - return; } mlx5e_del_offloaded_nic_rule(priv, rule, attr); } +struct mlx5_flow_handle * +mlx5e_tc_rule_offload(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (attr->flags & MLX5_ATTR_FLAG_CT) { + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = + &attr->parse_attr->mod_hdr_acts; + + return mlx5_tc_ct_flow_offload(get_ct_priv(priv), + spec, attr, + mod_hdr_acts); + } + + if (!is_mdev_switchdev_mode(priv->mdev)) + return mlx5e_add_offloaded_nic_rule(priv, spec, attr); + + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) + return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr); + + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); +} + +void +mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (attr->flags & MLX5_ATTR_FLAG_CT) { + mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr); + return; + } + + if (!is_mdev_switchdev_mode(priv->mdev)) { + mlx5e_del_offloaded_nic_rule(priv, rule, attr); + return; + } + + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) { + mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr); + return; + } + + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); +} + int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, @@ -1039,6 +1106,21 @@ err_ft_get: } static int +alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev, + struct mlx5_flow_attr *attr) + +{ + struct mlx5_fc *counter; + + counter = mlx5_fc_create(counter_dev, true); + if (IS_ERR(counter)) + return PTR_ERR(counter); + + attr->counter = counter; + return 0; +} + +static int mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) @@ -1046,7 +1128,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_core_dev *dev = priv->mdev; - struct mlx5_fc *counter; int err; parse_attr = attr->parse_attr; @@ -1058,11 +1139,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(dev, true); - if (IS_ERR(counter)) - return PTR_ERR(counter); - - attr->counter = counter; + err = alloc_flow_attr_counter(dev, attr); + if (err) + return err; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { @@ -1072,8 +1151,8 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, return err; } - if (flow_flag_test(flow, CT)) - flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec, + if (attr->flags & MLX5_ATTR_FLAG_CT) + flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec, attr, &parse_attr->mod_hdr_acts); else flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, @@ -1107,8 +1186,8 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, flow_flag_clear(flow, OFFLOADED); - if (flow_flag_test(flow, CT)) - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); + if (attr->flags & MLX5_ATTR_FLAG_CT) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); else if (!IS_ERR_OR_NULL(flow->rule[0])) mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr); @@ -1132,6 +1211,8 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, if (flow_flag_test(flow, HAIRPIN)) mlx5e_hairpin_flow_del(priv, flow); + free_flow_post_acts(flow); + kvfree(attr->parse_attr); kfree(flow->attr); } @@ -1142,40 +1223,27 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr) { - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct mlx5_flow_handle *rule; - if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) + if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); - if (flow_flag_test(flow, CT)) { - mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; - - rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv), - flow, spec, attr, - mod_hdr_acts); - } else if (flow_flag_test(flow, SAMPLE)) { - rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr, - mlx5e_tc_get_flow_tun_id(flow)); - } else { - rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); - } + rule = mlx5e_tc_rule_offload(flow->priv, spec, attr); if (IS_ERR(rule)) return rule; if (attr->esw_attr->split_count) { flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); - if (IS_ERR(flow->rule[1])) { - if (flow_flag_test(flow, CT)) - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); - else - mlx5_eswitch_del_offloaded_rule(esw, rule, attr); - return flow->rule[1]; - } + if (IS_ERR(flow->rule[1])) + goto err_rule1; } return rule; + +err_rule1: + mlx5e_tc_rule_unoffload(flow->priv, rule, attr); + return flow->rule[1]; } void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, @@ -1184,19 +1252,13 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, { flow_flag_clear(flow, OFFLOADED); - if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) - goto offload_rule_0; + if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) + return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); if (attr->esw_attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); - if (flow_flag_test(flow, CT)) - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); - else if (flow_flag_test(flow, SAMPLE)) - mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); - else -offload_rule_0: - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr); } struct mlx5_flow_handle * @@ -1214,7 +1276,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->esw_attr->split_count = 0; - slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) @@ -1239,7 +1301,7 @@ void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->esw_attr->split_count = 0; - slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); flow_flag_clear(flow, SLOW); kfree(slow_attr); @@ -1348,10 +1410,10 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro } int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow) + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) { - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts; + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; struct mlx5_modify_hdr *mod_hdr; mod_hdr = mlx5_modify_header_alloc(priv->mdev, @@ -1361,13 +1423,107 @@ int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, if (IS_ERR(mod_hdr)) return PTR_ERR(mod_hdr); - WARN_ON(flow->attr->modify_hdr); - flow->attr->modify_hdr = mod_hdr; + WARN_ON(attr->modify_hdr); + attr->modify_hdr = mod_hdr; return 0; } static int +set_encap_dests(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack, + bool *encap_valid, + bool *vf_tun) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_esw_flow_attr *esw_attr; + struct net_device *encap_dev = NULL; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *out_priv; + int out_index; + int err = 0; + + if (!mlx5e_is_eswitch_flow(flow)) + return 0; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + *vf_tun = false; + *encap_valid = true; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + struct net_device *out_dev; + int mirred_ifindex; + + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + mirred_ifindex = parse_attr->mirred_ifindex[out_index]; + out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); + if (!out_dev) { + NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); + err = -ENODEV; + goto out; + } + err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, + extack, &encap_dev, encap_valid); + dev_put(out_dev); + if (err) + goto out; + + if (esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && + !esw_attr->dest_int_port) + *vf_tun = true; + + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + esw_attr->dests[out_index].rep = rpriv->rep; + esw_attr->dests[out_index].mdev = out_priv->mdev; + } + + if (*vf_tun && esw_attr->out_count > 1) { + NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); + err = -EOPNOTSUPP; + goto out; + } + +out: + return err; +} + +static void +clean_encap_dests(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + bool *vf_tun) +{ + struct mlx5_esw_flow_attr *esw_attr; + int out_index; + + if (!mlx5e_is_eswitch_flow(flow)) + return; + + esw_attr = attr->esw_attr; + *vf_tun = false; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + if (esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && + !esw_attr->dest_int_port) + *vf_tun = true; + + mlx5e_detach_encap(priv, flow, attr, out_index); + kfree(attr->parse_attr->tun_info[out_index]); + } +} + +static int mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) @@ -1375,15 +1531,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; - bool vf_tun = false, encap_valid = true; - struct net_device *encap_dev = NULL; struct mlx5_esw_flow_attr *esw_attr; - struct mlx5e_rep_priv *rpriv; - struct mlx5e_priv *out_priv; - struct mlx5_fc *counter; + bool vf_tun, encap_valid; u32 max_prio, max_chain; int err = 0; - int out_index; parse_attr = attr->parse_attr; esw_attr = attr->esw_attr; @@ -1472,50 +1623,17 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, esw_attr->int_port = int_port; } - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { - struct net_device *out_dev; - int mirred_ifindex; - - if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) - continue; - - mirred_ifindex = parse_attr->mirred_ifindex[out_index]; - out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); - if (!out_dev) { - NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); - err = -ENODEV; - goto err_out; - } - err = mlx5e_attach_encap(priv, flow, out_dev, out_index, - extack, &encap_dev, &encap_valid); - dev_put(out_dev); - if (err) - goto err_out; - - if (esw_attr->dests[out_index].flags & - MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && - !esw_attr->dest_int_port) - vf_tun = true; - out_priv = netdev_priv(encap_dev); - rpriv = out_priv->ppriv; - esw_attr->dests[out_index].rep = rpriv->rep; - esw_attr->dests[out_index].mdev = out_priv->mdev; - } - - if (vf_tun && esw_attr->out_count > 1) { - NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); - err = -EOPNOTSUPP; + err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun); + if (err) goto err_out; - } err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) goto err_out; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && - !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { if (vf_tun) { - err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); + err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr); if (err) goto err_out; } else { @@ -1526,20 +1644,16 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(esw_attr->counter_dev, true); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); + err = alloc_flow_attr_counter(esw_attr->counter_dev, attr); + if (err) goto err_out; - } - - attr->counter = counter; } /* we get here if one of the following takes place: * (1) there's no error * (2) there's an encap action and we don't have valid neigh */ - if (!encap_valid) + if (!encap_valid || flow_flag_test(flow, SLOW)) flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec); else flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); @@ -1576,8 +1690,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; - bool vf_tun = false; - int out_index; + bool vf_tun; esw_attr = attr->esw_attr; mlx5e_put_flow_tunnel_id(flow); @@ -1601,16 +1714,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow->decap_route) mlx5e_detach_decap_route(priv, flow); - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { - if (esw_attr->dests[out_index].flags & - MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && - !esw_attr->dest_int_port) - vf_tun = true; - if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) { - mlx5e_detach_encap(priv, flow, out_index); - kfree(attr->parse_attr->tun_info[out_index]); - } - } + clean_encap_dests(priv, flow, attr, &vf_tun); mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); @@ -1634,7 +1738,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); - kfree(attr->sample_attr); + free_flow_post_acts(flow); + kvfree(attr->esw_attr->rx_tun_attr); kvfree(attr->parse_attr); kfree(flow->attr); @@ -1642,7 +1747,10 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) { - return flow->attr->counter; + struct mlx5_flow_attr *attr; + + attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list); + return attr->counter; } /* Iterate over tmp_list of flows attached to flow_list head. */ @@ -1854,7 +1962,7 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } - flow->tunnel_id = value; + flow->attr->tunnel_id = value; return 0; err_set: @@ -1868,8 +1976,8 @@ err_enc_opts: static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) { - u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK; - u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS; + u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK; + u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS; struct mlx5_rep_uplink_priv *uplink_priv; struct mlx5e_rep_priv *uplink_rpriv; struct mlx5_eswitch *esw; @@ -1885,11 +1993,6 @@ static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) enc_opts_id); } -u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow) -{ - return flow->tunnel_id; -} - void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, struct flow_match_basic *match, bool outer, void *headers_c, void *headers_v) @@ -2811,14 +2914,15 @@ static unsigned long mask_to_le(unsigned long mask, int size) return mask; } + static int offload_pedit_fields(struct mlx5e_priv *priv, int namespace, - struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, u32 *action_flags, struct netlink_ext_ack *extack) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; + struct pedit_headers_action *hdrs = parse_attr->hdrs; void *headers_c, *headers_v, *action, *vals_p; u32 *s_masks_p, *a_masks_p, s_mask, a_mask; struct mlx5e_tc_mod_hdr_acts *mod_acts; @@ -2944,35 +3048,43 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, static const struct pedit_headers zero_masks = {}; -static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, - u32 *action_flags, - struct netlink_ext_ack *extack) +static int verify_offload_pedit_fields(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct netlink_ext_ack *extack) { struct pedit_headers *cmd_masks; - int err; u8 cmd; - err = offload_pedit_fields(priv, namespace, hdrs, parse_attr, - action_flags, extack); - if (err < 0) - goto out_dealloc_parsed_actions; - for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { - cmd_masks = &hdrs[cmd].masks; + cmd_masks = &parse_attr->hdrs[cmd].masks; if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { - NL_SET_ERR_MSG_MOD(extack, - "attempt to offload an unsupported field"); + NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field"); netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd); print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, 16, 1, cmd_masks, sizeof(zero_masks), true); - err = -EOPNOTSUPP; - goto out_dealloc_parsed_actions; + return -EOPNOTSUPP; } } return 0; +} + +static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action_flags, + struct netlink_ext_ack *extack) +{ + int err; + + err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack); + if (err) + goto out_dealloc_parsed_actions; + + err = verify_offload_pedit_fields(priv, parse_attr, extack); + if (err) + goto out_dealloc_parsed_actions; + + return 0; out_dealloc_parsed_actions: mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); @@ -3176,11 +3288,11 @@ actions_match_supported_fdb(struct mlx5e_priv *priv, static bool actions_match_supported(struct mlx5e_priv *priv, struct flow_action *flow_action, + u32 actions, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { - u32 actions = flow->attr->action; bool ct_flow, ct_clear; ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; @@ -3248,57 +3360,13 @@ bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) } static int -parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, - struct flow_action *flow_action) -{ - struct netlink_ext_ack *extack = parse_state->extack; - struct mlx5e_tc_flow *flow = parse_state->flow; - struct mlx5_flow_attr *attr = flow->attr; - enum mlx5_flow_namespace_type ns_type; - struct mlx5e_priv *priv = flow->priv; - const struct flow_action_entry *act; - struct mlx5e_tc_act *tc_act; - int err, i; - - ns_type = mlx5e_get_flow_namespace(flow); - - flow_action_for_each(i, act, flow_action) { - tc_act = mlx5e_tc_act_get(act->id, ns_type); - if (!tc_act) { - NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action"); - return -EOPNOTSUPP; - } - - if (!tc_act->can_offload(parse_state, act, i)) - return -EOPNOTSUPP; - - err = tc_act->parse_action(parse_state, act, priv, attr); - if (err) - return err; - } - - flow_action_for_each(i, act, flow_action) { - tc_act = mlx5e_tc_act_get(act->id, ns_type); - if (!tc_act || !tc_act->post_parse || - !tc_act->can_offload(parse_state, act, i)) - continue; - - err = tc_act->post_parse(parse_state, priv, attr); - if (err) - return err; - } - - return 0; -} - -static int actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, - struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct pedit_headers_action *hdrs = parse_attr->hdrs; enum mlx5_flow_namespace_type ns_type; int err; @@ -3308,8 +3376,7 @@ actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, ns_type = mlx5e_get_flow_namespace(flow); - err = alloc_tc_pedit_action(priv, ns_type, parse_attr, hdrs, - &attr->action, extack); + err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack); if (err) return err; @@ -3330,6 +3397,299 @@ actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, return 0; } +static struct mlx5_flow_attr* +mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + u32 attr_sz = ns_to_attr_sz(ns_type); + struct mlx5_flow_attr *attr2; + + attr2 = mlx5_alloc_flow_attr(ns_type); + parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); + if (!attr2 || !parse_attr) { + kvfree(parse_attr); + kfree(attr2); + return NULL; + } + + memcpy(attr2, attr, attr_sz); + INIT_LIST_HEAD(&attr2->list); + parse_attr->filter_dev = attr->parse_attr->filter_dev; + attr2->action = 0; + attr2->flags = 0; + attr2->parse_attr = parse_attr; + return attr2; +} + +static struct mlx5_core_dev * +get_flow_counter_dev(struct mlx5e_tc_flow *flow) +{ + return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev; +} + +struct mlx5_flow_attr * +mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow) +{ + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_attr *attr; + int i; + + list_for_each_entry(attr, &flow->attrs, list) { + esw_attr = attr->esw_attr; + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) + return attr; + } + } + + return NULL; +} + +void +mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow) +{ + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr; + + list_for_each_entry(attr, &flow->attrs, list) { + if (list_is_last(&attr->list, &flow->attrs)) + break; + + mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle); + } +} + +static void +free_flow_post_acts(struct mlx5e_tc_flow *flow) +{ + struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow); + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr, *tmp; + bool vf_tun; + + list_for_each_entry_safe(attr, tmp, &flow->attrs, list) { + if (list_is_last(&attr->list, &flow->attrs)) + break; + + if (attr->post_act_handle) + mlx5e_tc_post_act_del(post_act, attr->post_act_handle); + + clean_encap_dests(flow->priv, flow, attr, &vf_tun); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(counter_dev, attr->counter); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); + if (attr->modify_hdr) + mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr); + } + + list_del(&attr->list); + kvfree(attr->parse_attr); + kfree(attr); + } +} + +int +mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow) +{ + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr; + int err = 0; + + list_for_each_entry(attr, &flow->attrs, list) { + if (list_is_last(&attr->list, &flow->attrs)) + break; + + err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle); + if (err) + break; + } + + return err; +} + +/* TC filter rule HW translation: + * + * +---------------------+ + * + ft prio (tc chain) + + * + original match + + * +---------------------+ + * | + * | if multi table action + * | + * v + * +---------------------+ + * + post act ft |<----. + * + match fte id | | split on multi table action + * + do actions |-----' + * +---------------------+ + * | + * | + * v + * Do rest of the actions after last multi table action. + */ +static int +alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) +{ + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr, *next_attr = NULL; + struct mlx5e_post_act_handle *handle; + bool vf_tun, encap_valid = true; + int err; + + /* This is going in reverse order as needed. + * The first entry is the last attribute. + */ + list_for_each_entry(attr, &flow->attrs, list) { + if (!next_attr) { + /* Set counter action on last post act rule. */ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + } else { + err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr); + if (err) + goto out_free; + } + + /* Don't add post_act rule for first attr (last in the list). + * It's being handled by the caller. + */ + if (list_is_last(&attr->list, &flow->attrs)) + break; + + err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun); + if (err) + goto out_free; + + if (!encap_valid) + flow_flag_set(flow, SLOW); + + err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); + if (err) + goto out_free; + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr); + if (err) + goto out_free; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr); + if (err) + goto out_free; + } + + handle = mlx5e_tc_post_act_add(post_act, attr); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto out_free; + } + + attr->post_act_handle = handle; + next_attr = attr; + } + + if (flow_flag_test(flow, SLOW)) + goto out; + + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) + goto out_free; + +out: + return 0; + +out_free: + free_flow_post_acts(flow); + return err; +} + +static int +parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow_action flow_action_reorder; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct mlx5_flow_attr *attr = flow->attr; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_priv *priv = flow->priv; + struct flow_action_entry *act, **_act; + struct mlx5e_tc_act *tc_act; + int err, i; + + flow_action_reorder.num_entries = flow_action->num_entries; + flow_action_reorder.entries = kcalloc(flow_action->num_entries, + sizeof(flow_action), GFP_KERNEL); + if (!flow_action_reorder.entries) + return -ENOMEM; + + mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder); + + ns_type = mlx5e_get_flow_namespace(flow); + list_add(&attr->list, &flow->attrs); + + flow_action_for_each(i, _act, &flow_action_reorder) { + act = *_act; + tc_act = mlx5e_tc_act_get(act->id, ns_type); + if (!tc_act) { + NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action"); + err = -EOPNOTSUPP; + goto out_free; + } + + if (!tc_act->can_offload(parse_state, act, i, attr)) { + err = -EOPNOTSUPP; + goto out_free; + } + + err = tc_act->parse_action(parse_state, act, priv, attr); + if (err) + goto out_free; + + parse_state->actions |= attr->action; + + /* Split attr for multi table act if not the last act. */ + if (tc_act->is_multi_table_act && + tc_act->is_multi_table_act(priv, act, attr) && + i < flow_action_reorder.num_entries - 1) { + err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); + if (err) + goto out_free; + + attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type); + if (!attr) { + err = -ENOMEM; + goto out_free; + } + + list_add(&attr->list, &flow->attrs); + } + } + + kfree(flow_action_reorder.entries); + + err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); + if (err) + goto out_free_post_acts; + + err = alloc_flow_post_acts(flow, extack); + if (err) + goto out_free_post_acts; + + return 0; + +out_free: + kfree(flow_action_reorder.entries); +out_free_post_acts: + free_flow_post_acts(flow); + + return err; +} + static int flow_action_supported(struct flow_action *flow_action, struct netlink_ext_ack *extack) @@ -3357,7 +3717,6 @@ parse_tc_nic_actions(struct mlx5e_priv *priv, struct mlx5e_tc_act_parse_state *parse_state; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; - struct pedit_headers_action *hdrs; int err; err = flow_action_supported(flow_action, extack); @@ -3369,17 +3728,17 @@ parse_tc_nic_actions(struct mlx5e_priv *priv, parse_state = &parse_attr->parse_state; mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); parse_state->ct_priv = get_ct_priv(priv); - hdrs = parse_state->hdrs; err = parse_tc_actions(parse_state, flow_action); if (err) return err; - err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack); + err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); if (err) return err; - if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) + if (!actions_match_supported(priv, flow_action, parse_state->actions, + parse_attr, flow, extack)) return -EOPNOTSUPP; return 0; @@ -3480,7 +3839,6 @@ parse_tc_fdb_actions(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; - struct pedit_headers_action *hdrs; int err; err = flow_action_supported(flow_action, extack); @@ -3492,7 +3850,6 @@ parse_tc_fdb_actions(struct mlx5e_priv *priv, parse_state = &parse_attr->parse_state; mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); parse_state->ct_priv = get_ct_priv(priv); - hdrs = parse_state->hdrs; err = parse_tc_actions(parse_state, flow_action); if (err) @@ -3506,11 +3863,12 @@ parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack); + err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); if (err) return err; - if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) + if (!actions_match_supported(priv, flow_action, parse_state->actions, + parse_attr, flow, extack)) return -EOPNOTSUPP; return 0; @@ -3545,12 +3903,11 @@ static const struct rhashtable_params tc_ht_params = { static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, unsigned long flags) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_rep_priv *rpriv; if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - return &uplink_rpriv->uplink_priv.tc_ht; + rpriv = priv->ppriv; + return &rpriv->tc_ht; } else /* NIC offload */ return &priv->fs.tc.ht; } @@ -3585,7 +3942,12 @@ mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type) sizeof(struct mlx5_nic_flow_attr); struct mlx5_flow_attr *attr; - return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL); + attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL); + if (!attr) + return attr; + + INIT_LIST_HEAD(&attr->list); + return attr; } static int @@ -3619,6 +3981,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, INIT_LIST_HEAD(&flow->encaps[out_index].list); INIT_LIST_HEAD(&flow->hairpin); INIT_LIST_HEAD(&flow->l3_to_l2_reformat); + INIT_LIST_HEAD(&flow->attrs); refcount_set(&flow->refcnt, 1); init_completion(&flow->init_done); init_completion(&flow->del_hw_done); @@ -4119,6 +4482,46 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate, return err; } +static int mlx5e_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} + static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct netlink_ext_ack *extack) @@ -4146,10 +4549,10 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_POLICE: - if (act->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } + err = mlx5e_policer_validate(flow_action, act, extack); + if (err) + return err; + err = apply_police_params(priv, act->police.rate_bytes_ps, extack); if (err) return err; @@ -4383,10 +4786,27 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) mlx5_chains_destroy(tc->chains); } -int mlx5e_tc_esw_init(struct rhashtable *tc_ht) +int mlx5e_tc_ht_init(struct rhashtable *tc_ht) +{ + int err; + + err = rhashtable_init(tc_ht, &tc_ht_params); + if (err) + return err; + + lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key); + + return 0; +} + +void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) +{ + rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); +} + +int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) { const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts); - struct mlx5_rep_uplink_priv *uplink_priv; struct mlx5e_rep_priv *rpriv; struct mapping_ctx *mapping; struct mlx5_eswitch *esw; @@ -4394,7 +4814,6 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) u64 mapping_id; int err = 0; - uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); priv = netdev_priv(rpriv->netdev); esw = priv->mdev->priv.eswitch; @@ -4434,12 +4853,6 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) } uplink_priv->tunnel_enc_opts_mapping = mapping; - err = rhashtable_init(tc_ht, &tc_ht_params); - if (err) - goto err_ht_init; - - lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key); - uplink_priv->encap = mlx5e_tc_tun_init(priv); if (IS_ERR(uplink_priv->encap)) { err = PTR_ERR(uplink_priv->encap); @@ -4449,8 +4862,6 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) return 0; err_register_fib_notifier: - rhashtable_destroy(tc_ht); -err_ht_init: mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); err_enc_opts_mapping: mapping_destroy(uplink_priv->tunnel_mapping); @@ -4464,13 +4875,8 @@ err_tun_mapping: return err; } -void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) +void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv) { - struct mlx5_rep_uplink_priv *uplink_priv; - - uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); - - rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); mlx5e_tc_tun_cleanup(uplink_priv->encap); mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 5ffae9b13066..a80b00946f1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -53,7 +53,6 @@ ESW_FLOW_ATTR_SZ :\ NIC_FLOW_ATTR_SZ) - int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); struct mlx5e_tc_update_priv { @@ -71,7 +70,7 @@ struct mlx5_flow_attr { struct mlx5_fc *counter; struct mlx5_modify_hdr *modify_hdr; struct mlx5_ct_attr ct_attr; - struct mlx5e_sample_attr *sample_attr; + struct mlx5e_sample_attr sample_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; u32 chain; u16 prio; @@ -82,13 +81,33 @@ struct mlx5_flow_attr { u8 outer_match_level; u8 ip_version; u8 tun_ip_version; + int tunnel_id; /* mapped tunnel id */ u32 flags; + struct list_head list; + struct mlx5e_post_act_handle *post_act_handle; union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; }; }; +enum { + MLX5_ATTR_FLAG_VLAN_HANDLED = BIT(0), + MLX5_ATTR_FLAG_SLOW_PATH = BIT(1), + MLX5_ATTR_FLAG_NO_IN_PORT = BIT(2), + MLX5_ATTR_FLAG_SRC_REWRITE = BIT(3), + MLX5_ATTR_FLAG_SAMPLE = BIT(4), + MLX5_ATTR_FLAG_ACCEPT = BIT(5), + MLX5_ATTR_FLAG_CT = BIT(6), +}; + +/* Returns true if any of the flags that require skipping further TC/NF processing are set. */ +static inline bool +mlx5e_tc_attr_flags_skip(u32 attr_flags) +{ + return attr_flags & (MLX5_ATTR_FLAG_SLOW_PATH | MLX5_ATTR_FLAG_ACCEPT); +} + struct mlx5_rx_tun_attr { u16 decap_vport; union { @@ -149,8 +168,11 @@ enum { #define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT) -int mlx5e_tc_esw_init(struct rhashtable *tc_ht); -void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); +int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv); +void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv); + +int mlx5e_tc_ht_init(struct rhashtable *tc_ht); +void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht); int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct flow_cls_offload *f, unsigned long flags); @@ -243,11 +265,8 @@ int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, u32 data); int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow); - -struct mlx5e_tc_flow; -u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow); + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, struct flow_match_basic *match, bool outer, @@ -289,6 +308,8 @@ int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, #else /* CONFIG_MLX5_CLS_ACT */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} +static inline int mlx5e_tc_ht_init(struct rhashtable *tc_ht) { return 0; } +static inline void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) {} static inline int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index ee7ecb88adc1..2dc48406cd08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -53,117 +53,6 @@ static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) } } -#ifdef CONFIG_MLX5_CORE_EN_DCB -static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb) -{ - int dscp_cp = 0; - - if (skb->protocol == htons(ETH_P_IP)) - dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; - else if (skb->protocol == htons(ETH_P_IPV6)) - dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; - - return priv->dcbx_dp.dscp2prio[dscp_cp]; -} -#endif - -static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - int up = 0; - - if (!netdev_get_num_tc(dev)) - goto return_txq; - -#ifdef CONFIG_MLX5_CORE_EN_DCB - if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP) - up = mlx5e_get_dscp_up(priv, skb); - else -#endif - if (skb_vlan_tag_present(skb)) - up = skb_vlan_tag_get_prio(skb); - -return_txq: - return priv->port_ptp_tc2realtxq[up]; -} - -static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb, - u16 htb_maj_id) -{ - u16 classid; - - if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id) - classid = TC_H_MIN(skb->priority); - else - classid = READ_ONCE(priv->htb.defcls); - - if (!classid) - return 0; - - return mlx5e_get_txq_by_classid(priv, classid); -} - -u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - int num_tc_x_num_ch; - int txq_ix; - int up = 0; - int ch_ix; - - /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */ - num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch); - if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) { - struct mlx5e_ptp *ptp_channel; - - /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */ - u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id); - - if (unlikely(htb_maj_id)) { - txq_ix = mlx5e_select_htb_queue(priv, skb, htb_maj_id); - if (txq_ix > 0) - return txq_ix; - } - - ptp_channel = READ_ONCE(priv->channels.ptp); - if (unlikely(ptp_channel && - test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) && - mlx5e_use_ptpsq(skb))) - return mlx5e_select_ptpsq(dev, skb); - - txq_ix = netdev_pick_tx(dev, skb, NULL); - /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. - * If they are selected, switch to regular queues. - * Driver to select these queues only at mlx5e_select_ptpsq() - * and mlx5e_select_htb_queue(). - */ - if (unlikely(txq_ix >= num_tc_x_num_ch)) - txq_ix %= num_tc_x_num_ch; - } else { - txq_ix = netdev_pick_tx(dev, skb, NULL); - } - - if (!netdev_get_num_tc(dev)) - return txq_ix; - -#ifdef CONFIG_MLX5_CORE_EN_DCB - if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP) - up = mlx5e_get_dscp_up(priv, skb); - else -#endif - if (skb_vlan_tag_present(skb)) - up = skb_vlan_tag_get_prio(skb); - - /* Normalize any picked txq_ix to [0, num_channels), - * So we can return a txq_ix that matches the channel and - * packet UP. - */ - ch_ix = priv->txq2sq[txq_ix]->ch_ix; - - return priv->channel_tc2realtxq[ch_ix][up]; -} - static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) { #define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) @@ -544,7 +433,7 @@ static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe *wqe; u16 pi; - pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS); + pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs); wqe = MLX5E_TX_FETCH_WQE(sq, pi); net_prefetchw(wqe->data); @@ -645,7 +534,7 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5e_tx_skb_update_hwts_flags(skb); - if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) { + if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) { /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */ cseg = mlx5e_tx_mpwqe_session_complete(sq); @@ -691,8 +580,21 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) struct mlx5e_txqsq *sq; u16 pi; + /* All changes to txq2sq are performed in sync with mlx5e_xmit, when the + * queue being changed is disabled, and smp_wmb guarantees that the + * changes are visible before mlx5e_xmit tries to read from txq2sq. It + * guarantees that the value of txq2sq[qid] doesn't change while + * mlx5e_xmit is running on queue number qid. smb_wmb is paired with + * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE. + */ sq = priv->txq2sq[skb_get_queue_mapping(skb)]; if (unlikely(!sq)) { + /* Two cases when sq can be NULL: + * 1. The HTB node is registered, and mlx5e_select_queue + * selected its queue ID, but the SQ itself is not yet created. + * 2. HTB SQ creation failed. Similar to the previous case, but + * the SQ won't be created. + */ dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c index 39e948bc1204..a994e71e05c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -92,6 +92,7 @@ static int esw_acl_ingress_mod_metadata_create(struct mlx5_eswitch *esw, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; + flow_act.fg = vport->ingress.offloads.metadata_allmatch_grp; vport->ingress.offloads.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0); @@ -117,6 +118,36 @@ static void esw_acl_ingress_mod_metadata_destroy(struct mlx5_eswitch *esw, vport->ingress.offloads.modify_metadata_rule = NULL; } +static int esw_acl_ingress_src_port_drop_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + int err = 0; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + flow_act.fg = vport->ingress.offloads.drop_grp; + flow_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } + + vport->ingress.offloads.drop_rule = flow_rule; +out: + return err; +} + +static void esw_acl_ingress_src_port_drop_destroy(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!vport->ingress.offloads.drop_rule) + return; + + mlx5_del_flow_rules(vport->ingress.offloads.drop_rule); + vport->ingress.offloads.drop_rule = NULL; +} + static int esw_acl_ingress_ofld_rules_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -154,6 +185,7 @@ static void esw_acl_ingress_ofld_rules_destroy(struct mlx5_eswitch *esw, { esw_acl_ingress_allow_rule_destroy(vport); esw_acl_ingress_mod_metadata_destroy(esw, vport); + esw_acl_ingress_src_port_drop_destroy(esw, vport); } static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw, @@ -170,10 +202,29 @@ static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw, if (!flow_group_in) return -ENOMEM; + if (vport->vport == MLX5_VPORT_UPLINK) { + /* This group can hold an FTE to drop all traffic. + * Need in case LAG is enabled. + */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n", + vport->vport, ret); + goto drop_err; + } + vport->ingress.offloads.drop_grp = g; + flow_index++; + } + if (esw_acl_ingress_prio_tag_enabled(esw, vport)) { /* This group is to hold FTE to match untagged packets when prio_tag * is enabled. */ + memset(flow_group_in, 0, inlen); match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); MLX5_SET(create_flow_group_in, flow_group_in, @@ -221,6 +272,11 @@ metadata_err: vport->ingress.offloads.metadata_prio_tag_grp = NULL; } prio_tag_err: + if (!IS_ERR_OR_NULL(vport->ingress.offloads.drop_grp)) { + mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp); + vport->ingress.offloads.drop_grp = NULL; + } +drop_err: kvfree(flow_group_in); return ret; } @@ -236,6 +292,11 @@ static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport) mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp); vport->ingress.offloads.metadata_prio_tag_grp = NULL; } + + if (vport->ingress.offloads.drop_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp); + vport->ingress.offloads.drop_grp = NULL; + } } int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, @@ -252,6 +313,8 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, if (mlx5_eswitch_vport_match_metadata_enabled(esw)) num_ftes++; + if (vport->vport == MLX5_VPORT_UPLINK) + num_ftes++; if (esw_acl_ingress_prio_tag_enabled(esw, vport)) num_ftes++; @@ -320,3 +383,27 @@ out: vport->metadata = vport->default_metadata; return err; } + +int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (IS_ERR(vport)) { + esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); + return PTR_ERR(vport); + } + + return esw_acl_ingress_src_port_drop_create(esw, vport); +} + +void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (WARN_ON_ONCE(IS_ERR(vport))) { + esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); + return; + } + + esw_acl_ingress_src_port_drop_destroy(esw, vport); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h index c57869b93d60..11d3d3978848 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h @@ -6,6 +6,7 @@ #include "eswitch.h" +#ifdef CONFIG_MLX5_ESWITCH /* Eswitch acl egress external APIs */ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport); @@ -25,5 +26,19 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vpor void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num, u32 metadata); +void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num); +#else /* CONFIG_MLX5_ESWITCH */ +static void +mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, + u16 vport_num) +{} + +static int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, + u16 vport_num) +{ + return 0; +} +#endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_ACL_OFLD_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c index c275fe028b6d..0abef71cb839 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c @@ -86,7 +86,7 @@ mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, mlx5_eswitch_is_vf_vport(esw, vport_num) && esw->dev == dest_mdev && attr->ip_version && - attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE; + attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE; } u16 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ead5e8acc8be..973281bdb4a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -113,8 +113,11 @@ struct vport_ingress { * packet with metadata. */ struct mlx5_flow_group *metadata_allmatch_grp; + /* Optional group to add a drop all rule */ + struct mlx5_flow_group *drop_grp; struct mlx5_modify_hdr *modify_metadata; struct mlx5_flow_handle *modify_metadata_rule; + struct mlx5_flow_handle *drop_rule; } offloads; }; @@ -448,22 +451,6 @@ enum { MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE = BIT(2), }; -enum { - MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), - MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), - MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2), - MLX5_ESW_ATTR_FLAG_SRC_REWRITE = BIT(3), - MLX5_ESW_ATTR_FLAG_SAMPLE = BIT(4), - MLX5_ESW_ATTR_FLAG_ACCEPT = BIT(5), -}; - -/* Returns true if any of the flags that require skipping further TC/NF processing are set. */ -static inline bool -mlx5_esw_attr_flags_skip(u32 attr_flags) -{ - return attr_flags & (MLX5_ESW_ATTR_FLAG_SLOW_PATH | MLX5_ESW_ATTR_FLAG_ACCEPT); -} - struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_core_dev *in_mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index cfcd72bad9af..35cf4cb3098e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -180,7 +180,7 @@ esw_setup_decap_indir(struct mlx5_eswitch *esw, { struct mlx5_flow_table *ft; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) return -EOPNOTSUPP; ft = mlx5_esw_indir_table_get(esw, attr, spec, @@ -201,12 +201,12 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw, static int esw_setup_sampler_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, - struct mlx5_flow_attr *attr, + u32 sampler_id, int i) { flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; - dest[i].sampler_id = attr->sample_attr->sampler_id; + dest[i].sampler_id = sampler_id; return 0; } @@ -297,7 +297,7 @@ esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest, struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; int err; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) return -EOPNOTSUPP; /* flow steering cannot handle more than one dest with the same ft @@ -364,7 +364,7 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest, struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; int j, err; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) return -EOPNOTSUPP; for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) { @@ -463,15 +463,16 @@ esw_setup_dests(struct mlx5_flow_destination *dest, if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) && esw_src_port_rewrite_supported(esw)) - attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE; + attr->flags |= MLX5_ATTR_FLAG_SRC_REWRITE; - if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) { - esw_setup_sampler_dest(dest, flow_act, attr, *i); + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE && + !(attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)) { + esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i); (*i)++; } else if (attr->dest_ft) { esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); (*i)++; - } else if (mlx5_esw_attr_flags_skip(attr->flags)) { + } else if (mlx5e_tc_attr_flags_skip(attr->flags)) { esw_setup_slow_path_dest(dest, flow_act, chains, *i); (*i)++; } else if (attr->dest_chain) { @@ -498,7 +499,7 @@ esw_cleanup_dests(struct mlx5_eswitch *esw, if (attr->dest_ft) { esw_cleanup_decap_indir(esw, attr); - } else if (!mlx5_esw_attr_flags_skip(attr->flags)) { + } else if (!mlx5e_tc_attr_flags_skip(attr->flags)) { if (attr->dest_chain) esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0); else if (esw_is_indir_table(esw, attr)) @@ -589,7 +590,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, else fdb = attr->ft; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT)) + if (!(attr->flags & MLX5_ATTR_FLAG_NO_IN_PORT)) mlx5_eswitch_set_rule_source_port(esw, spec, attr, esw_attr->in_mdev->priv.eswitch, esw_attr->in_rep->vport); @@ -721,7 +722,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, mlx5_del_flow_rules(rule); - if (!mlx5_esw_attr_flags_skip(attr->flags)) { + if (!mlx5e_tc_attr_flags_skip(attr->flags)) { /* unref the term table */ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { if (esw_attr->dests[i].termtbl) @@ -863,7 +864,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (err) goto unlock; - attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; + attr->flags &= ~MLX5_ATTR_FLAG_VLAN_HANDLED; vport = esw_vlan_action_get_vport(esw_attr, push, pop); @@ -871,7 +872,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, /* tracks VF --> wire rules without vlan push action */ if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; - attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; + attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED; } goto unlock; @@ -902,7 +903,7 @@ skip_set_push: } out: if (!err) - attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; + attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED; unlock: mutex_unlock(&esw->state_lock); return err; @@ -921,7 +922,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) return 0; - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_VLAN_HANDLED)) + if (!(attr->flags & MLX5_ATTR_FLAG_VLAN_HANDLED)) return 0; push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); @@ -2378,60 +2379,6 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); } -static int esw_set_uplink_slave_ingress_root(struct mlx5_core_dev *master, - struct mlx5_core_dev *slave) -{ - u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; - u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {}; - struct mlx5_eswitch *esw; - struct mlx5_flow_root_namespace *root; - struct mlx5_flow_namespace *ns; - struct mlx5_vport *vport; - int err; - - MLX5_SET(set_flow_table_root_in, in, opcode, - MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); - MLX5_SET(set_flow_table_root_in, in, table_type, FS_FT_ESW_INGRESS_ACL); - MLX5_SET(set_flow_table_root_in, in, other_vport, 1); - MLX5_SET(set_flow_table_root_in, in, vport_number, MLX5_VPORT_UPLINK); - - if (master) { - esw = master->priv.eswitch; - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); - MLX5_SET(set_flow_table_root_in, in, table_of_other_vport, 1); - MLX5_SET(set_flow_table_root_in, in, table_vport_number, - MLX5_VPORT_UPLINK); - - ns = mlx5_get_flow_vport_acl_namespace(master, - MLX5_FLOW_NAMESPACE_ESW_INGRESS, - vport->index); - root = find_root(&ns->node); - mutex_lock(&root->chain_lock); - - MLX5_SET(set_flow_table_root_in, in, - table_eswitch_owner_vhca_id_valid, 1); - MLX5_SET(set_flow_table_root_in, in, - table_eswitch_owner_vhca_id, - MLX5_CAP_GEN(master, vhca_id)); - MLX5_SET(set_flow_table_root_in, in, table_id, - root->root_ft->id); - } else { - esw = slave->priv.eswitch; - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); - ns = mlx5_get_flow_vport_acl_namespace(slave, - MLX5_FLOW_NAMESPACE_ESW_INGRESS, - vport->index); - root = find_root(&ns->node); - mutex_lock(&root->chain_lock); - MLX5_SET(set_flow_table_root_in, in, table_id, root->root_ft->id); - } - - err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); - mutex_unlock(&root->chain_lock); - - return err; -} - static int esw_set_slave_root_fdb(struct mlx5_core_dev *master, struct mlx5_core_dev *slave) { @@ -2613,15 +2560,10 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, { int err; - err = esw_set_uplink_slave_ingress_root(master_esw->dev, - slave_esw->dev); - if (err) - return -EINVAL; - err = esw_set_slave_root_fdb(master_esw->dev, slave_esw->dev); if (err) - goto err_fdb; + return err; err = esw_set_master_egress_rule(master_esw->dev, slave_esw->dev); @@ -2633,9 +2575,6 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, err_acl: esw_set_slave_root_fdb(NULL, slave_esw->dev); -err_fdb: - esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev); - return err; } @@ -2644,7 +2583,6 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, { esw_unset_master_egress_rule(master_esw->dev); esw_set_slave_root_fdb(NULL, slave_esw->dev); - esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev); } #define ESW_OFFLOADS_DEVCOM_PAIR (0) @@ -2841,6 +2779,19 @@ bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) return true; } +#define MLX5_ESW_METADATA_RSVD_UPLINK 1 + +/* Share the same metadata for uplink's. This is fine because: + * (a) In shared FDB mode (LAG) both uplink's are treated the + * same and tagged with the same metadata. + * (b) In non shared FDB mode, packets from physical port0 + * cannot hit eswitch of PF1 and vice versa. + */ +static u32 mlx5_esw_match_metadata_reserved(struct mlx5_eswitch *esw) +{ + return MLX5_ESW_METADATA_RSVD_UPLINK; +} + u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) { u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1; @@ -2855,8 +2806,10 @@ u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) return 0; /* Metadata is 4 bits of PFNUM and 12 bits of unique id */ - /* Use only non-zero vport_id (1-4095) for all PF's */ - id = ida_alloc_range(&esw->offloads.vport_metadata_ida, 1, vport_end_ida, GFP_KERNEL); + /* Use only non-zero vport_id (2-4095) for all PF's */ + id = ida_alloc_range(&esw->offloads.vport_metadata_ida, + MLX5_ESW_METADATA_RSVD_UPLINK + 1, + vport_end_ida, GFP_KERNEL); if (id < 0) return 0; id = (pf_num << ESW_VPORT_BITS) | id; @@ -2874,7 +2827,11 @@ void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata) static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - vport->default_metadata = mlx5_esw_match_metadata_alloc(esw); + if (vport->vport == MLX5_VPORT_UPLINK) + vport->default_metadata = mlx5_esw_match_metadata_reserved(esw); + else + vport->default_metadata = mlx5_esw_match_metadata_alloc(esw); + vport->metadata = vport->default_metadata; return vport->metadata ? 0 : -ENOSPC; } @@ -2885,6 +2842,9 @@ static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw, if (!vport->default_metadata) return; + if (vport->vport == MLX5_VPORT_UPLINK) + return; + WARN_ON(vport->metadata != vport->default_metadata); mlx5_esw_match_metadata_free(esw, vport->default_metadata); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 182306bbefaa..ee568bf34ae2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -219,12 +219,14 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) || - mlx5_esw_attr_flags_skip(attr->flags) || + mlx5e_tc_attr_flags_skip(attr->flags) || (!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port)) return false; /* push vlan on RX */ - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && + !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) & + MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX)) return true; /* hairpin */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index dafe341358c7..a0ac17c3f12f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -152,6 +152,12 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns) return 0; } +static u32 mlx5_cmd_stub_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + return 0; +} + static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master, struct mlx5_core_dev *slave, bool ft_id_valid, @@ -971,6 +977,12 @@ static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns, return err ? err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); } +static u32 mlx5_cmd_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + return 0; +} + static const struct mlx5_flow_cmds mlx5_flow_cmds = { .create_flow_table = mlx5_cmd_create_flow_table, .destroy_flow_table = mlx5_cmd_destroy_flow_table, @@ -990,6 +1002,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = { .set_peer = mlx5_cmd_stub_set_peer, .create_ns = mlx5_cmd_stub_create_ns, .destroy_ns = mlx5_cmd_stub_destroy_ns, + .get_capabilities = mlx5_cmd_get_capabilities, }; static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { @@ -1011,6 +1024,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { .set_peer = mlx5_cmd_stub_set_peer, .create_ns = mlx5_cmd_stub_create_ns, .destroy_ns = mlx5_cmd_stub_destroy_ns, + .get_capabilities = mlx5_cmd_stub_get_capabilities, }; const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 220ec632d35a..274004e80f03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -101,6 +101,9 @@ struct mlx5_flow_cmds { u16 format_id, u32 *match_mask); int (*destroy_match_definer)(struct mlx5_flow_root_namespace *ns, int definer_id); + + u32 (*get_capabilities)(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type); }; int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 537c82b9aa53..816d991f7621 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1696,6 +1696,7 @@ static void free_match_list(struct match_list *head, bool ft_locked) static int build_match_list(struct match_list *match_head, struct mlx5_flow_table *ft, const struct mlx5_flow_spec *spec, + struct mlx5_flow_group *fg, bool ft_locked) { struct rhlist_head *tmp, *list; @@ -1710,6 +1711,9 @@ static int build_match_list(struct match_list *match_head, rhl_for_each_entry_rcu(g, tmp, list, hash) { struct match_list *curr_match; + if (fg && fg != g) + continue; + if (unlikely(!tree_get_node(&g->node))) continue; @@ -1889,6 +1893,9 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, if (!check_valid_spec(spec)) return ERR_PTR(-EINVAL); + if (flow_act->fg && ft->autogroup.active) + return ERR_PTR(-EINVAL); + for (i = 0; i < dest_num; i++) { if (!dest_is_valid(&dest[i], flow_act, ft)) return ERR_PTR(-EINVAL); @@ -1898,7 +1905,7 @@ search_again_locked: version = atomic_read(&ft->node.version); /* Collect all fgs which has a matching match_criteria */ - err = build_match_list(&match_head, ft, spec, take_write); + err = build_match_list(&match_head, ft, spec, flow_act->fg, take_write); if (err) { if (take_write) up_write_ref_node(&ft->node, false); @@ -3042,6 +3049,22 @@ void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev) steering->esw_ingress_root_ns = NULL; } +u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + + ns = mlx5_get_flow_namespace(dev, type); + if (!ns) + return 0; + + root = find_root(&ns->node); + if (!root) + return 0; + + return root->cmds->get_capabilities(root, root->table_type); +} + static int init_egress_root_ns(struct mlx5_flow_steering *steering) { int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 5469b08d635f..c488a7c5b07e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -120,6 +120,11 @@ enum mlx5_flow_steering_mode { MLX5_FLOW_STEERING_MODE_SMFS }; +enum mlx5_flow_steering_capabilty { + MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0, + MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1, +}; + struct mlx5_flow_steering { struct mlx5_core_dev *dev; enum mlx5_flow_steering_mode mode; @@ -301,6 +306,8 @@ void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev); int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports); void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev); +u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); + struct mlx5_flow_root_namespace *find_root(struct fs_node *node); #define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 84dbe46d5ede..4aa22dce9b77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -57,7 +57,8 @@ static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level, return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1); } -static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) +static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, + u8 *reset_type, u8 *reset_state) { u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; @@ -71,25 +72,67 @@ static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *r *reset_level = MLX5_GET(mfrl_reg, out, reset_level); if (reset_type) *reset_type = MLX5_GET(mfrl_reg, out, reset_type); + if (reset_state) + *reset_state = MLX5_GET(mfrl_reg, out, reset_state); return 0; } int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) { - return mlx5_reg_mfrl_query(dev, reset_level, reset_type); + return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL); } -int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel) +static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev, + struct netlink_ext_ack *extack) +{ + u8 reset_state; + + if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state)) + goto out; + + switch (reset_state) { + case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION: + case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS: + NL_SET_ERR_MSG_MOD(extack, "Sync reset was already triggered"); + return -EBUSY; + case MLX5_MFRL_REG_RESET_STATE_TIMEOUT: + NL_SET_ERR_MSG_MOD(extack, "Sync reset got timeout"); + return -ETIMEDOUT; + case MLX5_MFRL_REG_RESET_STATE_NACK: + NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset"); + return -EPERM; + } + +out: + NL_SET_ERR_MSG_MOD(extack, "Sync reset failed"); + return -EIO; +} + +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, + struct netlink_ext_ack *extack) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; int err; set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); - err = mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, reset_type_sel, 0, true); - if (err) - clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); - return err; + + MLX5_SET(mfrl_reg, in, reset_level, MLX5_MFRL_REG_RESET_LEVEL3); + MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel); + MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, 1); + err = mlx5_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MFRL, 0, 1, false); + if (!err) + return 0; + + clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state)) + return mlx5_fw_reset_get_reset_state_err(dev, extack); + + NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed"); + return mlx5_cmd_check(dev, err, in, out); } int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h index 7761ee5fc7d0..694fc7cb2684 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -9,7 +9,8 @@ void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable); bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev); int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type); -int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel); +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, + struct netlink_ext_ack *extack); int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 4ddf6b330a44..6cad3b72c133 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -31,15 +31,22 @@ */ #include <linux/netdevice.h> +#include <net/bonding.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/eswitch.h> #include <linux/mlx5/vport.h> #include "lib/devcom.h" #include "mlx5_core.h" #include "eswitch.h" +#include "esw/acl/ofld.h" #include "lag.h" #include "mp.h" +enum { + MLX5_LAG_EGRESS_PORT_1 = 1, + MLX5_LAG_EGRESS_PORT_2, +}; + /* General purpose, use for short periods of time. * Beware of lock dependencies (preferably, no locks should be acquired * under it). @@ -193,15 +200,71 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled && tracker->netdev_state[MLX5_LAG_P2].link_up; - *port1 = 1; - *port2 = 2; + *port1 = MLX5_LAG_EGRESS_PORT_1; + *port2 = MLX5_LAG_EGRESS_PORT_2; if ((!p1en && !p2en) || (p1en && p2en)) return; if (p1en) - *port2 = 1; + *port2 = MLX5_LAG_EGRESS_PORT_1; + else + *port1 = MLX5_LAG_EGRESS_PORT_2; +} + +static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev) +{ + return ldev->pf[MLX5_LAG_P1].has_drop || ldev->pf[MLX5_LAG_P2].has_drop; +} + +static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (!ldev->pf[i].has_drop) + continue; + + mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch, + MLX5_VPORT_UPLINK); + ldev->pf[i].has_drop = false; + } +} + +static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + struct mlx5_core_dev *inactive; + u8 v2p_port1, v2p_port2; + int inactive_idx; + int err; + + /* First delete the current drop rule so there won't be any dropped + * packets + */ + mlx5_lag_drop_rule_cleanup(ldev); + + if (!ldev->tracker.has_inactive) + return; + + mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, &v2p_port2); + + if (v2p_port1 == MLX5_LAG_EGRESS_PORT_1) { + inactive = dev1; + inactive_idx = MLX5_LAG_P2; + } else { + inactive = dev0; + inactive_idx = MLX5_LAG_P1; + } + + err = mlx5_esw_acl_ingress_vport_drop_rule_create(inactive->priv.eswitch, + MLX5_VPORT_UPLINK); + if (!err) + ldev->pf[inactive_idx].has_drop = true; else - *port1 = 2; + mlx5_core_err(inactive, + "Failed to create lag drop rule, error: %d", err); } static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 v2p_port1, u8 v2p_port2) @@ -238,6 +301,10 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]); } + + if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + !(ldev->flags & MLX5_LAG_FLAG_ROCE)) + mlx5_lag_drop_rule_setup(ldev, tracker); } static void mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev, @@ -339,6 +406,10 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, return err; } + if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + !roce_lag) + mlx5_lag_drop_rule_setup(ldev, tracker); + ldev->flags |= flags; ldev->shared_fdb = shared_fdb; return 0; @@ -347,6 +418,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, static int mlx5_deactivate_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; bool roce_lag = __mlx5_lag_is_roce(ldev); u8 flags = ldev->flags; @@ -356,8 +428,8 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) mlx5_lag_mp_reset(ldev); if (ldev->shared_fdb) { - mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch, - ldev->pf[MLX5_LAG_P2].dev->priv.eswitch); + mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch, + dev1->priv.eswitch); ldev->shared_fdb = false; } @@ -372,11 +444,15 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) "Failed to deactivate VF LAG; driver restart required\n" "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); } - } else if (flags & MLX5_LAG_FLAG_HASH_BASED) { - mlx5_lag_port_sel_destroy(ldev); + return err; } - return err; + if (flags & MLX5_LAG_FLAG_HASH_BASED) + mlx5_lag_port_sel_destroy(ldev); + if (mlx5_lag_has_drop_rule(ldev)) + mlx5_lag_drop_rule_cleanup(ldev); + + return 0; } static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) @@ -613,6 +689,8 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, struct net_device *upper = info->upper_dev, *ndev_tmp; struct netdev_lag_upper_info *lag_upper_info = NULL; bool is_bonded, is_in_lag, mode_supported; + bool has_inactive = 0; + struct slave *slave; int bond_status = 0; int num_slaves = 0; int changed = 0; @@ -632,8 +710,12 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, rcu_read_lock(); for_each_netdev_in_bond_rcu(upper, ndev_tmp) { idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); - if (idx >= 0) + if (idx >= 0) { + slave = bond_slave_get_rcu(ndev_tmp); + if (slave) + has_inactive |= bond_is_slave_inactive(slave); bond_status |= (1 << idx); + } num_slaves++; } @@ -648,6 +730,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, tracker->hash_type = lag_upper_info->hash_type; } + tracker->has_inactive = has_inactive; /* Determine bonding status: * A device is considered bonded if both its physical ports are slaves * of the same lag master, and only them. @@ -704,6 +787,38 @@ static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, return 1; } +static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev) +{ + struct net_device *ndev_tmp; + struct slave *slave; + bool has_inactive = 0; + int idx; + + if (!netif_is_lag_master(ndev)) + return 0; + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(ndev, ndev_tmp) { + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); + if (idx < 0) + continue; + + slave = bond_slave_get_rcu(ndev_tmp); + if (slave) + has_inactive |= bond_is_slave_inactive(slave); + } + rcu_read_unlock(); + + if (tracker->has_inactive == has_inactive) + return 0; + + tracker->has_inactive = has_inactive; + + return 1; +} + static int mlx5_lag_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -712,7 +827,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, struct mlx5_lag *ldev; int changed = 0; - if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE)) + if (event != NETDEV_CHANGEUPPER && + event != NETDEV_CHANGELOWERSTATE && + event != NETDEV_CHANGEINFODATA) return NOTIFY_DONE; ldev = container_of(this, struct mlx5_lag, nb); @@ -728,6 +845,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, changed = mlx5_handle_changelowerstate_event(ldev, &tracker, ndev, ptr); break; + case NETDEV_CHANGEINFODATA: + changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev); + break; } ldev->tracker = tracker; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index e5d231c31b54..cbf9a9003e55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -28,6 +28,7 @@ enum { struct lag_func { struct mlx5_core_dev *dev; struct net_device *netdev; + bool has_drop; }; /* Used for collection of netdev event info. */ @@ -35,6 +36,7 @@ struct lag_tracker { enum netdev_lag_tx_type tx_type; struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; unsigned int is_bonded:1; + unsigned int has_inactive:1; enum netdev_lag_hash hash_type; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index 626aa60b6099..4a6ec15ef046 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -50,7 +50,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, enum mlx5_lag_port_affinity port) { - struct lag_tracker tracker; + struct lag_tracker tracker = {}; if (!__mlx5_lag_is_multipath(ldev)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h index 4bad6a5fde56..f240ffe5116c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h @@ -92,13 +92,6 @@ mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent) { } - -static inline int -mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent, - void *buf, int len) -{ - return 0; -} #endif #endif /* __LIB_HV_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index bba72b220cc3..d8d36477b97f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -736,10 +736,9 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out); if (err) { - u32 syndrome; - u8 status; + u32 syndrome = MLX5_GET(query_issi_out, query_out, syndrome); + u8 status = MLX5_GET(query_issi_out, query_out, status); - mlx5_cmd_mbox_status(query_out, &status, &syndrome); if (!status || syndrome == MLX5_DRIVER_SYND) { mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n", err, status, syndrome); @@ -1488,8 +1487,8 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) INIT_LIST_HEAD(&priv->pgdir_list); priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev)); - priv->dbg_root = debugfs_create_dir(dev_name(dev->device), - mlx5_debugfs_root); + priv->dbg.dbg_root = debugfs_create_dir(dev_name(dev->device), + mlx5_debugfs_root); INIT_LIST_HEAD(&priv->traps); err = mlx5_tout_init(dev); @@ -1525,7 +1524,7 @@ err_pagealloc_init: err_health_init: mlx5_tout_cleanup(dev); err_timeout_init: - debugfs_remove(dev->priv.dbg_root); + debugfs_remove(dev->priv.dbg.dbg_root); mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); mutex_destroy(&priv->bfregs.wc_head.lock); @@ -1543,7 +1542,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_tout_cleanup(dev); - debugfs_remove_recursive(dev->priv.dbg_root); + debugfs_remove_recursive(dev->priv.dbg.dbg_root); mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); mutex_destroy(&priv->bfregs.wc_head.lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index f6b5451328fc..e0543b860144 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -327,11 +327,12 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id, } static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, - int notify_fail, bool ec_function) + int event, bool ec_function) { u32 function = get_function(func_id, ec_function); u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); + int notify_fail = event; u64 addr; int err; u32 *in; @@ -351,8 +352,10 @@ retry: if (err) { if (err == -ENOMEM) err = alloc_system_page(dev, function); - if (err) + if (err) { + dev->priv.fw_pages_alloc_failed += (npages - i); goto out_4k; + } goto retry; } @@ -365,11 +368,20 @@ retry: MLX5_SET(manage_pages_in, in, input_num_entries, npages); MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_do(dev, in, inlen, out, sizeof(out)); + if (err == -EREMOTEIO) { + notify_fail = 0; + /* if triggered by FW and failed by FW ignore */ + if (event) { + err = 0; + goto out_dropped; + } + } if (err) { + err = mlx5_cmd_check(dev, err, in, out); mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err); - goto out_4k; + goto out_dropped; } dev->priv.fw_pages += npages; @@ -384,6 +396,8 @@ retry: kvfree(in); return 0; +out_dropped: + dev->priv.give_pages_dropped += npages; out_4k: for (i--; i >= 0; i--) free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), function); @@ -455,7 +469,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, u32 i = 0; if (!mlx5_cmd_is_down(dev)) - return mlx5_cmd_exec(dev, in, in_size, out, out_size); + return mlx5_cmd_do(dev, in, in_size, out, out_size); /* No hard feelings, we want our pages back! */ npages = MLX5_GET(manage_pages_in, in, input_num_entries); @@ -479,7 +493,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, } static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, - int *nclaimed, bool ec_function) + int *nclaimed, bool event, bool ec_function) { u32 function = get_function(func_id, ec_function); int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); @@ -507,6 +521,14 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, func_id, npages, outlen); err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen); if (err) { + npages = MLX5_GET(manage_pages_in, in, input_num_entries); + dev->priv.reclaim_pages_discard += npages; + } + /* if triggered by FW event and failed by FW then ignore */ + if (event && err == -EREMOTEIO) + err = 0; + if (err) { + err = mlx5_cmd_check(dev, err, in, out); mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err); goto out_free; } @@ -546,7 +568,7 @@ static void pages_work_handler(struct work_struct *work) release_all_pages(dev, req->func_id, req->ec_function); else if (req->npages < 0) err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL, - req->ec_function); + true, req->ec_function); else if (req->npages > 0) err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function); @@ -645,7 +667,7 @@ static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev, int err; err = reclaim_pages(dev, func_id, optimal_reclaimed_pages(), - &nclaimed, mlx5_core_is_ecpf(dev)); + &nclaimed, false, mlx5_core_is_ecpf(dev)); if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d) for func id 0x%x\n", err, func_id); @@ -700,12 +722,14 @@ int mlx5_pagealloc_init(struct mlx5_core_dev *dev) return -ENOMEM; xa_init(&dev->priv.page_root_xa); + mlx5_pages_debugfs_init(dev); return 0; } void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) { + mlx5_pages_debugfs_cleanup(dev); xa_destroy(&dev->priv.page_root_xa); destroy_workqueue(dev->priv.pg_wq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 7b16a1188aab..289b29a23418 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -33,9 +33,10 @@ #include <linux/mlx5/port.h> #include "mlx5_core.h" -int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, - int size_in, void *data_out, int size_out, - u16 reg_id, int arg, int write) +/* calling with verbose false will not print error to log */ +int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, + void *data_out, int size_out, u16 reg_id, int arg, + int write, bool verbose) { int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out; int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in; @@ -57,7 +58,9 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, MLX5_SET(access_register_in, in, argument, arg); MLX5_SET(access_register_in, in, register_id, reg_id); - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + err = mlx5_cmd_do(dev, in, inlen, out, outlen); + if (verbose) + err = mlx5_cmd_check(dev, err, in, out); if (err) goto out; @@ -69,6 +72,15 @@ out: kvfree(in); return err; } +EXPORT_SYMBOL_GPL(mlx5_access_reg); + +int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, + int size_in, void *data_out, int size_out, + u16 reg_id, int arg, int write) +{ + return mlx5_access_reg(dev, data_in, size_in, data_out, size_out, + reg_id, arg, write, true); +} EXPORT_SYMBOL_GPL(mlx5_core_access_reg); int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index c61a5e83c78c..743422acc3d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -669,15 +669,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, case DR_ACTION_TYP_VPORT: attr.hit_gvmi = action->vport->caps->vhca_gvmi; dest_action = action; - if (rx_rule) { - if (action->vport->caps->num == MLX5_VPORT_UPLINK) { - mlx5dr_dbg(dmn, "Device doesn't support Loopback on WIRE vport\n"); - return -EOPNOTSUPP; - } - attr.final_icm_addr = action->vport->caps->icm_address_rx; - } else { - attr.final_icm_addr = action->vport->caps->icm_address_tx; - } + attr.final_icm_addr = rx_rule ? + action->vport->caps->icm_address_rx : + action->vport->caps->icm_address_tx; break; case DR_ACTION_TYP_POP_VLAN: if (!rx_rule && !(dmn->ste_ctx->actions_caps & diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c index 2784cd59fefe..2e8b109fb34e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -630,7 +630,7 @@ void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn) } dmn->dump_info.steering_debugfs = - debugfs_create_dir("steering", dev->priv.dbg_root); + debugfs_create_dir("steering", mlx5_debugfs_get_dev_root(dev)); dmn->dump_info.fdb_debugfs = debugfs_create_dir("fdb", dmn->dump_info.steering_debugfs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 5fa7f9d6d8b9..fc6ae49b5ecc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -8,7 +8,7 @@ #define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ ((dmn)->info.caps.dmn_type##_sw_owner || \ ((dmn)->info.caps.dmn_type##_sw_owner_v2 && \ - (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_6DX)) + (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_7)) static void dr_domain_init_csum_recalc_fts(struct mlx5dr_domain *dmn) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index 38971fe1dfe1..a4b5b415df90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -47,6 +47,11 @@ static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec) return spec->ttl_hoplimit; } +static bool dr_mask_is_ipv4_ihl_set(struct mlx5dr_match_spec *spec) +{ + return spec->ipv4_ihl; +} + #define DR_MASK_IS_L2_DST(_spec, _misc, _inner_outer) (_spec.first_vid || \ (_spec).first_cfi || (_spec).first_prio || (_spec).cvlan_tag || \ (_spec).svlan_tag || (_spec).dmac_47_16 || (_spec).dmac_15_0 || \ @@ -103,7 +108,7 @@ dr_mask_is_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) static bool dr_matcher_supp_vxlan_gpe(struct mlx5dr_cmd_caps *caps) { - return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || (caps->flex_protocols & MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED); } @@ -144,7 +149,7 @@ static bool dr_mask_is_tnl_geneve_tlv_opt_exist_set(struct mlx5dr_match_misc *mi static bool dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps) { - return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_ENABLED); } @@ -261,13 +266,13 @@ static bool dr_mask_is_tnl_gtpu_any(struct mlx5dr_match_param *mask, static int dr_matcher_supp_icmp_v4(struct mlx5dr_cmd_caps *caps) { - return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED); } static int dr_matcher_supp_icmp_v6(struct mlx5dr_cmd_caps *caps) { - return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED); } @@ -507,7 +512,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++], &mask, inner, rx); - if (dr_mask_is_ttl_set(&mask.outer)) + if (dr_mask_is_ttl_set(&mask.outer) || + dr_mask_is_ipv4_ihl_set(&mask.outer)) mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++], &mask, inner, rx); } @@ -614,7 +620,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++], &mask, inner, rx); - if (dr_mask_is_ttl_set(&mask.inner)) + if (dr_mask_is_ttl_set(&mask.inner) || + dr_mask_is_ipv4_ihl_set(&mask.inner)) mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++], &mask, inner, rx); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 187e29b409b6..518e949847a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -523,8 +523,8 @@ void mlx5dr_ste_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes) { - ste_ctx->set_actions_tx(dmn, action_type_set, hw_ste_arr, - attr, added_stes); + ste_ctx->set_actions_tx(dmn, action_type_set, ste_ctx->actions_caps, + hw_ste_arr, attr, added_stes); } void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, @@ -534,8 +534,8 @@ void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes) { - ste_ctx->set_actions_rx(dmn, action_type_set, hw_ste_arr, - attr, added_stes); + ste_ctx->set_actions_rx(dmn, action_type_set, ste_ctx->actions_caps, + hw_ste_arr, attr, added_stes); } const struct mlx5dr_ste_action_modify_field * @@ -793,6 +793,7 @@ static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec, bo spec->tcp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_sport, clr); spec->tcp_dport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_dport, clr); + spec->ipv4_ihl = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ipv4_ihl, clr); spec->ttl_hoplimit = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ttl_hoplimit, clr); spec->udp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, udp_sport, clr); @@ -1360,15 +1361,14 @@ void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx, ste_ctx->build_tnl_header_0_1_init(sb, mask); } -static struct mlx5dr_ste_ctx *mlx5dr_ste_ctx_arr[] = { - [MLX5_STEERING_FORMAT_CONNECTX_5] = &ste_ctx_v0, - [MLX5_STEERING_FORMAT_CONNECTX_6DX] = &ste_ctx_v1, -}; - struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version) { - if (version > MLX5_STEERING_FORMAT_CONNECTX_6DX) - return NULL; + if (version == MLX5_STEERING_FORMAT_CONNECTX_5) + return mlx5dr_ste_get_ctx_v0(); + else if (version == MLX5_STEERING_FORMAT_CONNECTX_6DX) + return mlx5dr_ste_get_ctx_v1(); + else if (version == MLX5_STEERING_FORMAT_CONNECTX_7) + return mlx5dr_ste_get_ctx_v2(); - return mlx5dr_ste_ctx_arr[version]; + return NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h index ca8fa32b8680..17513baff9b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h @@ -161,11 +161,13 @@ struct mlx5dr_ste_ctx { u32 actions_caps; void (*set_actions_rx)(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *hw_ste_arr, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); void (*set_actions_tx)(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *hw_ste_arr, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); @@ -197,7 +199,8 @@ struct mlx5dr_ste_ctx { void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size); }; -extern struct mlx5dr_ste_ctx ste_ctx_v0; -extern struct mlx5dr_ste_ctx ste_ctx_v1; +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void); +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void); +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void); #endif /* _DR_STE_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index 2d62950f7a29..5a322335f204 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -408,6 +408,7 @@ static void dr_ste_v0_arr_init_next(u8 **last_ste, static void dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *last_ste, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes) @@ -477,6 +478,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn, static void dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *last_ste, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes) @@ -1152,6 +1154,7 @@ dr_ste_v0_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value, struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit); + DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, ihl, spec, ipv4_ihl); return 0; } @@ -1897,7 +1900,7 @@ static void dr_ste_v0_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v0_build_tnl_header_0_1_tag; } -struct mlx5dr_ste_ctx ste_ctx_v0 = { +static struct mlx5dr_ste_ctx ste_ctx_v0 = { /* Builders */ .build_eth_l2_src_dst_init = &dr_ste_v0_build_eth_l2_src_dst_init, .build_eth_l3_ipv6_src_init = &dr_ste_v0_build_eth_l3_ipv6_src_init, @@ -1950,3 +1953,8 @@ struct mlx5dr_ste_ctx ste_ctx_v0 = { .set_action_copy = &dr_ste_v0_set_action_copy, .set_action_decap_l3_list = &dr_ste_v0_set_action_decap_l3_list, }; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void) +{ + return &ste_ctx_v0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index 6ca06800f1d9..fcb962c6db2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -3,7 +3,7 @@ #include <linux/types.h> #include "mlx5_ifc_dr_ste_v1.h" -#include "dr_ste.h" +#include "dr_ste_v1.h" #define DR_STE_CALC_DFNR_TYPE(lookup_type, inner) \ ((inner) ? DR_STE_V1_LU_TYPE_##lookup_type##_I : \ @@ -121,12 +121,12 @@ enum { DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2 = 0x8c, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_3 = 0x8d, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_4 = 0x8e, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_5 = 0x8f, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_6 = 0x90, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_7 = 0x91, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0 = 0x8c, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1 = 0x8d, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0 = 0x8e, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1 = 0x8f, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0 = 0x90, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1 = 0x91, }; static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field_arr[] = { @@ -223,22 +223,22 @@ static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field .hw_field = DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { - .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_6, .start = 0, .end = 31, + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { - .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_7, .start = 0, .end = 31, + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { - .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_4, .start = 0, .end = 31, + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { - .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_5, .start = 0, .end = 31, + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { - .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2, .start = 0, .end = 31, + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { - .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_3, .start = 0, .end = 31, + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31, }, [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { .hw_field = DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31, @@ -262,7 +262,7 @@ static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type) MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, entry_type); } -static void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) +void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) { u64 index = miss_addr >> 6; @@ -270,7 +270,7 @@ static void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6, index); } -static u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p) +u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p) { u64 index = ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) | @@ -279,12 +279,12 @@ static u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p) return index << 6; } -static void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask) +void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask) { MLX5_SET(ste_match_bwc_v1, hw_ste_p, byte_mask, byte_mask); } -static u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p) +u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p) { return MLX5_GET(ste_match_bwc_v1, hw_ste_p, byte_mask); } @@ -295,13 +295,13 @@ static void dr_ste_v1_set_lu_type(u8 *hw_ste_p, u16 lu_type) MLX5_SET(ste_match_bwc_v1, hw_ste_p, match_definer_ctx_idx, lu_type & 0xFF); } -static void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type) +void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type) { MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_entry_format, lu_type >> 8); MLX5_SET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx, lu_type & 0xFF); } -static u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p) +u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p) { u8 mode = MLX5_GET(ste_match_bwc_v1, hw_ste_p, next_entry_format); u8 index = MLX5_GET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx); @@ -314,7 +314,7 @@ static void dr_ste_v1_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi) MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_63_48, gvmi); } -static void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size) +void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size) { u64 index = (icm_addr >> 5) | ht_size; @@ -322,8 +322,7 @@ static void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size) MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_31_5_size, index); } -static void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, - bool is_rx, u16 gvmi) +void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi) { dr_ste_v1_set_lu_type(hw_ste_p, lu_type); dr_ste_v1_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE); @@ -333,8 +332,7 @@ static void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_63_48, gvmi); } -static void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, - u32 ste_size) +void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size) { u8 *tag = hw_ste_p + DR_STE_SIZE_CTRL; u8 *mask = tag + DR_STE_SIZE_TAG; @@ -511,11 +509,12 @@ static void dr_ste_v1_arr_init_next_match(u8 **last_ste, memset(action, 0, MLX5_FLD_SZ_BYTES(ste_mask_and_match_v1, action)); } -static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, - u8 *action_type_set, - u8 *last_ste, - struct mlx5dr_ste_actions_attr *attr, - u32 *added_stes) +void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) { u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action); u8 action_sz = DR_STE_ACTION_DOUBLE_SZ; @@ -533,7 +532,10 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; - allow_modify_hdr = false; + + /* Check if vlan_pop and modify_hdr on same STE is supported */ + if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY)) + allow_modify_hdr = false; } if (action_type_set[DR_ACTION_TYP_CTR]) @@ -631,11 +633,12 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); } -static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, - u8 *action_type_set, - u8 *last_ste, - struct mlx5dr_ste_actions_attr *attr, - u32 *added_stes) +void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) { u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action); u8 action_sz = DR_STE_ACTION_DOUBLE_SZ; @@ -677,13 +680,16 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; - allow_modify_hdr = false; - allow_ctr = false; } dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; + allow_ctr = false; + + /* Check if vlan_pop and modify_hdr on same STE is supported */ + if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY)) + allow_modify_hdr = false; } if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { @@ -731,9 +737,9 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; allow_modify_hdr = true; - allow_ctr = false; } dr_ste_v1_set_counter_id(last_ste, attr->ctr_id); + allow_ctr = false; } if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) { @@ -800,11 +806,11 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); } -static void dr_ste_v1_set_action_set(u8 *d_action, - u8 hw_field, - u8 shifter, - u8 length, - u32 data) +void dr_ste_v1_set_action_set(u8 *d_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) { shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; MLX5_SET(ste_double_action_set_v1, d_action, action_id, DR_STE_V1_ACTION_ID_SET); @@ -814,11 +820,11 @@ static void dr_ste_v1_set_action_set(u8 *d_action, MLX5_SET(ste_double_action_set_v1, d_action, inline_data, data); } -static void dr_ste_v1_set_action_add(u8 *d_action, - u8 hw_field, - u8 shifter, - u8 length, - u32 data) +void dr_ste_v1_set_action_add(u8 *d_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) { shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; MLX5_SET(ste_double_action_add_v1, d_action, action_id, DR_STE_V1_ACTION_ID_ADD); @@ -828,12 +834,12 @@ static void dr_ste_v1_set_action_add(u8 *d_action, MLX5_SET(ste_double_action_add_v1, d_action, add_value, data); } -static void dr_ste_v1_set_action_copy(u8 *d_action, - u8 dst_hw_field, - u8 dst_shifter, - u8 dst_len, - u8 src_hw_field, - u8 src_shifter) +void dr_ste_v1_set_action_copy(u8 *d_action, + u8 dst_hw_field, + u8 dst_shifter, + u8 dst_len, + u8 src_hw_field, + u8 src_shifter) { dst_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; src_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; @@ -848,11 +854,11 @@ static void dr_ste_v1_set_action_copy(u8 *d_action, #define DR_STE_DECAP_L3_ACTION_NUM 8 #define DR_STE_L2_HDR_MAX_SZ 20 -static int dr_ste_v1_set_action_decap_l3_list(void *data, - u32 data_sz, - u8 *hw_action, - u32 hw_action_sz, - u16 *used_hw_action_num) +int dr_ste_v1_set_action_decap_l3_list(void *data, + u32 data_sz, + u8 *hw_action, + u32 hw_action_sz, + u16 *used_hw_action_num) { u8 padded_data[DR_STE_L2_HDR_MAX_SZ] = {}; void *data_ptr = padded_data; @@ -977,8 +983,8 @@ static int dr_ste_v1_build_eth_l2_src_dst_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l2_src_dst_bit_mask(mask, sb->inner, sb->bit_mask); @@ -1001,8 +1007,8 @@ static int dr_ste_v1_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l3_ipv6_dst_tag(mask, sb, sb->bit_mask); @@ -1025,8 +1031,8 @@ static int dr_ste_v1_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l3_ipv6_src_tag(mask, sb, sb->bit_mask); @@ -1060,8 +1066,8 @@ static int dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *va return 0; } -static void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(mask, sb, sb->bit_mask); @@ -1079,8 +1085,8 @@ static void dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_vlan_id, mask, first_vid); DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_cfi, mask, first_cfi); DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_priority, mask, first_prio); - DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, ip_fragmented, mask, frag); // ? - DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, l3_ethertype, mask, ethertype); // ? + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, l3_ethertype, mask, ethertype); DR_STE_SET_ONES(eth_l2_src_v1, bit_mask, l3_type, mask, ip_version); if (mask->svlan_tag || mask->cvlan_tag) { @@ -1201,8 +1207,8 @@ static int dr_ste_v1_build_eth_l2_src_tag(struct mlx5dr_match_param *value, return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); } -static void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l2_src_bit_mask(mask, sb->inner, sb->bit_mask); @@ -1234,8 +1240,8 @@ static int dr_ste_v1_build_eth_l2_dst_tag(struct mlx5dr_match_param *value, return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); } -static void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l2_dst_bit_mask(mask, sb->inner, sb->bit_mask); @@ -1314,8 +1320,8 @@ static int dr_ste_v1_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l2_tnl_bit_mask(mask, sb->inner, sb->bit_mask); @@ -1331,12 +1337,13 @@ static int dr_ste_v1_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, time_to_live, spec, ttl_hoplimit); + DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, ihl, spec, ipv4_ihl); return 0; } -static void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l3_ipv4_misc_tag(mask, sb, sb->bit_mask); @@ -1375,8 +1382,8 @@ static int dr_ste_v1_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_ipv6_l3_l4_tag(mask, sb, sb->bit_mask); @@ -1399,8 +1406,8 @@ static int dr_ste_v1_build_mpls_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_mpls_tag(mask, sb, sb->bit_mask); @@ -1426,8 +1433,8 @@ static int dr_ste_v1_build_tnl_gre_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_tnl_gre_tag(mask, sb, sb->bit_mask); @@ -1471,8 +1478,8 @@ static int dr_ste_v1_build_tnl_mpls_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_tnl_mpls_tag(mask, sb, sb->bit_mask); @@ -1506,8 +1513,8 @@ static int dr_ste_v1_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *valu return 0; } -static void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask); @@ -1547,8 +1554,8 @@ static int dr_ste_v1_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *valu return 0; } -static void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask); @@ -1594,8 +1601,8 @@ static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_icmp_tag(mask, sb, sb->bit_mask); @@ -1616,8 +1623,8 @@ static int dr_ste_v1_build_general_purpose_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_general_purpose_tag(mask, sb, sb->bit_mask); @@ -1643,8 +1650,8 @@ static int dr_ste_v1_build_eth_l4_misc_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_eth_l4_misc_tag(mask, sb, sb->bit_mask); @@ -1673,9 +1680,8 @@ dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value, return 0; } -static void -dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(mask, sb, sb->bit_mask); @@ -1703,9 +1709,8 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value, return 0; } -static void -dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_flex_parser_tnl_geneve_tag(mask, sb, sb->bit_mask); @@ -1726,8 +1731,8 @@ static int dr_ste_v1_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; dr_ste_v1_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask); @@ -1749,8 +1754,8 @@ static int dr_ste_v1_build_register_0_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_register_0_tag(mask, sb, sb->bit_mask); @@ -1773,8 +1778,8 @@ static int dr_ste_v1_build_register_1_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_register_1_tag(mask, sb, sb->bit_mask); @@ -1837,8 +1842,8 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); @@ -1892,8 +1897,8 @@ static int dr_ste_v1_build_felx_parser_tag(struct mlx5dr_match_param *value, return 0; } -static void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0; dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask); @@ -1901,8 +1906,8 @@ static void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag; } -static void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1; dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask); @@ -1926,7 +1931,7 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *va return 0; } -static void +void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask) { @@ -1959,7 +1964,7 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(struct mlx5dr_match_par return 0; } -static void +void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask) { @@ -1982,8 +1987,8 @@ static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *v return 0; } -static void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask); @@ -2008,7 +2013,7 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value, return 0; } -static void +void dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask) { @@ -2035,7 +2040,7 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value, return 0; } -static void +void dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask) { @@ -2046,7 +2051,7 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag; } -struct mlx5dr_ste_ctx ste_ctx_v1 = { +static struct mlx5dr_ste_ctx ste_ctx_v1 = { /* Builders */ .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init, @@ -2091,7 +2096,8 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = { /* Actions */ .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP | DR_STE_CTX_ACTION_CAP_RX_PUSH | - DR_STE_CTX_ACTION_CAP_RX_ENCAP, + DR_STE_CTX_ACTION_CAP_RX_ENCAP | + DR_STE_CTX_ACTION_CAP_POP_MDFY, .set_actions_rx = &dr_ste_v1_set_actions_rx, .set_actions_tx = &dr_ste_v1_set_actions_tx, .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v1_action_modify_field_arr), @@ -2103,3 +2109,8 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = { /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void) +{ + return &ste_ctx_v1; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h new file mode 100644 index 000000000000..8a1d49790c6e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef _DR_STE_V1_ +#define _DR_STE_V1_ + +#include "dr_types.h" +#include "dr_ste.h" + +void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr); +u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p); +void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask); +u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p); +void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type); +u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p); +void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size); +void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi); +void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size); +void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); +void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); +void dr_ste_v1_set_action_set(u8 *d_action, u8 hw_field, u8 shifter, + u8 length, u32 data); +void dr_ste_v1_set_action_add(u8 *d_action, u8 hw_field, u8 shifter, + u8 length, u32 data); +void dr_ste_v1_set_action_copy(u8 *d_action, u8 dst_hw_field, u8 dst_shifter, + u8 dst_len, u8 src_hw_field, u8 src_shifter); +int dr_ste_v1_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action, + u32 hw_action_sz, u16 *used_hw_action_num); +void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); + +#endif /* _DR_STE_V1_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c new file mode 100644 index 000000000000..c60fddd125d2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "dr_ste_v1.h" + +enum { + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, + DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, + DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, + DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, + DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, + DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, + DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, + DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, + DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, + DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, + DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, + DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, + DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, + DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0 = 0x90, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1 = 0x91, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0 = 0x92, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1 = 0x93, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0 = 0x94, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1 = 0x95, +}; + +static const struct mlx5dr_ste_action_modify_field dr_ste_v2_action_modify_field_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15, + }, +}; + +static struct mlx5dr_ste_ctx ste_ctx_v2 = { + /* Builders */ + .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, + .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init, + .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init, + .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init, + .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init, + .build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init, + .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init, + .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init, + .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init, + .build_mpls_init = &dr_ste_v1_build_mpls_init, + .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init, + .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init, + .build_icmp_init = &dr_ste_v1_build_icmp_init, + .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init, + .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init, + .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init, + .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init, + .build_tnl_geneve_tlv_opt_exist_init = + &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init, + .build_register_0_init = &dr_ste_v1_build_register_0_init, + .build_register_1_init = &dr_ste_v1_build_register_1_init, + .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init, + + /* Getters and Setters */ + .ste_init = &dr_ste_v1_init, + .set_next_lu_type = &dr_ste_v1_set_next_lu_type, + .get_next_lu_type = &dr_ste_v1_get_next_lu_type, + .set_miss_addr = &dr_ste_v1_set_miss_addr, + .get_miss_addr = &dr_ste_v1_get_miss_addr, + .set_hit_addr = &dr_ste_v1_set_hit_addr, + .set_byte_mask = &dr_ste_v1_set_byte_mask, + .get_byte_mask = &dr_ste_v1_get_byte_mask, + + /* Actions */ + .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP | + DR_STE_CTX_ACTION_CAP_RX_PUSH | + DR_STE_CTX_ACTION_CAP_RX_ENCAP, + .set_actions_rx = &dr_ste_v1_set_actions_rx, + .set_actions_tx = &dr_ste_v1_set_actions_tx, + .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v2_action_modify_field_arr), + .modify_field_arr = dr_ste_v2_action_modify_field_arr, + .set_action_set = &dr_ste_v1_set_action_set, + .set_action_add = &dr_ste_v1_set_action_add, + .set_action_copy = &dr_ste_v1_set_action_copy, + .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, + + /* Send */ + .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, +}; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void) +{ + return &ste_ctx_v2; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 55fcb751e24a..88092fabf55b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -91,6 +91,7 @@ enum mlx5dr_ste_ctx_action_cap { DR_STE_CTX_ACTION_CAP_TX_POP = 1 << 0, DR_STE_CTX_ACTION_CAP_RX_PUSH = 1 << 1, DR_STE_CTX_ACTION_CAP_RX_ENCAP = 1 << 2, + DR_STE_CTX_ACTION_CAP_POP_MDFY = 1 << 3, }; enum { @@ -555,7 +556,9 @@ struct mlx5dr_match_spec { */ u32 tcp_dport:16; - u32 reserved_auto1:24; + u32 reserved_auto1:16; + u32 ipv4_ihl:4; + u32 reserved_auto2:4; u32 ttl_hoplimit:8; /* UDP source port.;tcp and udp sport/dport are mutually exclusive */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 3f311462bedf..045b0cf90063 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -754,6 +754,16 @@ static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns) return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain); } +static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + if (ft_type != FS_FT_FDB || + MLX5_CAP_GEN(ns->dev, steering_format_version) == MLX5_STEERING_FORMAT_CONNECTX_5) + return 0; + + return MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX | MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX; +} + bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) { return mlx5dr_is_supported(dev); @@ -778,6 +788,7 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = { .set_peer = mlx5_cmd_dr_set_peer, .create_ns = mlx5_cmd_dr_create_ns, .destroy_ns = mlx5_cmd_dr_destroy_ns, + .get_capabilities = mlx5_cmd_dr_get_capabilities, }; const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index dfa223415fe2..03efbdf3fec3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -136,7 +136,7 @@ mlx5dr_is_supported(struct mlx5_core_dev *dev) (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && (MLX5_CAP_GEN(dev, steering_format_version) <= - MLX5_STEERING_FORMAT_CONNECTX_6DX))); + MLX5_STEERING_FORMAT_CONNECTX_7))); } /* buddy functions & structure */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 866b9357939b..0bf1d64644ba 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -177,17 +177,6 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_driver_priv); -bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core) -{ - return mlxsw_core->driver->res_query_enabled; -} -EXPORT_SYMBOL(mlxsw_core_res_query_enabled); - -bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core) -{ - return mlxsw_core->driver->temp_warn_enabled; -} - bool mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, const struct mlxsw_fw_rev *req_rev) @@ -212,6 +201,32 @@ struct mlxsw_event_listener_item { void *priv; }; +static const u8 mlxsw_core_trap_groups[] = { + MLXSW_REG_HTGT_TRAP_GROUP_EMAD, + MLXSW_REG_HTGT_TRAP_GROUP_CORE_EVENT, +}; + +static int mlxsw_core_trap_groups_set(struct mlxsw_core *mlxsw_core) +{ + char htgt_pl[MLXSW_REG_HTGT_LEN]; + int err; + int i; + + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return 0; + + for (i = 0; i < ARRAY_SIZE(mlxsw_core_trap_groups); i++) { + mlxsw_reg_htgt_pack(htgt_pl, mlxsw_core_trap_groups[i], + MLXSW_REG_HTGT_INVALID_POLICER, + MLXSW_REG_HTGT_DEFAULT_PRIORITY, + MLXSW_REG_HTGT_DEFAULT_TC); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); + if (err) + return err; + } + return 0; +} + /****************** * EMAD processing ******************/ @@ -777,16 +792,10 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) if (err) goto err_trap_register; - err = mlxsw_core->driver->basic_trap_groups_set(mlxsw_core); - if (err) - goto err_emad_trap_set; mlxsw_core->emad.use_emad = true; return 0; -err_emad_trap_set: - mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener, - mlxsw_core); err_trap_register: destroy_workqueue(mlxsw_core->emad_wq); return err; @@ -1706,7 +1715,7 @@ static void mlxsw_core_health_listener_func(const struct mlxsw_reg_info *reg, } static const struct mlxsw_listener mlxsw_core_health_listener = - MLXSW_EVENTL(mlxsw_core_health_listener_func, MFDE, MFDE); + MLXSW_CORE_EVENTL(mlxsw_core_health_listener_func, MFDE); static int mlxsw_core_health_fw_fatal_dump_fatal_cause(const char *mfde_pl, @@ -2019,7 +2028,7 @@ static int mlxsw_core_health_init(struct mlxsw_core *mlxsw_core) struct devlink_health_reporter *fw_fatal; int err; - if (!mlxsw_core->driver->fw_fatal_enabled) + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return 0; fw_fatal = devlink_health_reporter_create(devlink, &mlxsw_core_health_fw_fatal_ops, @@ -2049,7 +2058,7 @@ err_trap_register: static void mlxsw_core_health_fini(struct mlxsw_core *mlxsw_core) { - if (!mlxsw_core->driver->fw_fatal_enabled) + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return; mlxsw_core_health_fw_fatal_config(mlxsw_core, false); @@ -2069,7 +2078,6 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, const char *device_kind = mlxsw_bus_info->device_kind; struct mlxsw_core *mlxsw_core; struct mlxsw_driver *mlxsw_driver; - struct mlxsw_res *res; size_t alloc_size; int err; @@ -2095,8 +2103,8 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, mlxsw_core->bus_priv = bus_priv; mlxsw_core->bus_info = mlxsw_bus_info; - res = mlxsw_driver->res_query_enabled ? &mlxsw_core->res : NULL; - err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, res); + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, + &mlxsw_core->res); if (err) goto err_bus_init; @@ -2122,6 +2130,10 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, } } + err = mlxsw_core_trap_groups_set(mlxsw_core); + if (err) + goto err_trap_groups_set; + err = mlxsw_emad_init(mlxsw_core); if (err) goto err_emad_init; @@ -2181,6 +2193,7 @@ err_fw_rev_validate: err_register_params: mlxsw_emad_fini(mlxsw_core); err_emad_init: +err_trap_groups_set: kfree(mlxsw_core->lag.mapping); err_alloc_lag_mapping: mlxsw_ports_fini(mlxsw_core, reload); @@ -2500,6 +2513,9 @@ int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, char hpkt_pl[MLXSW_REG_HPKT_LEN]; int err; + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return 0; + err = mlxsw_core_listener_register(mlxsw_core, listener, priv, listener->enabled_on_register); if (err) @@ -2529,6 +2545,9 @@ void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, { char hpkt_pl[MLXSW_REG_HPKT_LEN]; + if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) + return; + if (!listener->is_event) { mlxsw_reg_hpkt_pack(hpkt_pl, listener->dis_action, listener->trap_id, listener->dis_trap_group, @@ -2540,6 +2559,45 @@ void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_trap_unregister); +int mlxsw_core_traps_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listeners, + size_t listeners_count, void *priv) +{ + int i, err; + + for (i = 0; i < listeners_count; i++) { + err = mlxsw_core_trap_register(mlxsw_core, + &listeners[i], + priv); + if (err) + goto err_listener_register; + } + return 0; + +err_listener_register: + for (i--; i >= 0; i--) { + mlxsw_core_trap_unregister(mlxsw_core, + &listeners[i], + priv); + } + return err; +} +EXPORT_SYMBOL(mlxsw_core_traps_register); + +void mlxsw_core_traps_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listeners, + size_t listeners_count, void *priv) +{ + int i; + + for (i = 0; i < listeners_count; i++) { + mlxsw_core_trap_unregister(mlxsw_core, + &listeners[i], + priv); + } +} +EXPORT_SYMBOL(mlxsw_core_traps_unregister); + int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, bool enabled) @@ -3181,9 +3239,6 @@ int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox, u16 id; int err; - if (!res) - return 0; - mlxsw_cmd_mbox_zero(mbox); for (index = 0; index < MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index f30bb8614e69..16ee5e90973d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -35,10 +35,6 @@ unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core); void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); -bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core); - -bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core); - bool mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev, const struct mlxsw_fw_rev *req_rev); @@ -163,6 +159,9 @@ struct mlxsw_listener { .enabled_on_register = true, \ } +#define MLXSW_CORE_EVENTL(_func, _trap_id) \ + MLXSW_EVENTL(_func, _trap_id, CORE_EVENT) + int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core, const struct mlxsw_rx_listener *rxl, void *priv, bool enabled); @@ -181,6 +180,12 @@ int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core, void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, void *priv); +int mlxsw_core_traps_register(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listeners, + size_t listeners_count, void *priv); +void mlxsw_core_traps_unregister(struct mlxsw_core *mlxsw_core, + const struct mlxsw_listener *listeners, + size_t listeners_count, void *priv); int mlxsw_core_trap_state_set(struct mlxsw_core *mlxsw_core, const struct mlxsw_listener *listener, bool enabled); @@ -315,7 +320,6 @@ struct mlxsw_driver { const struct mlxsw_bus_info *mlxsw_bus_info, struct netlink_ext_ack *extack); void (*fini)(struct mlxsw_core *mlxsw_core); - int (*basic_trap_groups_set)(struct mlxsw_core *mlxsw_core); int (*port_type_set)(struct mlxsw_core *mlxsw_core, u16 local_port, enum devlink_port_type new_type); int (*port_split)(struct mlxsw_core *mlxsw_core, u16 local_port, @@ -398,9 +402,6 @@ struct mlxsw_driver { u8 txhdr_len; const struct mlxsw_config_profile *profile; - bool res_query_enabled; - bool fw_fatal_enabled; - bool temp_warn_enabled; }; int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 77e82e6cf6e8..fa33caecc91d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -1957,6 +1957,83 @@ int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, } EXPORT_SYMBOL(mlxsw_afa_block_append_mcrouter); +/* SIP DIP Action + * -------------- + * The SIP_DIP_ACTION is used for modifying the SIP and DIP fields of the + * packet, e.g. for NAT. The L3 checksum is updated. Also, if the L4 is TCP or + * if the L4 is UDP and the checksum field is not zero, then the L4 checksum is + * updated. + */ + +#define MLXSW_AFA_IP_CODE 0x11 +#define MLXSW_AFA_IP_SIZE 2 + +enum mlxsw_afa_ip_s_d { + /* ip refers to dip */ + MLXSW_AFA_IP_S_D_DIP, + /* ip refers to sip */ + MLXSW_AFA_IP_S_D_SIP, +}; + +/* afa_ip_s_d + * Source or destination. + */ +MLXSW_ITEM32(afa, ip, s_d, 0x00, 31, 1); + +enum mlxsw_afa_ip_m_l { + /* LSB: ip[63:0] refers to ip[63:0] */ + MLXSW_AFA_IP_M_L_LSB, + /* MSB: ip[63:0] refers to ip[127:64] */ + MLXSW_AFA_IP_M_L_MSB, +}; + +/* afa_ip_m_l + * MSB or LSB. + */ +MLXSW_ITEM32(afa, ip, m_l, 0x00, 30, 1); + +/* afa_ip_ip_63_32 + * Bits [63:32] in the IP address to change to. + */ +MLXSW_ITEM32(afa, ip, ip_63_32, 0x08, 0, 32); + +/* afa_ip_ip_31_0 + * Bits [31:0] in the IP address to change to. + */ +MLXSW_ITEM32(afa, ip, ip_31_0, 0x0C, 0, 32); + +static void mlxsw_afa_ip_pack(char *payload, enum mlxsw_afa_ip_s_d s_d, + enum mlxsw_afa_ip_m_l m_l, u32 ip_31_0, + u32 ip_63_32) +{ + mlxsw_afa_ip_s_d_set(payload, s_d); + mlxsw_afa_ip_m_l_set(payload, m_l); + mlxsw_afa_ip_ip_31_0_set(payload, ip_31_0); + mlxsw_afa_ip_ip_63_32_set(payload, ip_63_32); +} + +int mlxsw_afa_block_append_ip(struct mlxsw_afa_block *block, bool is_dip, + bool is_lsb, u32 val_31_0, u32 val_63_32, + struct netlink_ext_ack *extack) +{ + enum mlxsw_afa_ip_s_d s_d = is_dip ? MLXSW_AFA_IP_S_D_DIP : + MLXSW_AFA_IP_S_D_SIP; + enum mlxsw_afa_ip_m_l m_l = is_lsb ? MLXSW_AFA_IP_M_L_LSB : + MLXSW_AFA_IP_M_L_MSB; + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_IP_CODE, + MLXSW_AFA_IP_SIZE); + + if (IS_ERR(act)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append IP action"); + return PTR_ERR(act); + } + + mlxsw_afa_ip_pack(act, s_d, m_l, val_31_0, val_63_32); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_ip); + /* L4 Port Action * -------------- * The L4_PORT_ACTION is used for modifying the sport and dport fields of the packet, e.g. for NAT. diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 16cbd6acbb01..db58037be46e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -92,6 +92,9 @@ int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid, int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, u16 expected_irif, u16 min_mtu, bool rmid_valid, u32 kvdl_index); +int mlxsw_afa_block_append_ip(struct mlxsw_afa_block *block, bool is_dip, + bool is_lsb, u32 val_31_0, u32 val_63_32, + struct netlink_ext_ack *extack); int mlxsw_afa_block_append_l4port(struct mlxsw_afa_block *block, bool is_dport, u16 l4_port, struct netlink_ext_ack *extack); int mlxsw_afa_block_append_police(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 6dd4ae2f45f4..29a74b8bd5b5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -18,6 +18,7 @@ struct mlxsw_env_module_info { int num_ports_mapped; int num_ports_up; enum ethtool_module_power_mode_policy power_mode_policy; + enum mlxsw_reg_pmtm_module_type type; }; struct mlxsw_env { @@ -27,14 +28,47 @@ struct mlxsw_env { struct mlxsw_env_module_info module_info[]; }; -static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, - bool *qsfp, bool *cmis) +static int __mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(core); + int err; + + switch (mlxsw_env->module_info[module].type) { + case MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR: + err = -EINVAL; + break; + default: + err = 0; + } + + return err; +} + +static int mlxsw_env_validate_module_type(struct mlxsw_core *core, u8 module) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(core); + int err; + + mutex_lock(&mlxsw_env->module_info_lock); + err = __mlxsw_env_validate_module_type(core, module); + mutex_unlock(&mlxsw_env->module_info_lock); + + return err; +} + +static int +mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, bool *qsfp, + bool *cmis) { char mcia_pl[MLXSW_REG_MCIA_LEN]; char *eeprom_tmp; u8 ident; int err; + err = mlxsw_env_validate_module_type(core, id); + if (err) + return err; + mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1, MLXSW_REG_MCIA_I2C_ADDR_LOW); err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl); @@ -53,6 +87,7 @@ static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, *qsfp = true; break; case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD: + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_OSFP: *qsfp = true; *cmis = true; break; @@ -206,7 +241,8 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, return 0; } -int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, +int mlxsw_env_get_module_info(struct net_device *netdev, + struct mlxsw_core *mlxsw_core, int module, struct ethtool_modinfo *modinfo) { u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE]; @@ -215,6 +251,13 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, unsigned int read_size; int err; + err = mlxsw_env_validate_module_type(mlxsw_core, module); + if (err) { + netdev_err(netdev, + "EEPROM is not equipped on port module type"); + return err; + } + err = mlxsw_env_query_module_eeprom(mlxsw_core, module, 0, offset, module_info, false, &read_size); if (err) @@ -261,6 +304,7 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN / 2; break; case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD: + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_OSFP: /* Use SFF_8636 as base type. ethtool should recognize specific * type through the identifier value. */ @@ -356,6 +400,13 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module, { u32 bytes_read = 0; u16 device_addr; + int err; + + err = mlxsw_env_validate_module_type(mlxsw_core, module); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "EEPROM is not equipped on port module type"); + return err; + } /* Offset cannot be larger than 2 * ETH_MODULE_EEPROM_PAGE_LEN */ device_addr = page->offset; @@ -364,7 +415,6 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module, char mcia_pl[MLXSW_REG_MCIA_LEN]; char *eeprom_tmp; u8 size; - int err; size = min_t(u8, page->length - bytes_read, MLXSW_REG_MCIA_EEPROM_SIZE); @@ -414,11 +464,14 @@ int mlxsw_env_reset_module(struct net_device *netdev, !(req & (ETH_RESET_PHY << ETH_RESET_SHARED_SHIFT))) return 0; - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return -EINVAL; - mutex_lock(&mlxsw_env->module_info_lock); + err = __mlxsw_env_validate_module_type(mlxsw_core, module); + if (err) { + netdev_err(netdev, "Reset module is not supported on port module type\n"); + goto out; + } + if (mlxsw_env->module_info[module].num_ports_up) { netdev_err(netdev, "Cannot reset module when ports using it are administratively up\n"); err = -EINVAL; @@ -456,11 +509,14 @@ mlxsw_env_get_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module, u32 status_bits; int err; - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return -EINVAL; - mutex_lock(&mlxsw_env->module_info_lock); + err = __mlxsw_env_validate_module_type(mlxsw_core, module); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Power mode is not supported on port module type"); + goto out; + } + params->policy = mlxsw_env->module_info[module].power_mode_policy; mlxsw_reg_mcion_pack(mcion_pl, module); @@ -560,9 +616,6 @@ mlxsw_env_set_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module, bool low_power; int err = 0; - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return -EINVAL; - if (policy != ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH && policy != ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO) { NL_SET_ERR_MSG_MOD(extack, "Unsupported power mode policy"); @@ -571,6 +624,13 @@ mlxsw_env_set_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module, mutex_lock(&mlxsw_env->module_info_lock); + err = __mlxsw_env_validate_module_type(mlxsw_core, module); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Power mode set is not supported on port module type"); + goto out; + } + if (mlxsw_env->module_info[module].power_mode_policy == policy) goto out; @@ -661,13 +721,12 @@ static int mlxsw_env_temp_event_set(struct mlxsw_core *mlxsw_core, return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl); } -static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core, - u8 module_count) +static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core) { int i, err, sensor_index; bool has_temp_sensor; - for (i = 0; i < module_count; i++) { + for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) { err = mlxsw_env_module_has_temp_sensor(mlxsw_core, i, &has_temp_sensor); if (err) @@ -759,15 +818,12 @@ mlxsw_env_mtwe_listener_func(const struct mlxsw_reg_info *reg, char *mtwe_pl, } static const struct mlxsw_listener mlxsw_env_temp_warn_listener = - MLXSW_EVENTL(mlxsw_env_mtwe_listener_func, MTWE, MTWE); + MLXSW_CORE_EVENTL(mlxsw_env_mtwe_listener_func, MTWE); static int mlxsw_env_temp_warn_event_register(struct mlxsw_core *mlxsw_core) { struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); - if (!mlxsw_core_temp_warn_enabled(mlxsw_core)) - return 0; - return mlxsw_core_trap_register(mlxsw_core, &mlxsw_env_temp_warn_listener, mlxsw_env); @@ -775,9 +831,6 @@ static int mlxsw_env_temp_warn_event_register(struct mlxsw_core *mlxsw_core) static void mlxsw_env_temp_warn_event_unregister(struct mlxsw_env *mlxsw_env) { - if (!mlxsw_core_temp_warn_enabled(mlxsw_env->core)) - return; - mlxsw_core_trap_unregister(mlxsw_env->core, &mlxsw_env_temp_warn_listener, mlxsw_env); } @@ -849,16 +902,13 @@ mlxsw_env_pmpe_listener_func(const struct mlxsw_reg_info *reg, char *pmpe_pl, } static const struct mlxsw_listener mlxsw_env_module_plug_listener = - MLXSW_EVENTL(mlxsw_env_pmpe_listener_func, PMPE, PMPE); + MLXSW_CORE_EVENTL(mlxsw_env_pmpe_listener_func, PMPE); static int mlxsw_env_module_plug_event_register(struct mlxsw_core *mlxsw_core) { struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); - if (!mlxsw_core_temp_warn_enabled(mlxsw_core)) - return 0; - return mlxsw_core_trap_register(mlxsw_core, &mlxsw_env_module_plug_listener, mlxsw_env); @@ -867,21 +917,17 @@ mlxsw_env_module_plug_event_register(struct mlxsw_core *mlxsw_core) static void mlxsw_env_module_plug_event_unregister(struct mlxsw_env *mlxsw_env) { - if (!mlxsw_core_temp_warn_enabled(mlxsw_env->core)) - return; - mlxsw_core_trap_unregister(mlxsw_env->core, &mlxsw_env_module_plug_listener, mlxsw_env); } static int -mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core, - u8 module_count) +mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core) { int i, err; - for (i = 0; i < module_count; i++) { + for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) { char pmaos_pl[MLXSW_REG_PMAOS_LEN]; mlxsw_reg_pmaos_pack(pmaos_pl, i); @@ -901,9 +947,6 @@ mlxsw_env_module_overheat_counter_get(struct mlxsw_core *mlxsw_core, u8 module, { struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return -EINVAL; - mutex_lock(&mlxsw_env->module_info_lock); *p_counter = mlxsw_env->module_info[module].module_overheat_counter; mutex_unlock(&mlxsw_env->module_info_lock); @@ -916,9 +959,6 @@ void mlxsw_env_module_port_map(struct mlxsw_core *mlxsw_core, u8 module) { struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return; - mutex_lock(&mlxsw_env->module_info_lock); mlxsw_env->module_info[module].num_ports_mapped++; mutex_unlock(&mlxsw_env->module_info_lock); @@ -929,9 +969,6 @@ void mlxsw_env_module_port_unmap(struct mlxsw_core *mlxsw_core, u8 module) { struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return; - mutex_lock(&mlxsw_env->module_info_lock); mlxsw_env->module_info[module].num_ports_mapped--; mutex_unlock(&mlxsw_env->module_info_lock); @@ -943,9 +980,6 @@ int mlxsw_env_module_port_up(struct mlxsw_core *mlxsw_core, u8 module) struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); int err = 0; - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return -EINVAL; - mutex_lock(&mlxsw_env->module_info_lock); if (mlxsw_env->module_info[module].power_mode_policy != @@ -975,9 +1009,6 @@ void mlxsw_env_module_port_down(struct mlxsw_core *mlxsw_core, u8 module) { struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); - if (WARN_ON_ONCE(module >= mlxsw_env->module_count)) - return; - mutex_lock(&mlxsw_env->module_info_lock); mlxsw_env->module_info[module].num_ports_up--; @@ -999,6 +1030,28 @@ out_unlock: } EXPORT_SYMBOL(mlxsw_env_module_port_down); +static int +mlxsw_env_module_type_set(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + int i; + + for (i = 0; i < mlxsw_env->module_count; i++) { + char pmtm_pl[MLXSW_REG_PMTM_LEN]; + int err; + + mlxsw_reg_pmtm_pack(pmtm_pl, 0, i); + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(pmtm), pmtm_pl); + if (err) + return err; + + mlxsw_env->module_info[i].type = + mlxsw_reg_pmtm_module_type_get(pmtm_pl); + } + + return 0; +} + int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env) { char mgpir_pl[MLXSW_REG_MGPIR_LEN]; @@ -1037,17 +1090,21 @@ int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env) if (err) goto err_module_plug_event_register; - err = mlxsw_env_module_oper_state_event_enable(mlxsw_core, - env->module_count); + err = mlxsw_env_module_oper_state_event_enable(mlxsw_core); if (err) goto err_oper_state_event_enable; - err = mlxsw_env_module_temp_event_enable(mlxsw_core, env->module_count); + err = mlxsw_env_module_temp_event_enable(mlxsw_core); if (err) goto err_temp_event_enable; + err = mlxsw_env_module_type_set(mlxsw_core); + if (err) + goto err_type_set; + return 0; +err_type_set: err_temp_event_enable: err_oper_state_event_enable: mlxsw_env_module_plug_event_unregister(env); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h index da121b1a84b4..ec6564e5d2ee 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h @@ -12,7 +12,8 @@ struct ethtool_eeprom; int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, int off, int *temp); -int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, +int mlxsw_env_get_module_info(struct net_device *netdev, + struct mlxsw_core *mlxsw_core, int module, struct ethtool_modinfo *modinfo); int mlxsw_env_get_module_eeprom(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index d41afdfbd085..8b170ad92302 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -57,14 +57,14 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; int temp, index; int err; - index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + index = mlxsw_hwmon_get_attr_index(mlxsw_hwmon_attr->type_index, mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); @@ -80,14 +80,14 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; int temp_max, index; int err; - index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + index = mlxsw_hwmon_get_attr_index(mlxsw_hwmon_attr->type_index, mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); @@ -103,9 +103,9 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0}; unsigned long val; int index; @@ -117,7 +117,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, if (val != 1) return -EINVAL; - index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + index = mlxsw_hwmon_get_attr_index(mlxsw_hwmon_attr->type_index, mlxsw_hwmon->module_sensor_max); mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, index); @@ -138,13 +138,13 @@ static ssize_t mlxsw_hwmon_fan_rpm_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mfsm_pl[MLXSW_REG_MFSM_LEN]; int err; - mlxsw_reg_mfsm_pack(mfsm_pl, mlwsw_hwmon_attr->type_index); + mlxsw_reg_mfsm_pack(mfsm_pl, mlxsw_hwmon_attr->type_index); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsm), mfsm_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n"); @@ -157,9 +157,9 @@ static ssize_t mlxsw_hwmon_fan_fault_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char fore_pl[MLXSW_REG_FORE_LEN]; bool fault; int err; @@ -169,7 +169,7 @@ static ssize_t mlxsw_hwmon_fan_fault_show(struct device *dev, dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n"); return err; } - mlxsw_reg_fore_unpack(fore_pl, mlwsw_hwmon_attr->type_index, &fault); + mlxsw_reg_fore_unpack(fore_pl, mlxsw_hwmon_attr->type_index, &fault); return sprintf(buf, "%u\n", fault); } @@ -178,13 +178,13 @@ static ssize_t mlxsw_hwmon_pwm_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mfsc_pl[MLXSW_REG_MFSC_LEN]; int err; - mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, 0); + mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_hwmon_attr->type_index, 0); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query PWM\n"); @@ -198,9 +198,9 @@ static ssize_t mlxsw_hwmon_pwm_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mfsc_pl[MLXSW_REG_MFSC_LEN]; unsigned long val; int err; @@ -211,7 +211,7 @@ static ssize_t mlxsw_hwmon_pwm_store(struct device *dev, if (val > 255) return -EINVAL; - mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, val); + mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_hwmon_attr->type_index, val); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to write PWM\n"); @@ -224,14 +224,14 @@ static int mlxsw_hwmon_module_temp_get(struct device *dev, struct device_attribute *attr, int *p_temp) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; u8 module; int err; - module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); @@ -261,15 +261,15 @@ static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; u8 module, fault; u16 temp; int err; - module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, 1); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); @@ -303,13 +303,13 @@ static int mlxsw_hwmon_module_temp_critical_get(struct device *dev, struct device_attribute *attr, int *p_temp) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; u8 module; int err; - module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, SFP_TEMP_HIGH_WARN, p_temp); if (err) { @@ -337,13 +337,13 @@ static int mlxsw_hwmon_module_temp_emergency_get(struct device *dev, struct device_attribute *attr, int *p_temp) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; u8 module; int err; - module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, SFP_TEMP_HIGH_ALARM, p_temp); if (err) { @@ -373,11 +373,11 @@ mlxsw_hwmon_module_temp_label_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); return sprintf(buf, "front panel %03u\n", - mlwsw_hwmon_attr->type_index); + mlxsw_hwmon_attr->type_index); } static ssize_t @@ -385,10 +385,10 @@ mlxsw_hwmon_gbox_temp_label_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); - struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; - int index = mlwsw_hwmon_attr->type_index - + struct mlxsw_hwmon *mlxsw_hwmon = mlxsw_hwmon_attr->hwmon; + int index = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->module_sensor_max + 1; return sprintf(buf, "gearbox %03u\n", index); @@ -655,9 +655,6 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) u8 module_sensor_max; int i, err; - if (!mlxsw_core_res_query_enabled(mlxsw_hwmon->core)) - return 0; - mlxsw_reg_mgpir_pack(mgpir_pl); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index b29824448aa8..05f54bd982c0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -357,6 +357,10 @@ static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev, return 0; } +static struct thermal_zone_params mlxsw_thermal_params = { + .no_hwmon = true, +}; + static struct thermal_zone_device_ops mlxsw_thermal_ops = { .bind = mlxsw_thermal_bind, .unbind = mlxsw_thermal_unbind, @@ -388,11 +392,11 @@ static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev, trip->min_state, THERMAL_WEIGHT_DEFAULT); if (err < 0) - goto err_bind_cooling_device; + goto err_thermal_zone_bind_cooling_device; } return 0; -err_bind_cooling_device: +err_thermal_zone_bind_cooling_device: for (j = i - 1; j >= 0; j--) thermal_zone_unbind_cooling_device(tzdev, j, cdev); return err; @@ -678,7 +682,8 @@ mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz) MLXSW_THERMAL_TRIP_MASK, module_tz, &mlxsw_thermal_module_ops, - NULL, 0, + &mlxsw_thermal_params, + 0, module_tz->parent->polling_delay); if (IS_ERR(module_tz->tzdev)) { err = PTR_ERR(module_tz->tzdev); @@ -741,9 +746,6 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, char mgpir_pl[MLXSW_REG_MGPIR_LEN]; int i, err; - if (!mlxsw_core_res_query_enabled(core)) - return 0; - mlxsw_reg_mgpir_pack(mgpir_pl); err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); if (err) @@ -761,7 +763,7 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, for (i = 0; i < thermal->tz_module_num; i++) { err = mlxsw_thermal_module_init(dev, core, thermal, i); if (err) - goto err_unreg_tz_module_arr; + goto err_thermal_module_init; } for (i = 0; i < thermal->tz_module_num; i++) { @@ -770,12 +772,13 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, continue; err = mlxsw_thermal_module_tz_init(module_tz); if (err) - goto err_unreg_tz_module_arr; + goto err_thermal_module_tz_init; } return 0; -err_unreg_tz_module_arr: +err_thermal_module_tz_init: +err_thermal_module_init: for (i = thermal->tz_module_num - 1; i >= 0; i--) mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); kfree(thermal->tz_module_arr); @@ -787,9 +790,6 @@ mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) { int i; - if (!mlxsw_core_res_query_enabled(thermal->core)) - return; - for (i = thermal->tz_module_num - 1; i >= 0; i--) mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); kfree(thermal->tz_module_arr); @@ -808,7 +808,7 @@ mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz) MLXSW_THERMAL_TRIP_MASK, gearbox_tz, &mlxsw_thermal_gearbox_ops, - NULL, 0, + &mlxsw_thermal_params, 0, gearbox_tz->parent->polling_delay); if (IS_ERR(gearbox_tz->tzdev)) return PTR_ERR(gearbox_tz->tzdev); @@ -837,9 +837,6 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, int i; int err; - if (!mlxsw_core_res_query_enabled(core)) - return 0; - mlxsw_reg_mgpir_pack(mgpir_pl); err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); if (err) @@ -866,12 +863,12 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, gearbox_tz->parent = thermal; err = mlxsw_thermal_gearbox_tz_init(gearbox_tz); if (err) - goto err_unreg_tz_gearbox; + goto err_thermal_gearbox_tz_init; } return 0; -err_unreg_tz_gearbox: +err_thermal_gearbox_tz_init: for (i--; i >= 0; i--) mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]); kfree(thermal->tz_gearbox_arr); @@ -883,9 +880,6 @@ mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal) { int i; - if (!mlxsw_core_res_query_enabled(thermal->core)) - return; - for (i = thermal->tz_gearbox_num - 1; i >= 0; i--) mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]); kfree(thermal->tz_gearbox_arr); @@ -915,7 +909,7 @@ int mlxsw_thermal_init(struct mlxsw_core *core, err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl); if (err) { dev_err(dev, "Failed to probe PWMs\n"); - goto err_free_thermal; + goto err_reg_query; } mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active); @@ -929,14 +923,14 @@ int mlxsw_thermal_init(struct mlxsw_core *core, err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsl), mfsl_pl); if (err) - goto err_free_thermal; + goto err_reg_query; /* set the minimal RPMs to 0 */ mlxsw_reg_mfsl_tach_min_set(mfsl_pl, 0); err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsl), mfsl_pl); if (err) - goto err_free_thermal; + goto err_reg_write; } } for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) { @@ -949,7 +943,7 @@ int mlxsw_thermal_init(struct mlxsw_core *core, if (IS_ERR(cdev)) { err = PTR_ERR(cdev); dev_err(dev, "Failed to register cooling device\n"); - goto err_unreg_cdevs; + goto err_thermal_cooling_device_register; } thermal->cdevs[i] = cdev; } @@ -968,43 +962,45 @@ int mlxsw_thermal_init(struct mlxsw_core *core, MLXSW_THERMAL_TRIP_MASK, thermal, &mlxsw_thermal_ops, - NULL, 0, + &mlxsw_thermal_params, 0, thermal->polling_delay); if (IS_ERR(thermal->tzdev)) { err = PTR_ERR(thermal->tzdev); dev_err(dev, "Failed to register thermal zone\n"); - goto err_unreg_cdevs; + goto err_thermal_zone_device_register; } err = mlxsw_thermal_modules_init(dev, core, thermal); if (err) - goto err_unreg_tzdev; + goto err_thermal_modules_init; err = mlxsw_thermal_gearboxes_init(dev, core, thermal); if (err) - goto err_unreg_modules_tzdev; + goto err_thermal_gearboxes_init; err = thermal_zone_device_enable(thermal->tzdev); if (err) - goto err_unreg_gearboxes; + goto err_thermal_zone_device_enable; *p_thermal = thermal; return 0; -err_unreg_gearboxes: +err_thermal_zone_device_enable: mlxsw_thermal_gearboxes_fini(thermal); -err_unreg_modules_tzdev: +err_thermal_gearboxes_init: mlxsw_thermal_modules_fini(thermal); -err_unreg_tzdev: +err_thermal_modules_init: if (thermal->tzdev) { thermal_zone_device_unregister(thermal->tzdev); thermal->tzdev = NULL; } -err_unreg_cdevs: +err_thermal_zone_device_register: +err_thermal_cooling_device_register: for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) if (thermal->cdevs[i]) thermal_cooling_device_unregister(thermal->cdevs[i]); -err_free_thermal: +err_reg_write: +err_reg_query: devm_kfree(dev, thermal); return err; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 10d13f5f9c7d..060209983438 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -110,7 +110,8 @@ static int mlxsw_m_get_module_info(struct net_device *netdev, struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core; - return mlxsw_env_get_module_info(core, mlxsw_m_port->module, modinfo); + return mlxsw_env_get_module_info(netdev, core, mlxsw_m_port->module, + modinfo); } static int @@ -460,7 +461,6 @@ static struct mlxsw_driver mlxsw_m_driver = { .init = mlxsw_m_init, .fini = mlxsw_m_fini, .profile = &mlxsw_m_config_profile, - .res_query_enabled = true, }; static const struct i2c_device_id mlxsw_m_i2c_id[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 24cc65018b41..67b1a2f8397f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4482,6 +4482,8 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32); #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 BIT(22) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4 BIT(23) +#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_T BIT(24) +#define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T BIT(25) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR BIT(27) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR BIT(28) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR BIT(29) @@ -6062,6 +6064,58 @@ static inline void mlxsw_reg_pllp_unpack(char *payload, u8 *label_port, *slot_index = mlxsw_reg_pllp_slot_index_get(payload); } +/* PMTM - Port Module Type Mapping Register + * ---------------------------------------- + * The PMTM register allows query or configuration of module types. + * The register can only be set when the module is disabled by PMAOS register + */ +#define MLXSW_REG_PMTM_ID 0x5067 +#define MLXSW_REG_PMTM_LEN 0x10 + +MLXSW_REG_DEFINE(pmtm, MLXSW_REG_PMTM_ID, MLXSW_REG_PMTM_LEN); + +/* reg_pmtm_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, pmtm, slot_index, 0x00, 24, 4); + +/* reg_pmtm_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, pmtm, module, 0x00, 16, 8); + +enum mlxsw_reg_pmtm_module_type { + MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_4_LANES = 0, + MLXSW_REG_PMTM_MODULE_TYPE_QSFP = 1, + MLXSW_REG_PMTM_MODULE_TYPE_SFP = 2, + MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_SINGLE_LANE = 4, + MLXSW_REG_PMTM_MODULE_TYPE_BACKPLANE_2_LANES = 8, + MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP4X = 10, + MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP2X = 11, + MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP1X = 12, + MLXSW_REG_PMTM_MODULE_TYPE_QSFP_DD = 14, + MLXSW_REG_PMTM_MODULE_TYPE_OSFP = 15, + MLXSW_REG_PMTM_MODULE_TYPE_SFP_DD = 16, + MLXSW_REG_PMTM_MODULE_TYPE_DSFP = 17, + MLXSW_REG_PMTM_MODULE_TYPE_CHIP2CHIP8X = 18, + MLXSW_REG_PMTM_MODULE_TYPE_TWISTED_PAIR = 19, +}; + +/* reg_pmtm_module_type + * Module type. + * Access: RW + */ +MLXSW_ITEM32(reg, pmtm, module_type, 0x04, 0, 5); + +static inline void mlxsw_reg_pmtm_pack(char *payload, u8 slot_index, u8 module) +{ + MLXSW_REG_ZERO(pmtm, payload); + mlxsw_reg_pmtm_slot_index_set(payload, slot_index); + mlxsw_reg_pmtm_module_set(payload, module); +} + /* HTGT - Host Trap Group Table * ---------------------------- * Configures the properties for forwarding to CPU. @@ -6087,9 +6141,7 @@ MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4); enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_EMAD, - MLXSW_REG_HTGT_TRAP_GROUP_MFDE, - MLXSW_REG_HTGT_TRAP_GROUP_MTWE, - MLXSW_REG_HTGT_TRAP_GROUP_PMPE, + MLXSW_REG_HTGT_TRAP_GROUP_CORE_EVENT, MLXSW_REG_HTGT_TRAP_GROUP_SP_STP, MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP, MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP, @@ -6732,12 +6784,14 @@ static inline void mlxsw_reg_ritr_counter_pack(char *payload, u32 index, set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_BASIC; else set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_NO_COUNT; - mlxsw_reg_ritr_egress_counter_set_type_set(payload, set_type); - if (egress) + if (egress) { + mlxsw_reg_ritr_egress_counter_set_type_set(payload, set_type); mlxsw_reg_ritr_egress_counter_index_set(payload, index); - else + } else { + mlxsw_reg_ritr_ingress_counter_set_type_set(payload, set_type); mlxsw_reg_ritr_ingress_counter_index_set(payload, index); + } } static inline void mlxsw_reg_ritr_rif_pack(char *payload, u16 rif) @@ -9985,6 +10039,7 @@ enum mlxsw_reg_mcia_eeprom_module_info_id { MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS = 0x0D, MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28 = 0x11, MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD = 0x18, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_OSFP = 0x19, }; enum mlxsw_reg_mcia_eeprom_module_info { @@ -11271,24 +11326,24 @@ enum mlxsw_reg_mgpir_device_type { MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE, }; -/* device_type +/* mgpir_device_type * Access: RO */ MLXSW_ITEM32(reg, mgpir, device_type, 0x00, 24, 4); -/* devices_per_flash +/* mgpir_devices_per_flash * Number of devices of device_type per flash (can be shared by few devices). * Access: RO */ MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8); -/* num_of_devices +/* mgpir_num_of_devices * Number of devices of device_type. * Access: RO */ MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8); -/* num_of_modules +/* mgpir_num_of_modules * Number of modules. * Access: RO */ @@ -12568,6 +12623,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(pddr), MLXSW_REG(pmmp), MLXSW_REG(pllp), + MLXSW_REG(pmtm), MLXSW_REG(htgt), MLXSW_REG(hpkt), MLXSW_REG(rgcr), diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index c7fc650608eb..daacf6291253 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -33,6 +33,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_ACL_MAX_REGIONS, MLXSW_RES_ID_ACL_MAX_GROUPS, MLXSW_RES_ID_ACL_MAX_GROUP_SIZE, + MLXSW_RES_ID_ACL_MAX_DEFAULT_ACTIONS, MLXSW_RES_ID_ACL_FLEX_KEYS, MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE, MLXSW_RES_ID_ACL_ACTIONS_PER_SET, @@ -90,6 +91,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_ACL_MAX_REGIONS] = 0x2903, [MLXSW_RES_ID_ACL_MAX_GROUPS] = 0x2904, [MLXSW_RES_ID_ACL_MAX_GROUP_SIZE] = 0x2905, + [MLXSW_RES_ID_ACL_MAX_DEFAULT_ACTIONS] = 0x2908, [MLXSW_RES_ID_ACL_FLEX_KEYS] = 0x2910, [MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE] = 0x2911, [MLXSW_RES_ID_ACL_ACTIONS_PER_SET] = 0x2912, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index aa411dec62f0..7b7b17183d10 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -45,52 +45,49 @@ #include "spectrum_ptp.h" #include "spectrum_trap.h" +#define MLXSW_SP_FWREV_MINOR 2010 +#define MLXSW_SP_FWREV_SUBMINOR 1006 + #define MLXSW_SP1_FWREV_MAJOR 13 -#define MLXSW_SP1_FWREV_MINOR 2010 -#define MLXSW_SP1_FWREV_SUBMINOR 1006 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { .major = MLXSW_SP1_FWREV_MAJOR, - .minor = MLXSW_SP1_FWREV_MINOR, - .subminor = MLXSW_SP1_FWREV_SUBMINOR, + .minor = MLXSW_SP_FWREV_MINOR, + .subminor = MLXSW_SP_FWREV_SUBMINOR, .can_reset_minor = MLXSW_SP1_FWREV_CAN_RESET_MINOR, }; #define MLXSW_SP1_FW_FILENAME \ "mellanox/mlxsw_spectrum-" __stringify(MLXSW_SP1_FWREV_MAJOR) \ - "." __stringify(MLXSW_SP1_FWREV_MINOR) \ - "." __stringify(MLXSW_SP1_FWREV_SUBMINOR) ".mfa2" + "." __stringify(MLXSW_SP_FWREV_MINOR) \ + "." __stringify(MLXSW_SP_FWREV_SUBMINOR) ".mfa2" #define MLXSW_SP2_FWREV_MAJOR 29 -#define MLXSW_SP2_FWREV_MINOR 2010 -#define MLXSW_SP2_FWREV_SUBMINOR 1006 static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = { .major = MLXSW_SP2_FWREV_MAJOR, - .minor = MLXSW_SP2_FWREV_MINOR, - .subminor = MLXSW_SP2_FWREV_SUBMINOR, + .minor = MLXSW_SP_FWREV_MINOR, + .subminor = MLXSW_SP_FWREV_SUBMINOR, }; #define MLXSW_SP2_FW_FILENAME \ "mellanox/mlxsw_spectrum2-" __stringify(MLXSW_SP2_FWREV_MAJOR) \ - "." __stringify(MLXSW_SP2_FWREV_MINOR) \ - "." __stringify(MLXSW_SP2_FWREV_SUBMINOR) ".mfa2" + "." __stringify(MLXSW_SP_FWREV_MINOR) \ + "." __stringify(MLXSW_SP_FWREV_SUBMINOR) ".mfa2" #define MLXSW_SP3_FWREV_MAJOR 30 -#define MLXSW_SP3_FWREV_MINOR 2010 -#define MLXSW_SP3_FWREV_SUBMINOR 1006 static const struct mlxsw_fw_rev mlxsw_sp3_fw_rev = { .major = MLXSW_SP3_FWREV_MAJOR, - .minor = MLXSW_SP3_FWREV_MINOR, - .subminor = MLXSW_SP3_FWREV_SUBMINOR, + .minor = MLXSW_SP_FWREV_MINOR, + .subminor = MLXSW_SP_FWREV_SUBMINOR, }; #define MLXSW_SP3_FW_FILENAME \ "mellanox/mlxsw_spectrum3-" __stringify(MLXSW_SP3_FWREV_MAJOR) \ - "." __stringify(MLXSW_SP3_FWREV_MINOR) \ - "." __stringify(MLXSW_SP3_FWREV_SUBMINOR) ".mfa2" + "." __stringify(MLXSW_SP_FWREV_MINOR) \ + "." __stringify(MLXSW_SP_FWREV_SUBMINOR) ".mfa2" static const char mlxsw_sp1_driver_name[] = "mlxsw_spectrum"; static const char mlxsw_sp2_driver_name[] = "mlxsw_spectrum2"; @@ -2148,13 +2145,11 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, struct mlxsw_sp *mlxsw_sp = priv; struct mlxsw_sp_port *mlxsw_sp_port; enum mlxsw_reg_pude_oper_status status; - unsigned int max_ports; u16 local_port; - max_ports = mlxsw_core_max_ports(mlxsw_sp->core); local_port = mlxsw_reg_pude_local_port_get(pude_pl); - if (WARN_ON_ONCE(!local_port || local_port >= max_ports)) + if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port))) return; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) @@ -2393,45 +2388,6 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) return 0; } -static int mlxsw_sp_traps_register(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_listener listeners[], - size_t listeners_count) -{ - int i; - int err; - - for (i = 0; i < listeners_count; i++) { - err = mlxsw_core_trap_register(mlxsw_sp->core, - &listeners[i], - mlxsw_sp); - if (err) - goto err_listener_register; - - } - return 0; - -err_listener_register: - for (i--; i >= 0; i--) { - mlxsw_core_trap_unregister(mlxsw_sp->core, - &listeners[i], - mlxsw_sp); - } - return err; -} - -static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_listener listeners[], - size_t listeners_count) -{ - int i; - - for (i = 0; i < listeners_count; i++) { - mlxsw_core_trap_unregister(mlxsw_sp->core, - &listeners[i], - mlxsw_sp); - } -} - static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_trap *trap; @@ -2456,21 +2412,23 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_trap_groups_set; - err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener, - ARRAY_SIZE(mlxsw_sp_listener)); + err = mlxsw_core_traps_register(mlxsw_sp->core, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener), + mlxsw_sp); if (err) goto err_traps_register; - err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners, - mlxsw_sp->listeners_count); + err = mlxsw_core_traps_register(mlxsw_sp->core, mlxsw_sp->listeners, + mlxsw_sp->listeners_count, mlxsw_sp); if (err) goto err_extra_traps_init; return 0; err_extra_traps_init: - mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, - ARRAY_SIZE(mlxsw_sp_listener)); + mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener), + mlxsw_sp); err_traps_register: err_trap_groups_set: err_cpu_policers_set: @@ -2480,10 +2438,11 @@ err_cpu_policers_set: static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) { - mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp->listeners, - mlxsw_sp->listeners_count); - mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, - ARRAY_SIZE(mlxsw_sp_listener)); + mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp->listeners, + mlxsw_sp->listeners_count, + mlxsw_sp); + mlxsw_core_traps_unregister(mlxsw_sp->core, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener), mlxsw_sp); kfree(mlxsw_sp->trap); } @@ -2528,42 +2487,6 @@ static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp) kfree(mlxsw_sp->lags); } -static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) -{ - char htgt_pl[MLXSW_REG_HTGT_LEN]; - int err; - - mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD, - MLXSW_REG_HTGT_INVALID_POLICER, - MLXSW_REG_HTGT_DEFAULT_PRIORITY, - MLXSW_REG_HTGT_DEFAULT_TC); - err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); - if (err) - return err; - - mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MFDE, - MLXSW_REG_HTGT_INVALID_POLICER, - MLXSW_REG_HTGT_DEFAULT_PRIORITY, - MLXSW_REG_HTGT_DEFAULT_TC); - err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); - if (err) - return err; - - mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MTWE, - MLXSW_REG_HTGT_INVALID_POLICER, - MLXSW_REG_HTGT_DEFAULT_PRIORITY, - MLXSW_REG_HTGT_DEFAULT_TC); - err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); - if (err) - return err; - - mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_PMPE, - MLXSW_REG_HTGT_INVALID_POLICER, - MLXSW_REG_HTGT_DEFAULT_PRIORITY, - MLXSW_REG_HTGT_DEFAULT_TC); - return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); -} - static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = { .clock_init = mlxsw_sp1_ptp_clock_init, .clock_fini = mlxsw_sp1_ptp_clock_fini, @@ -3677,7 +3600,6 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .fw_filename = MLXSW_SP1_FW_FILENAME, .init = mlxsw_sp1_init, .fini = mlxsw_sp_fini, - .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, .port_split = mlxsw_sp_port_split, .port_unsplit = mlxsw_sp_port_unsplit, .sb_pool_get = mlxsw_sp_sb_pool_get, @@ -3705,9 +3627,6 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp1_config_profile, - .res_query_enabled = true, - .fw_fatal_enabled = true, - .temp_warn_enabled = true, }; static struct mlxsw_driver mlxsw_sp2_driver = { @@ -3717,7 +3636,6 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .fw_filename = MLXSW_SP2_FW_FILENAME, .init = mlxsw_sp2_init, .fini = mlxsw_sp_fini, - .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, .port_split = mlxsw_sp_port_split, .port_unsplit = mlxsw_sp_port_unsplit, .sb_pool_get = mlxsw_sp_sb_pool_get, @@ -3746,9 +3664,6 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, - .res_query_enabled = true, - .fw_fatal_enabled = true, - .temp_warn_enabled = true, }; static struct mlxsw_driver mlxsw_sp3_driver = { @@ -3758,7 +3673,6 @@ static struct mlxsw_driver mlxsw_sp3_driver = { .fw_filename = MLXSW_SP3_FW_FILENAME, .init = mlxsw_sp3_init, .fini = mlxsw_sp_fini, - .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, .port_split = mlxsw_sp_port_split, .port_unsplit = mlxsw_sp_port_unsplit, .sb_pool_get = mlxsw_sp_sb_pool_get, @@ -3787,9 +3701,6 @@ static struct mlxsw_driver mlxsw_sp3_driver = { .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, - .res_query_enabled = true, - .fw_fatal_enabled = true, - .temp_warn_enabled = true, }; static struct mlxsw_driver mlxsw_sp4_driver = { @@ -3797,7 +3708,6 @@ static struct mlxsw_driver mlxsw_sp4_driver = { .priv_size = sizeof(struct mlxsw_sp), .init = mlxsw_sp4_init, .fini = mlxsw_sp_fini, - .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, .port_split = mlxsw_sp_port_split, .port_unsplit = mlxsw_sp_port_unsplit, .sb_pool_get = mlxsw_sp_sb_pool_get, @@ -3826,9 +3736,6 @@ static struct mlxsw_driver mlxsw_sp4_driver = { .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, - .res_query_enabled = true, - .fw_fatal_enabled = true, - .temp_warn_enabled = true, }; bool mlxsw_sp_port_dev_check(const struct net_device *dev) @@ -4916,6 +4823,22 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, return 0; } +static bool mlxsw_sp_netdevice_event_is_router(unsigned long event) +{ + switch (event) { + case NETDEV_PRE_CHANGEADDR: + case NETDEV_CHANGEADDR: + case NETDEV_CHANGEMTU: + case NETDEV_OFFLOAD_XSTATS_ENABLE: + case NETDEV_OFFLOAD_XSTATS_DISABLE: + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + return true; + default: + return false; + } +} + static int mlxsw_sp_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -4940,9 +4863,7 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev)) err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev, event, ptr); - else if (event == NETDEV_PRE_CHANGEADDR || - event == NETDEV_CHANGEADDR || - event == NETDEV_CHANGEMTU) + else if (mlxsw_sp_netdevice_event_is_router(event)) err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr); else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index bb2442e1f705..20588e699588 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -481,6 +481,13 @@ int mlxsw_sp_port_vlan_classification_set(struct mlxsw_sp_port *mlxsw_sp_port, bool is_8021ad_tagged, bool is_8021q_tagged); +static inline bool +mlxsw_sp_local_port_is_valid(struct mlxsw_sp *mlxsw_sp, u16 local_port) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + + return local_port < max_ports && local_port; +} /* spectrum_buffers.c */ struct mlxsw_sp_hdroom_prio { @@ -813,6 +820,24 @@ int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core); /* spectrum2_kvdl.c */ extern const struct mlxsw_sp_kvdl_ops mlxsw_sp2_kvdl_ops; +enum mlxsw_sp_acl_mangle_field { + MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD, + MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP, + MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN, + MLXSW_SP_ACL_MANGLE_FIELD_IP_SPORT, + MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT, + MLXSW_SP_ACL_MANGLE_FIELD_IP4_SIP, + MLXSW_SP_ACL_MANGLE_FIELD_IP4_DIP, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_2, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_4, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_2, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3, + MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_4, +}; + struct mlxsw_sp_acl_rule_info { unsigned int priority; struct mlxsw_afk_element_values values; @@ -821,9 +846,14 @@ struct mlxsw_sp_acl_rule_info { ingress_bind_blocker:1, egress_bind_blocker:1, counter_valid:1, - policer_index_valid:1; + policer_index_valid:1, + ipv6_valid:1; unsigned int counter_index; u16 policer_index; + struct { + u32 prev_val; + enum mlxsw_sp_acl_mangle_field prev_field; + } ipv6; }; /* spectrum_flow.c */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c index a9fff8adc75e..d20e794e01ca 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c @@ -213,7 +213,6 @@ mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp1_kvdl_part *part; bool need_update = true; unsigned int nr_entries; - size_t usage_size; u64 resource_size; int err; @@ -225,8 +224,8 @@ mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp, } nr_entries = div_u64(resource_size, info->alloc_size); - usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long); - part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL); + part = kzalloc(struct_size(part, usage, BITS_TO_LONGS(nr_entries)), + GFP_KERNEL); if (!part) return ERR_PTR(-ENOMEM); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c index ad69913f19c1..5b0210862655 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c @@ -77,7 +77,14 @@ static int mlxsw_sp2_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv, int i; int err; + /* Some TCAM regions are not exposed to the host and used internally + * by the device. Allocate KVDL entries for the default actions of + * these regions to avoid the host from overwriting them. + */ tcam->kvdl_count = _tcam->max_regions; + if (MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_DEFAULT_ACTIONS)) + tcam->kvdl_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_MAX_DEFAULT_ACTIONS); err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, tcam->kvdl_count, &tcam->kvdl_index); if (err) @@ -97,7 +104,10 @@ static int mlxsw_sp2_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv, goto err_afa_block_continue; enc_actions = mlxsw_afa_block_cur_set(afa_block); - for (i = 0; i < tcam->kvdl_count; i++) { + /* Only write to KVDL entries used by TCAM regions exposed to the + * host. + */ + for (i = 0; i < _tcam->max_regions; i++) { mlxsw_reg_pefa_pack(pefa_pl, tcam->kvdl_index + i, true, enc_actions); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 70c11bfac08f..6c5af018546f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -505,14 +505,6 @@ int mlxsw_sp_acl_rulei_act_priority(struct mlxsw_sp *mlxsw_sp, extack); } -enum mlxsw_sp_acl_mangle_field { - MLXSW_SP_ACL_MANGLE_FIELD_IP_DSFIELD, - MLXSW_SP_ACL_MANGLE_FIELD_IP_DSCP, - MLXSW_SP_ACL_MANGLE_FIELD_IP_ECN, - MLXSW_SP_ACL_MANGLE_FIELD_IP_SPORT, - MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT, -}; - struct mlxsw_sp_acl_mangle_action { enum flow_action_mangle_base htype; /* Offset is u32-aligned. */ @@ -561,6 +553,18 @@ static struct mlxsw_sp_acl_mangle_action mlxsw_sp_acl_mangle_actions[] = { MLXSW_SP_ACL_MANGLE_ACTION_UDP(0, 0x0000ffff, 16, IP_SPORT), MLXSW_SP_ACL_MANGLE_ACTION_UDP(0, 0xffff0000, 0, IP_DPORT), + + MLXSW_SP_ACL_MANGLE_ACTION_IP4(12, 0x00000000, 0, IP4_SIP), + MLXSW_SP_ACL_MANGLE_ACTION_IP4(16, 0x00000000, 0, IP4_DIP), + + MLXSW_SP_ACL_MANGLE_ACTION_IP6(8, 0x00000000, 0, IP6_SIP_1), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(12, 0x00000000, 0, IP6_SIP_2), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(16, 0x00000000, 0, IP6_SIP_3), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(20, 0x00000000, 0, IP6_SIP_4), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(24, 0x00000000, 0, IP6_DIP_1), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(28, 0x00000000, 0, IP6_DIP_2), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(32, 0x00000000, 0, IP6_DIP_3), + MLXSW_SP_ACL_MANGLE_ACTION_IP6(36, 0x00000000, 0, IP6_DIP_4), }; static int @@ -599,6 +603,22 @@ static int mlxsw_sp1_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, return err; } +static int +mlxsw_sp2_acl_rulei_act_mangle_field_ip_odd(struct mlxsw_sp_acl_rule_info *rulei, + enum mlxsw_sp_acl_mangle_field field, + u32 val, struct netlink_ext_ack *extack) +{ + if (!rulei->ipv6_valid) { + rulei->ipv6.prev_val = val; + rulei->ipv6_valid = true; + rulei->ipv6.prev_field = field; + return 0; + } + + NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field order"); + return -EOPNOTSUPP; +} + static int mlxsw_sp2_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct mlxsw_sp_acl_mangle_action *mact, @@ -615,6 +635,61 @@ static int mlxsw_sp2_acl_rulei_act_mangle_field(struct mlxsw_sp *mlxsw_sp, return mlxsw_afa_block_append_l4port(rulei->act_block, false, val, extack); case MLXSW_SP_ACL_MANGLE_FIELD_IP_DPORT: return mlxsw_afa_block_append_l4port(rulei->act_block, true, val, extack); + /* IPv4 fields */ + case MLXSW_SP_ACL_MANGLE_FIELD_IP4_SIP: + return mlxsw_afa_block_append_ip(rulei->act_block, false, + true, val, 0, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP4_DIP: + return mlxsw_afa_block_append_ip(rulei->act_block, true, + true, val, 0, extack); + /* IPv6 fields */ + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1: + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3: + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1: + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3: + return mlxsw_sp2_acl_rulei_act_mangle_field_ip_odd(rulei, + mact->field, + val, extack); + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_2: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_1) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + false, false, val, + rulei->ipv6.prev_val, + extack); + } + break; + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_4: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_SIP_3) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + false, true, val, + rulei->ipv6.prev_val, + extack); + } + break; + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_2: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_1) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + true, false, val, + rulei->ipv6.prev_val, + extack); + } + break; + case MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_4: + if (rulei->ipv6_valid && + rulei->ipv6.prev_field == MLXSW_SP_ACL_MANGLE_FIELD_IP6_DIP_3) { + rulei->ipv6_valid = false; + return mlxsw_afa_block_append_ip(rulei->act_block, + true, true, val, + rulei->ipv6.prev_val, + extack); + } + break; default: break; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 1a2fef2a5379..5d494fabf93d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -266,10 +266,10 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable) if (!rif) continue; if (enable) - mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, + mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS); else - mlxsw_sp_rif_counter_free(mlxsw_sp, rif, + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); } mutex_unlock(&mlxsw_sp->router->lock); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 20530712eadb..8b5d7f83b9b0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -1034,13 +1034,10 @@ static int mlxsw_sp_get_module_info(struct net_device *netdev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - int err; - - err = mlxsw_env_get_module_info(mlxsw_sp->core, - mlxsw_sp_port->mapping.module, - modinfo); - return err; + return mlxsw_env_get_module_info(netdev, mlxsw_sp->core, + mlxsw_sp_port->mapping.module, + modinfo); } static int mlxsw_sp_get_module_eeprom(struct net_device *netdev, @@ -1048,13 +1045,10 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - int err; - - err = mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core, - mlxsw_sp_port->mapping.module, ee, - data); - return err; + return mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core, + mlxsw_sp_port->mapping.module, ee, + data); } static int @@ -1273,12 +1267,22 @@ struct mlxsw_sp1_port_link_mode { static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T, + .mask_ethtool = ETHTOOL_LINK_MODE_100baseT_Full_BIT, + .speed = SPEED_100, + }, + { .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX, .mask_ethtool = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, .speed = SPEED_1000, }, { + .mask = MLXSW_REG_PTYS_ETH_SPEED_1000BASE_T, + .mask_ethtool = ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + .speed = SPEED_1000, + }, + { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4, .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index bb417db773b9..e91fb205e0b4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -15,6 +15,46 @@ #include "spectrum.h" #include "core_acl_flex_keys.h" +static int mlxsw_sp_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} + static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_flow_block *block, struct mlxsw_sp_acl_rule_info *rulei, @@ -191,10 +231,9 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return -EOPNOTSUPP; } - if (act->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } + err = mlxsw_sp_policer_validate(flow_action, act, extack); + if (err) + return err; /* The kernel might adjust the requested burst size so * that it is not exactly a power of two. Re-adjust it @@ -233,6 +272,12 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return -EOPNOTSUPP; } } + + if (rulei->ipv6_valid) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported mangle field"); + return -EOPNOTSUPP; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index 0ff163fbc775..35422e64d89f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -568,12 +568,11 @@ void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, u8 domain_number, u16 sequence_id, u64 timestamp) { - unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); struct mlxsw_sp_port *mlxsw_sp_port; struct mlxsw_sp1_ptp_key key; u8 types; - if (WARN_ON_ONCE(local_port >= max_ports)) + if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port))) return; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index d40762cfc453..79deb19e3a19 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -225,6 +225,64 @@ int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, return 0; } +struct mlxsw_sp_rif_counter_set_basic { + u64 good_unicast_packets; + u64 good_multicast_packets; + u64 good_broadcast_packets; + u64 good_unicast_bytes; + u64 good_multicast_bytes; + u64 good_broadcast_bytes; + u64 error_packets; + u64 discard_packets; + u64 error_bytes; + u64 discard_bytes; +}; + +static int +mlxsw_sp_rif_counter_fetch_clear(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir, + struct mlxsw_sp_rif_counter_set_basic *set) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + char ricnt_pl[MLXSW_REG_RICNT_LEN]; + unsigned int *p_counter_index; + int err; + + if (!mlxsw_sp_rif_counter_valid_get(rif, dir)) + return -EINVAL; + + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); + if (!p_counter_index) + return -EINVAL; + + mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index, + MLXSW_REG_RICNT_OPCODE_CLEAR); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl); + if (err) + return err; + + if (!set) + return 0; + +#define MLXSW_SP_RIF_COUNTER_EXTRACT(NAME) \ + (set->NAME = mlxsw_reg_ricnt_ ## NAME ## _get(ricnt_pl)) + + MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(error_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(discard_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(error_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(discard_bytes); + +#undef MLXSW_SP_RIF_COUNTER_EXTRACT + + return 0; +} + static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index) { @@ -235,16 +293,20 @@ static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl); } -int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif, +int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir) { + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; unsigned int *p_counter_index; int err; + if (mlxsw_sp_rif_counter_valid_get(rif, dir)) + return 0; + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); if (!p_counter_index) return -EINVAL; + err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF, p_counter_index); if (err) @@ -268,10 +330,10 @@ err_counter_clear: return err; } -void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif, +void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir) { + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; unsigned int *p_counter_index; if (!mlxsw_sp_rif_counter_valid_get(rif, dir)) @@ -296,14 +358,12 @@ static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif) if (!devlink_dpipe_table_counter_enabled(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF)) return; - mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); + mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS); } static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif) { - struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; - - mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); } #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1) @@ -8148,6 +8208,166 @@ u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) return lb_rif->ul_rif_id; } +static bool +mlxsw_sp_router_port_l3_stats_enabled(struct mlxsw_sp_rif *rif) +{ + return mlxsw_sp_rif_counter_valid_get(rif, + MLXSW_SP_RIF_COUNTER_EGRESS) && + mlxsw_sp_rif_counter_valid_get(rif, + MLXSW_SP_RIF_COUNTER_INGRESS); +} + +static int +mlxsw_sp_router_port_l3_stats_enable(struct mlxsw_sp_rif *rif) +{ + int err; + + err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_INGRESS); + if (err) + return err; + + /* Clear stale data. */ + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_INGRESS, + NULL); + if (err) + goto err_clear_ingress; + + err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS); + if (err) + goto err_alloc_egress; + + /* Clear stale data. */ + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_EGRESS, + NULL); + if (err) + goto err_clear_egress; + + return 0; + +err_clear_egress: + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); +err_alloc_egress: +err_clear_ingress: + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS); + return err; +} + +static void +mlxsw_sp_router_port_l3_stats_disable(struct mlxsw_sp_rif *rif) +{ + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS); +} + +static void +mlxsw_sp_router_port_l3_stats_report_used(struct mlxsw_sp_rif *rif, + struct netdev_notifier_offload_xstats_info *info) +{ + if (!mlxsw_sp_router_port_l3_stats_enabled(rif)) + return; + netdev_offload_xstats_report_used(info->report_used); +} + +static int +mlxsw_sp_router_port_l3_stats_fetch(struct mlxsw_sp_rif *rif, + struct rtnl_hw_stats64 *p_stats) +{ + struct mlxsw_sp_rif_counter_set_basic ingress; + struct mlxsw_sp_rif_counter_set_basic egress; + int err; + + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_INGRESS, + &ingress); + if (err) + return err; + + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_EGRESS, + &egress); + if (err) + return err; + +#define MLXSW_SP_ROUTER_ALL_GOOD(SET, SFX) \ + ((SET.good_unicast_ ## SFX) + \ + (SET.good_multicast_ ## SFX) + \ + (SET.good_broadcast_ ## SFX)) + + p_stats->rx_packets = MLXSW_SP_ROUTER_ALL_GOOD(ingress, packets); + p_stats->tx_packets = MLXSW_SP_ROUTER_ALL_GOOD(egress, packets); + p_stats->rx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(ingress, bytes); + p_stats->tx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(egress, bytes); + p_stats->rx_errors = ingress.error_packets; + p_stats->tx_errors = egress.error_packets; + p_stats->rx_dropped = ingress.discard_packets; + p_stats->tx_dropped = egress.discard_packets; + p_stats->multicast = ingress.good_multicast_packets + + ingress.good_broadcast_packets; + +#undef MLXSW_SP_ROUTER_ALL_GOOD + + return 0; +} + +static int +mlxsw_sp_router_port_l3_stats_report_delta(struct mlxsw_sp_rif *rif, + struct netdev_notifier_offload_xstats_info *info) +{ + struct rtnl_hw_stats64 stats = {}; + int err; + + if (!mlxsw_sp_router_port_l3_stats_enabled(rif)) + return 0; + + err = mlxsw_sp_router_port_l3_stats_fetch(rif, &stats); + if (err) + return err; + + netdev_offload_xstats_report_delta(info->report_delta, &stats); + return 0; +} + +struct mlxsw_sp_router_hwstats_notify_work { + struct work_struct work; + struct net_device *dev; +}; + +static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work) +{ + struct mlxsw_sp_router_hwstats_notify_work *hws_work = + container_of(work, struct mlxsw_sp_router_hwstats_notify_work, + work); + + rtnl_lock(); + rtnl_offload_xstats_notify(hws_work->dev); + rtnl_unlock(); + dev_put(hws_work->dev); + kfree(hws_work); +} + +static void +mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev) +{ + struct mlxsw_sp_router_hwstats_notify_work *hws_work; + + /* To collect notification payload, the core ends up sending another + * notifier block message, which would deadlock on the attempt to + * acquire the router lock again. Just postpone the notification until + * later. + */ + + hws_work = kzalloc(sizeof(*hws_work), GFP_KERNEL); + if (!hws_work) + return; + + INIT_WORK(&hws_work->work, mlxsw_sp_router_hwstats_notify_work); + dev_hold(dev); + hws_work->dev = dev; + mlxsw_core_schedule_work(&hws_work->work); +} + int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) { return rif->dev->ifindex; @@ -8158,6 +8378,16 @@ const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) return rif->dev; } +static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif) +{ + struct rtnl_hw_stats64 stats = {}; + + if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats)) + netdev_offload_xstats_push_delta(rif->dev, + NETDEV_OFFLOAD_XSTATS_TYPE_L3, + &stats); +} + static struct mlxsw_sp_rif * mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params, @@ -8218,10 +8448,19 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, goto err_mr_rif_add; } - mlxsw_sp_rif_counters_alloc(rif); + if (netdev_offload_xstats_enabled(rif->dev, + NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { + err = mlxsw_sp_router_port_l3_stats_enable(rif); + if (err) + goto err_stats_enable; + mlxsw_sp_router_hwstats_notify_schedule(rif->dev); + } else { + mlxsw_sp_rif_counters_alloc(rif); + } return rif; +err_stats_enable: err_mr_rif_add: for (i--; i >= 0; i--) mlxsw_sp_mr_rif_del(vr->mr_table[i], rif); @@ -8251,7 +8490,15 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); vr = &mlxsw_sp->router->vrs[rif->vr_id]; - mlxsw_sp_rif_counters_free(rif); + if (netdev_offload_xstats_enabled(rif->dev, + NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { + mlxsw_sp_rif_push_l3_stats(rif); + mlxsw_sp_router_port_l3_stats_disable(rif); + mlxsw_sp_router_hwstats_notify_schedule(rif->dev); + } else { + mlxsw_sp_rif_counters_free(rif); + } + for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) mlxsw_sp_mr_rif_del(vr->mr_table[i], rif); ops->deconfigure(rif); @@ -9128,6 +9375,35 @@ static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif, return -ENOBUFS; } +static int +mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif, + unsigned long event, + struct netdev_notifier_offload_xstats_info *info) +{ + switch (info->type) { + case NETDEV_OFFLOAD_XSTATS_TYPE_L3: + break; + default: + return 0; + } + + switch (event) { + case NETDEV_OFFLOAD_XSTATS_ENABLE: + return mlxsw_sp_router_port_l3_stats_enable(rif); + case NETDEV_OFFLOAD_XSTATS_DISABLE: + mlxsw_sp_router_port_l3_stats_disable(rif); + return 0; + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + mlxsw_sp_router_port_l3_stats_report_used(rif, info); + return 0; + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + return mlxsw_sp_router_port_l3_stats_report_delta(rif, info); + } + + WARN_ON_ONCE(1); + return 0; +} + int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, unsigned long event, void *ptr) { @@ -9153,6 +9429,15 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, case NETDEV_PRE_CHANGEADDR: err = mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr); break; + case NETDEV_OFFLOAD_XSTATS_ENABLE: + case NETDEV_OFFLOAD_XSTATS_DISABLE: + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + err = mlxsw_sp_router_port_offload_xstats_cmd(rif, event, ptr); + break; + default: + WARN_ON_ONCE(1); + break; } out: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 99e8371a82a5..fa829658a11b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -159,11 +159,9 @@ int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir, u64 *cnt); -void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif, +void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir); -int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif, +int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir); struct mlxsw_sp_neigh_entry * mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index f9671cc53002..b73466470f75 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -269,8 +269,7 @@ mlxsw_sp_span_entry_bridge_8021q(const struct net_device *br_dev, if (!vid && WARN_ON(br_vlan_get_pvid(br_dev, &vid))) return NULL; - if (!vid || - br_vlan_get_info(br_dev, vid, &vinfo) || + if (!vid || br_vlan_get_info(br_dev, vid, &vinfo) || !(vinfo.flags & BRIDGE_VLAN_INFO_BRENTRY)) return NULL; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 65c1724c63b0..3bf12092a8a2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1234,8 +1234,7 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, if (netif_is_bridge_master(orig_dev)) { int err = 0; - if ((vlan->flags & BRIDGE_VLAN_INFO_BRENTRY) && - br_vlan_enabled(orig_dev)) + if (br_vlan_enabled(orig_dev)) err = mlxsw_sp_br_ban_rif_pvid_change(mlxsw_sp, orig_dev, vlan); if (!err) @@ -2616,7 +2615,6 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, char *sfn_pl, int rec_index, bool adding) { - unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; struct mlxsw_sp_bridge_device *bridge_device; struct mlxsw_sp_bridge_port *bridge_port; @@ -2630,7 +2628,7 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &fid, &local_port); - if (WARN_ON_ONCE(local_port >= max_ports)) + if (WARN_ON_ONCE(!mlxsw_sp_local_port_is_valid(mlxsw_sp, local_port))) return; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) { diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 0303e727e99f..82d55fc27edc 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -293,7 +293,7 @@ static void ks8851_wrfifo_spi(struct ks8851_net *ks, struct sk_buff *txp, */ static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb) { - netif_rx_ni(skb); + netif_rx(skb); } /** @@ -452,11 +452,9 @@ static int ks8851_probe_spi(struct spi_device *spi) return ks8851_probe_common(netdev, dev, msg_enable); } -static int ks8851_remove_spi(struct spi_device *spi) +static void ks8851_remove_spi(struct spi_device *spi) { ks8851_remove_common(&spi->dev); - - return 0; } static const struct of_device_id ks8851_match_table[] = { diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 634ac7649c43..559ad94a44d0 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -975,7 +975,7 @@ static void enc28j60_hw_rx(struct net_device *ndev) /* update statistics */ ndev->stats.rx_packets++; ndev->stats.rx_bytes += len; - netif_rx_ni(skb); + netif_rx(skb); } } /* @@ -1612,15 +1612,13 @@ error_alloc: return ret; } -static int enc28j60_remove(struct spi_device *spi) +static void enc28j60_remove(struct spi_device *spi) { struct enc28j60_net *priv = spi_get_drvdata(spi); unregister_netdev(priv->netdev); free_irq(spi->irq, priv); free_netdev(priv->netdev); - - return 0; } static const struct of_device_id enc28j60_dt_ids[] = { diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index b90efc80fb59..dc1840cb5b10 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -1093,7 +1093,7 @@ error_out: return ret; } -static int encx24j600_spi_remove(struct spi_device *spi) +static void encx24j600_spi_remove(struct spi_device *spi) { struct encx24j600_priv *priv = dev_get_drvdata(&spi->dev); @@ -1101,8 +1101,6 @@ static int encx24j600_spi_remove(struct spi_device *spi) kthread_stop(priv->kworker_task); free_netdev(priv->ndev); - - return 0; } static const struct spi_device_id encx24j600_spi_id_table[] = { diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index 91a755efe2e6..5f1e7b8bad4f 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -750,7 +750,7 @@ static int lan743x_ethtool_set_eee(struct net_device *netdev, } if (eee->eee_enabled) { - ret = phy_init_eee(phydev, 0); + ret = phy_init_eee(phydev, false); if (ret) { netif_err(adapter, drv, adapter->netdev, "EEE initialization failed\n"); diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 8c6390d95158..5282d25a6f9b 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -18,6 +18,51 @@ #include "lan743x_main.h" #include "lan743x_ethtool.h" +#define MMD_ACCESS_ADDRESS 0 +#define MMD_ACCESS_WRITE 1 +#define MMD_ACCESS_READ 2 +#define MMD_ACCESS_READ_INC 3 + +static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) +{ + u32 chip_rev; + u32 strap; + + strap = lan743x_csr_read(adapter, STRAP_READ); + if (strap & STRAP_READ_USE_SGMII_EN_) { + if (strap & STRAP_READ_SGMII_EN_) + adapter->is_sgmii_en = true; + else + adapter->is_sgmii_en = false; + netif_dbg(adapter, drv, adapter->netdev, + "STRAP_READ: 0x%08X\n", strap); + } else { + chip_rev = lan743x_csr_read(adapter, FPGA_REV); + if (chip_rev) { + if (chip_rev & FPGA_SGMII_OP) + adapter->is_sgmii_en = true; + else + adapter->is_sgmii_en = false; + netif_dbg(adapter, drv, adapter->netdev, + "FPGA_REV: 0x%08X\n", chip_rev); + } else { + adapter->is_sgmii_en = false; + } + } +} + +static bool is_pci11x1x_chip(struct lan743x_adapter *adapter) +{ + struct lan743x_csr *csr = &adapter->csr; + u32 id_rev = csr->id_rev; + + if (((id_rev & 0xFFFF0000) == ID_REV_ID_A011_) || + ((id_rev & 0xFFFF0000) == ID_REV_ID_A041_)) { + return true; + } + return false; +} + static void lan743x_pci_cleanup(struct lan743x_adapter *adapter) { pci_release_selected_regions(adapter->pdev, @@ -250,7 +295,7 @@ static void lan743x_intr_shared_isr(void *context, u32 int_sts, u32 flags) } } if (int_sts & INT_BIT_ALL_TX_) { - for (channel = 0; channel < LAN743X_USED_TX_CHANNELS; + for (channel = 0; channel < adapter->used_tx_channels; channel++) { u32 int_bit = INT_BIT_DMA_TX_(channel); @@ -410,7 +455,7 @@ static u32 lan743x_intr_get_vector_flags(struct lan743x_adapter *adapter, { int index; - for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++) { + for (index = 0; index < adapter->max_vector_count; index++) { if (adapter->intr.vector_list[index].int_mask & int_mask) return adapter->intr.vector_list[index].flags; } @@ -423,9 +468,12 @@ static void lan743x_intr_close(struct lan743x_adapter *adapter) int index = 0; lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_MAS_); - lan743x_csr_write(adapter, INT_VEC_EN_CLR, 0x000000FF); + if (adapter->is_pci11x1x) + lan743x_csr_write(adapter, INT_VEC_EN_CLR, 0x0000FFFF); + else + lan743x_csr_write(adapter, INT_VEC_EN_CLR, 0x000000FF); - for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++) { + for (index = 0; index < intr->number_of_vectors; index++) { if (intr->flags & INTR_FLAG_IRQ_REQUESTED(index)) { lan743x_intr_unregister_isr(adapter, index); intr->flags &= ~INTR_FLAG_IRQ_REQUESTED(index); @@ -445,9 +493,11 @@ static void lan743x_intr_close(struct lan743x_adapter *adapter) static int lan743x_intr_open(struct lan743x_adapter *adapter) { - struct msix_entry msix_entries[LAN743X_MAX_VECTOR_COUNT]; + struct msix_entry msix_entries[PCI11X1X_MAX_VECTOR_COUNT]; struct lan743x_intr *intr = &adapter->intr; + unsigned int used_tx_channels; u32 int_vec_en_auto_clr = 0; + u8 max_vector_count; u32 int_vec_map0 = 0; u32 int_vec_map1 = 0; int ret = -ENODEV; @@ -457,13 +507,15 @@ static int lan743x_intr_open(struct lan743x_adapter *adapter) intr->number_of_vectors = 0; /* Try to set up MSIX interrupts */ + max_vector_count = adapter->max_vector_count; memset(&msix_entries[0], 0, - sizeof(struct msix_entry) * LAN743X_MAX_VECTOR_COUNT); - for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++) + sizeof(struct msix_entry) * max_vector_count); + for (index = 0; index < max_vector_count; index++) msix_entries[index].entry = index; + used_tx_channels = adapter->used_tx_channels; ret = pci_enable_msix_range(adapter->pdev, msix_entries, 1, - 1 + LAN743X_USED_TX_CHANNELS + + 1 + used_tx_channels + LAN743X_USED_RX_CHANNELS); if (ret > 0) { @@ -556,8 +608,15 @@ static int lan743x_intr_open(struct lan743x_adapter *adapter) lan743x_csr_write(adapter, INT_MOD_CFG5, LAN743X_INT_MOD); lan743x_csr_write(adapter, INT_MOD_CFG6, LAN743X_INT_MOD); lan743x_csr_write(adapter, INT_MOD_CFG7, LAN743X_INT_MOD); - lan743x_csr_write(adapter, INT_MOD_MAP0, 0x00005432); - lan743x_csr_write(adapter, INT_MOD_MAP1, 0x00000001); + if (adapter->is_pci11x1x) { + lan743x_csr_write(adapter, INT_MOD_CFG8, LAN743X_INT_MOD); + lan743x_csr_write(adapter, INT_MOD_CFG9, LAN743X_INT_MOD); + lan743x_csr_write(adapter, INT_MOD_MAP0, 0x00007654); + lan743x_csr_write(adapter, INT_MOD_MAP1, 0x00003210); + } else { + lan743x_csr_write(adapter, INT_MOD_MAP0, 0x00005432); + lan743x_csr_write(adapter, INT_MOD_MAP1, 0x00000001); + } lan743x_csr_write(adapter, INT_MOD_MAP2, 0x00FFFFFF); } @@ -570,8 +629,8 @@ static int lan743x_intr_open(struct lan743x_adapter *adapter) if (intr->number_of_vectors > 1) { int number_of_tx_vectors = intr->number_of_vectors - 1; - if (number_of_tx_vectors > LAN743X_USED_TX_CHANNELS) - number_of_tx_vectors = LAN743X_USED_TX_CHANNELS; + if (number_of_tx_vectors > used_tx_channels) + number_of_tx_vectors = used_tx_channels; flags = LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ | LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C | LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK | @@ -609,9 +668,9 @@ static int lan743x_intr_open(struct lan743x_adapter *adapter) INT_VEC_EN_(vector)); } } - if ((intr->number_of_vectors - LAN743X_USED_TX_CHANNELS) > 1) { + if ((intr->number_of_vectors - used_tx_channels) > 1) { int number_of_rx_vectors = intr->number_of_vectors - - LAN743X_USED_TX_CHANNELS - 1; + used_tx_channels - 1; if (number_of_rx_vectors > LAN743X_USED_RX_CHANNELS) number_of_rx_vectors = LAN743X_USED_RX_CHANNELS; @@ -632,7 +691,7 @@ static int lan743x_intr_open(struct lan743x_adapter *adapter) LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR; } for (index = 0; index < number_of_rx_vectors; index++) { - int vector = index + 1 + LAN743X_USED_TX_CHANNELS; + int vector = index + 1 + used_tx_channels; u32 int_bit = INT_BIT_DMA_RX_(index); /* map RX interrupt to vector */ @@ -760,6 +819,96 @@ static int lan743x_mdiobus_write(struct mii_bus *bus, return ret; } +static u32 lan743x_mac_mmd_access(int id, int index, int op) +{ + u16 dev_addr; + u32 ret; + + dev_addr = (index >> 16) & 0x1f; + ret = (id << MAC_MII_ACC_PHY_ADDR_SHIFT_) & + MAC_MII_ACC_PHY_ADDR_MASK_; + ret |= (dev_addr << MAC_MII_ACC_MIIMMD_SHIFT_) & + MAC_MII_ACC_MIIMMD_MASK_; + if (op == MMD_ACCESS_WRITE) + ret |= MAC_MII_ACC_MIICMD_WRITE_; + else if (op == MMD_ACCESS_READ) + ret |= MAC_MII_ACC_MIICMD_READ_; + else if (op == MMD_ACCESS_READ_INC) + ret |= MAC_MII_ACC_MIICMD_READ_INC_; + else + ret |= MAC_MII_ACC_MIICMD_ADDR_; + ret |= (MAC_MII_ACC_MII_BUSY_ | MAC_MII_ACC_MIICL45_); + + return ret; +} + +static int lan743x_mdiobus_c45_read(struct mii_bus *bus, int phy_id, int index) +{ + struct lan743x_adapter *adapter = bus->priv; + u32 mmd_access; + int ret; + + /* comfirm MII not busy */ + ret = lan743x_mac_mii_wait_till_not_busy(adapter); + if (ret < 0) + return ret; + if (index & MII_ADDR_C45) { + /* Load Register Address */ + lan743x_csr_write(adapter, MAC_MII_DATA, (u32)(index & 0xffff)); + mmd_access = lan743x_mac_mmd_access(phy_id, index, + MMD_ACCESS_ADDRESS); + lan743x_csr_write(adapter, MAC_MII_ACC, mmd_access); + ret = lan743x_mac_mii_wait_till_not_busy(adapter); + if (ret < 0) + return ret; + /* Read Data */ + mmd_access = lan743x_mac_mmd_access(phy_id, index, + MMD_ACCESS_READ); + lan743x_csr_write(adapter, MAC_MII_ACC, mmd_access); + ret = lan743x_mac_mii_wait_till_not_busy(adapter); + if (ret < 0) + return ret; + ret = lan743x_csr_read(adapter, MAC_MII_DATA); + return (int)(ret & 0xFFFF); + } + + ret = lan743x_mdiobus_read(bus, phy_id, index); + return ret; +} + +static int lan743x_mdiobus_c45_write(struct mii_bus *bus, + int phy_id, int index, u16 regval) +{ + struct lan743x_adapter *adapter = bus->priv; + u32 mmd_access; + int ret; + + /* confirm MII not busy */ + ret = lan743x_mac_mii_wait_till_not_busy(adapter); + if (ret < 0) + return ret; + if (index & MII_ADDR_C45) { + /* Load Register Address */ + lan743x_csr_write(adapter, MAC_MII_DATA, (u32)(index & 0xffff)); + mmd_access = lan743x_mac_mmd_access(phy_id, index, + MMD_ACCESS_ADDRESS); + lan743x_csr_write(adapter, MAC_MII_ACC, mmd_access); + ret = lan743x_mac_mii_wait_till_not_busy(adapter); + if (ret < 0) + return ret; + /* Write Data */ + lan743x_csr_write(adapter, MAC_MII_DATA, (u32)regval); + mmd_access = lan743x_mac_mmd_access(phy_id, index, + MMD_ACCESS_WRITE); + lan743x_csr_write(adapter, MAC_MII_ACC, mmd_access); + ret = lan743x_mac_mii_wait_till_not_busy(adapter); + } else { + ret = lan743x_mdiobus_write(bus, phy_id, index, regval); + } + + return ret; +} + static void lan743x_mac_set_address(struct lan743x_adapter *adapter, u8 *addr) { @@ -2491,7 +2640,8 @@ static int lan743x_netdev_close(struct net_device *netdev) struct lan743x_adapter *adapter = netdev_priv(netdev); int index; - lan743x_tx_close(&adapter->tx[0]); + for (index = 0; index < adapter->used_tx_channels; index++) + lan743x_tx_close(&adapter->tx[index]); for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) lan743x_rx_close(&adapter->rx[index]); @@ -2537,12 +2687,19 @@ static int lan743x_netdev_open(struct net_device *netdev) goto close_rx; } - ret = lan743x_tx_open(&adapter->tx[0]); - if (ret) - goto close_rx; - + for (index = 0; index < adapter->used_tx_channels; index++) { + ret = lan743x_tx_open(&adapter->tx[index]); + if (ret) + goto close_tx; + } return 0; +close_tx: + for (index = 0; index < adapter->used_tx_channels; index++) { + if (adapter->tx[index].ring_cpu_ptr) + lan743x_tx_close(&adapter->tx[index]); + } + close_rx: for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) { if (adapter->rx[index].ring_cpu_ptr) @@ -2569,8 +2726,12 @@ static netdev_tx_t lan743x_netdev_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct lan743x_adapter *adapter = netdev_priv(netdev); + u8 ch = 0; - return lan743x_tx_xmit_frame(&adapter->tx[0], skb); + if (adapter->is_pci11x1x) + ch = skb->queue_mapping % PCI11X1X_USED_TX_CHANNELS; + + return lan743x_tx_xmit_frame(&adapter->tx[ch], skb); } static int lan743x_netdev_ioctl(struct net_device *netdev, @@ -2701,6 +2862,18 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, int index; int ret; + adapter->is_pci11x1x = is_pci11x1x_chip(adapter); + if (adapter->is_pci11x1x) { + adapter->max_tx_channels = PCI11X1X_MAX_TX_CHANNELS; + adapter->used_tx_channels = PCI11X1X_USED_TX_CHANNELS; + adapter->max_vector_count = PCI11X1X_MAX_VECTOR_COUNT; + pci11x1x_strap_get_status(adapter); + } else { + adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; + adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; + adapter->max_vector_count = LAN743X_MAX_VECTOR_COUNT; + } + adapter->intr.irq = adapter->pdev->irq; lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF); @@ -2731,15 +2904,19 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, adapter->rx[index].channel_number = index; } - tx = &adapter->tx[0]; - tx->adapter = adapter; - tx->channel_number = 0; - spin_lock_init(&tx->ring_lock); + for (index = 0; index < adapter->used_tx_channels; index++) { + tx = &adapter->tx[index]; + tx->adapter = adapter; + tx->channel_number = index; + spin_lock_init(&tx->ring_lock); + } + return 0; } static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) { + u32 sgmii_ctl; int ret; adapter->mdiobus = devm_mdiobus_alloc(&adapter->pdev->dev); @@ -2749,9 +2926,35 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) } adapter->mdiobus->priv = (void *)adapter; - adapter->mdiobus->read = lan743x_mdiobus_read; - adapter->mdiobus->write = lan743x_mdiobus_write; - adapter->mdiobus->name = "lan743x-mdiobus"; + if (adapter->is_pci11x1x) { + if (adapter->is_sgmii_en) { + sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); + sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_; + sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_; + lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); + netif_dbg(adapter, drv, adapter->netdev, + "SGMII operation\n"); + } else { + sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); + sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; + sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; + lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); + netif_dbg(adapter, drv, adapter->netdev, + "(R)GMII operation\n"); + } + + adapter->mdiobus->probe_capabilities = MDIOBUS_C22_C45; + adapter->mdiobus->read = lan743x_mdiobus_c45_read; + adapter->mdiobus->write = lan743x_mdiobus_c45_write; + adapter->mdiobus->name = "lan743x-mdiobus-c45"; + netif_dbg(adapter, drv, adapter->netdev, "lan743x-mdiobus-c45\n"); + } else { + adapter->mdiobus->read = lan743x_mdiobus_read; + adapter->mdiobus->write = lan743x_mdiobus_write; + adapter->mdiobus->name = "lan743x-mdiobus"; + netif_dbg(adapter, drv, adapter->netdev, "lan743x-mdiobus\n"); + } + snprintf(adapter->mdiobus->id, MII_BUS_ID_SIZE, "pci-%s", pci_name(adapter->pdev)); @@ -2786,8 +2989,17 @@ static int lan743x_pcidev_probe(struct pci_dev *pdev, struct net_device *netdev = NULL; int ret = -ENODEV; - netdev = devm_alloc_etherdev(&pdev->dev, - sizeof(struct lan743x_adapter)); + if (id->device == PCI_DEVICE_ID_SMSC_A011 || + id->device == PCI_DEVICE_ID_SMSC_A041) { + netdev = devm_alloc_etherdev_mqs(&pdev->dev, + sizeof(struct lan743x_adapter), + PCI11X1X_USED_TX_CHANNELS, + LAN743X_USED_RX_CHANNELS); + } else { + netdev = devm_alloc_etherdev(&pdev->dev, + sizeof(struct lan743x_adapter)); + } + if (!netdev) goto return_error; @@ -3056,6 +3268,8 @@ static const struct dev_pm_ops lan743x_pm_ops = { static const struct pci_device_id lan743x_pcidev_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7430) }, { PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7431) }, + { PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_A011) }, + { PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_A041) }, { 0, } }; diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index aaf7aaeaba0c..2c8e76b4e1f7 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -16,8 +16,13 @@ #define ID_REV_ID_MASK_ (0xFFFF0000) #define ID_REV_ID_LAN7430_ (0x74300000) #define ID_REV_ID_LAN7431_ (0x74310000) -#define ID_REV_IS_VALID_CHIP_ID_(id_rev) \ - (((id_rev) & 0xFFF00000) == 0x74300000) +#define ID_REV_ID_LAN743X_ (0x74300000) +#define ID_REV_ID_A011_ (0xA0110000) // PCI11010 +#define ID_REV_ID_A041_ (0xA0410000) // PCI11414 +#define ID_REV_ID_A0X1_ (0xA0010000) +#define ID_REV_IS_VALID_CHIP_ID_(id_rev) \ + ((((id_rev) & 0xFFF00000) == ID_REV_ID_LAN743X_) || \ + (((id_rev) & 0xFF0F0000) == ID_REV_ID_A0X1_)) #define ID_REV_CHIP_REV_MASK_ (0x0000FFFF) #define ID_REV_CHIP_REV_A0_ (0x00000000) #define ID_REV_CHIP_REV_B0_ (0x00000010) @@ -25,6 +30,17 @@ #define FPGA_REV (0x04) #define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF) #define FPGA_REV_GET_MAJOR_(fpga_rev) ((fpga_rev) & 0x000000FF) +#define FPGA_SGMII_OP BIT(24) + +#define STRAP_READ (0x0C) +#define STRAP_READ_USE_SGMII_EN_ BIT(22) +#define STRAP_READ_SGMII_EN_ BIT(6) +#define STRAP_READ_SGMII_REFCLK_ BIT(5) +#define STRAP_READ_SGMII_2_5G_ BIT(4) +#define STRAP_READ_BASE_X_ BIT(3) +#define STRAP_READ_RGMII_TXC_DELAY_EN_ BIT(2) +#define STRAP_READ_RGMII_RXC_DELAY_EN_ BIT(1) +#define STRAP_READ_ADV_PM_DISABLE_ BIT(0) #define HW_CFG (0x010) #define HW_CFG_RELOAD_TYPE_ALL_ (0x00000FC0) @@ -135,6 +151,13 @@ #define MAC_RX_ADDRL (0x11C) #define MAC_MII_ACC (0x120) +#define MAC_MII_ACC_MDC_CYCLE_SHIFT_ (16) +#define MAC_MII_ACC_MDC_CYCLE_MASK_ (0x00070000) +#define MAC_MII_ACC_MDC_CYCLE_2_5MHZ_ (0) +#define MAC_MII_ACC_MDC_CYCLE_5MHZ_ (1) +#define MAC_MII_ACC_MDC_CYCLE_12_5MHZ_ (2) +#define MAC_MII_ACC_MDC_CYCLE_25MHZ_ (3) +#define MAC_MII_ACC_MDC_CYCLE_1_25MHZ_ (4) #define MAC_MII_ACC_PHY_ADDR_SHIFT_ (11) #define MAC_MII_ACC_PHY_ADDR_MASK_ (0x0000F800) #define MAC_MII_ACC_MIIRINDA_SHIFT_ (6) @@ -143,6 +166,15 @@ #define MAC_MII_ACC_MII_WRITE_ (0x00000002) #define MAC_MII_ACC_MII_BUSY_ BIT(0) +#define MAC_MII_ACC_MIIMMD_SHIFT_ (6) +#define MAC_MII_ACC_MIIMMD_MASK_ (0x000007C0) +#define MAC_MII_ACC_MIICL45_ BIT(3) +#define MAC_MII_ACC_MIICMD_MASK_ (0x00000006) +#define MAC_MII_ACC_MIICMD_ADDR_ (0x00000000) +#define MAC_MII_ACC_MIICMD_WRITE_ (0x00000002) +#define MAC_MII_ACC_MIICMD_READ_ (0x00000004) +#define MAC_MII_ACC_MIICMD_READ_INC_ (0x00000006) + #define MAC_MII_DATA (0x124) #define MAC_EEE_TX_LPI_REQ_DLY_CNT (0x130) @@ -214,6 +246,11 @@ #define MAC_WUCSR2 (0x600) +#define SGMII_CTL (0x728) +#define SGMII_CTL_SGMII_ENABLE_ BIT(31) +#define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8) +#define SGMII_CTL_SGMII_POWER_DN_ BIT(1) + #define INT_STS (0x780) #define INT_BIT_DMA_RX_(channel) BIT(24 + (channel)) #define INT_BIT_ALL_RX_ (0x0F000000) @@ -261,6 +298,8 @@ #define INT_MOD_CFG5 (0x7D4) #define INT_MOD_CFG6 (0x7D8) #define INT_MOD_CFG7 (0x7DC) +#define INT_MOD_CFG8 (0x7E0) +#define INT_MOD_CFG9 (0x7E4) #define PTP_CMD_CTL (0x0A00) #define PTP_CMD_CTL_PTP_CLK_STP_NSEC_ BIT(6) @@ -541,10 +580,12 @@ #define LAN743X_MAX_RX_CHANNELS (4) #define LAN743X_MAX_TX_CHANNELS (1) +#define PCI11X1X_MAX_TX_CHANNELS (4) struct lan743x_adapter; #define LAN743X_USED_RX_CHANNELS (4) #define LAN743X_USED_TX_CHANNELS (1) +#define PCI11X1X_USED_TX_CHANNELS (4) #define LAN743X_INT_MOD (400) #if (LAN743X_USED_RX_CHANNELS > LAN743X_MAX_RX_CHANNELS) @@ -553,12 +594,17 @@ struct lan743x_adapter; #if (LAN743X_USED_TX_CHANNELS > LAN743X_MAX_TX_CHANNELS) #error Invalid LAN743X_USED_TX_CHANNELS #endif +#if (PCI11X1X_USED_TX_CHANNELS > PCI11X1X_MAX_TX_CHANNELS) +#error Invalid PCI11X1X_USED_TX_CHANNELS +#endif /* PCI */ /* SMSC acquired EFAR late 1990's, MCHP acquired SMSC 2012 */ #define PCI_VENDOR_ID_SMSC PCI_VENDOR_ID_EFAR #define PCI_DEVICE_ID_SMSC_LAN7430 (0x7430) #define PCI_DEVICE_ID_SMSC_LAN7431 (0x7431) +#define PCI_DEVICE_ID_SMSC_A011 (0xA011) +#define PCI_DEVICE_ID_SMSC_A041 (0xA041) #define PCI_CONFIG_LENGTH (0x1000) @@ -607,13 +653,14 @@ struct lan743x_vector { }; #define LAN743X_MAX_VECTOR_COUNT (8) +#define PCI11X1X_MAX_VECTOR_COUNT (16) struct lan743x_intr { int flags; unsigned int irq; - struct lan743x_vector vector_list[LAN743X_MAX_VECTOR_COUNT]; + struct lan743x_vector vector_list[PCI11X1X_MAX_VECTOR_COUNT]; int number_of_vectors; bool using_vectors; @@ -721,8 +768,13 @@ struct lan743x_adapter { u8 mac_address[ETH_ALEN]; struct lan743x_phy phy; - struct lan743x_tx tx[LAN743X_MAX_TX_CHANNELS]; - struct lan743x_rx rx[LAN743X_MAX_RX_CHANNELS]; + struct lan743x_tx tx[PCI11X1X_USED_TX_CHANNELS]; + struct lan743x_rx rx[LAN743X_USED_RX_CHANNELS]; + bool is_pci11x1x; + bool is_sgmii_en; + u8 max_tx_channels; + u8 used_tx_channels; + u8 max_vector_count; #define LAN743X_ADAPTER_FLAG_OTP BIT(0) u32 flags; diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c index 8b7a8d879083..ec082594bbbd 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.c +++ b/drivers/net/ethernet/microchip/lan743x_ptp.c @@ -1307,21 +1307,21 @@ int lan743x_ptp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) switch (config.tx_type) { case HWTSTAMP_TX_OFF: - for (index = 0; index < LAN743X_MAX_TX_CHANNELS; - index++) + for (index = 0; index < adapter->used_tx_channels; + index++) lan743x_tx_set_timestamping_mode(&adapter->tx[index], false, false); lan743x_ptp_set_sync_ts_insert(adapter, false); break; case HWTSTAMP_TX_ON: - for (index = 0; index < LAN743X_MAX_TX_CHANNELS; + for (index = 0; index < adapter->used_tx_channels; index++) lan743x_tx_set_timestamping_mode(&adapter->tx[index], true, false); lan743x_ptp_set_sync_ts_insert(adapter, false); break; case HWTSTAMP_TX_ONESTEP_SYNC: - for (index = 0; index < LAN743X_MAX_TX_CHANNELS; + for (index = 0; index < adapter->used_tx_channels; index++) lan743x_tx_set_timestamping_mode(&adapter->tx[index], true, true); diff --git a/drivers/net/ethernet/microchip/lan966x/Kconfig b/drivers/net/ethernet/microchip/lan966x/Kconfig index ac273f84b69e..4241ff0e5098 100644 --- a/drivers/net/ethernet/microchip/lan966x/Kconfig +++ b/drivers/net/ethernet/microchip/lan966x/Kconfig @@ -1,5 +1,6 @@ config LAN966X_SWITCH tristate "Lan966x switch driver" + depends on PTP_1588_CLOCK_OPTIONAL depends on HAS_IOMEM depends on OF depends on NET_SWITCHDEV diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile index 040cfff9f577..a9ffc719aa0e 100644 --- a/drivers/net/ethernet/microchip/lan966x/Makefile +++ b/drivers/net/ethernet/microchip/lan966x/Makefile @@ -7,4 +7,5 @@ obj-$(CONFIG_LAN966X_SWITCH) += lan966x-switch.o lan966x-switch-objs := lan966x_main.o lan966x_phylink.o lan966x_port.o \ lan966x_mac.o lan966x_ethtool.o lan966x_switchdev.o \ - lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o + lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o \ + lan966x_ptp.o diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c index 614f12c2fe6a..e58a27fd8b50 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c @@ -545,6 +545,39 @@ static int lan966x_set_pauseparam(struct net_device *dev, return phylink_ethtool_set_pauseparam(port->phylink, pause); } +static int lan966x_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + struct lan966x_phc *phc; + + if (!lan966x->ptp) + return ethtool_op_get_ts_info(dev, info); + + phc = &lan966x->phc[LAN966X_PHC_PORT]; + + info->phc_index = phc->clock ? ptp_clock_index(phc->clock) : -1; + if (info->phc_index == -1) { + info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + return 0; + } + info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) | + BIT(HWTSTAMP_TX_ONESTEP_SYNC); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} + const struct ethtool_ops lan966x_ethtool_ops = { .get_link_ksettings = lan966x_get_link_ksettings, .set_link_ksettings = lan966x_set_link_ksettings, @@ -556,6 +589,7 @@ const struct ethtool_ops lan966x_ethtool_ops = { .get_eth_mac_stats = lan966x_get_eth_mac_stats, .get_rmon_stats = lan966x_get_eth_rmon_stats, .get_link = ethtool_op_get_link, + .get_ts_info = lan966x_get_ts_info, }; static void lan966x_check_stats_work(struct work_struct *work) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 1f60fd125a1d..81c01665d01e 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -4,11 +4,13 @@ #include <linux/if_bridge.h> #include <linux/if_vlan.h> #include <linux/iopoll.h> +#include <linux/ip.h> #include <linux/of_platform.h> #include <linux/of_net.h> #include <linux/packing.h> #include <linux/phy/phy.h> #include <linux/reset.h> +#include <net/addrconf.h> #include "lan966x_main.h" @@ -44,6 +46,7 @@ static const struct lan966x_main_io_resource lan966x_main_iomap[] = { { TARGET_ORG, 0, 1 }, /* 0xe2000000 */ { TARGET_GCB, 0x4000, 1 }, /* 0xe2004000 */ { TARGET_QS, 0x8000, 1 }, /* 0xe2008000 */ + { TARGET_PTP, 0xc000, 1 }, /* 0xe200c000 */ { TARGET_CHIP_TOP, 0x10000, 1 }, /* 0xe2010000 */ { TARGET_REW, 0x14000, 1 }, /* 0xe2014000 */ { TARGET_SYS, 0x28000, 1 }, /* 0xe2028000 */ @@ -201,7 +204,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb, val = lan_rd(lan966x, QS_INJ_STATUS); if (!(QS_INJ_STATUS_FIFO_RDY_GET(val) & BIT(grp)) || (QS_INJ_STATUS_WMARK_REACHED_GET(val) & BIT(grp))) - return NETDEV_TX_BUSY; + goto err; /* Write start of frame */ lan_wr(QS_INJ_CTRL_GAP_SIZE_SET(1) | @@ -213,7 +216,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb, /* Wait until the fifo is ready */ err = lan966x_port_inj_ready(lan966x, grp); if (err) - return NETDEV_TX_BUSY; + goto err; lan_wr((__force u32)ifh[i], lan966x, QS_INJ_WR(grp)); } @@ -225,7 +228,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb, /* Wait until the fifo is ready */ err = lan966x_port_inj_ready(lan966x, grp); if (err) - return NETDEV_TX_BUSY; + goto err; lan_wr(((u32 *)skb->data)[i], lan966x, QS_INJ_WR(grp)); } @@ -235,7 +238,7 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb, /* Wait until the fifo is ready */ err = lan966x_port_inj_ready(lan966x, grp); if (err) - return NETDEV_TX_BUSY; + goto err; lan_wr(0, lan966x, QS_INJ_WR(grp)); ++i; @@ -255,8 +258,19 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb, dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + return NETDEV_TX_OK; + dev_consume_skb_any(skb); return NETDEV_TX_OK; + +err: + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + lan966x_ptp_txtstamp_release(port, skb); + + return NETDEV_TX_BUSY; } static void lan966x_ifh_set_bypass(void *ifh, u64 bypass) @@ -289,10 +303,24 @@ static void lan966x_ifh_set_vid(void *ifh, u64 vid) IFH_POS_TCI, IFH_LEN * 4, PACK, 0); } +static void lan966x_ifh_set_rew_op(void *ifh, u64 rew_op) +{ + packing(ifh, &rew_op, IFH_POS_REW_CMD + IFH_WID_REW_CMD - 1, + IFH_POS_REW_CMD, IFH_LEN * 4, PACK, 0); +} + +static void lan966x_ifh_set_timestamp(void *ifh, u64 timestamp) +{ + packing(ifh, ×tamp, IFH_POS_TIMESTAMP + IFH_WID_TIMESTAMP - 1, + IFH_POS_TIMESTAMP, IFH_LEN * 4, PACK, 0); +} + static int lan966x_port_xmit(struct sk_buff *skb, struct net_device *dev) { struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; __be32 ifh[IFH_LEN]; + int err; memset(ifh, 0x0, sizeof(__be32) * IFH_LEN); @@ -302,7 +330,20 @@ static int lan966x_port_xmit(struct sk_buff *skb, struct net_device *dev) lan966x_ifh_set_ipv(ifh, skb->priority >= 7 ? 0x7 : skb->priority); lan966x_ifh_set_vid(ifh, skb_vlan_tag_get(skb)); - return lan966x_port_ifh_xmit(skb, ifh, dev); + if (port->lan966x->ptp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { + err = lan966x_ptp_txtstamp_request(port, skb); + if (err) + return err; + + lan966x_ifh_set_rew_op(ifh, LAN966X_SKB_CB(skb)->rew_op); + lan966x_ifh_set_timestamp(ifh, LAN966X_SKB_CB(skb)->ts_id); + } + + spin_lock(&lan966x->tx_lock); + err = lan966x_port_ifh_xmit(skb, ifh, dev); + spin_unlock(&lan966x->tx_lock); + + return err; } static int lan966x_port_change_mtu(struct net_device *dev, int new_mtu) @@ -350,6 +391,23 @@ static int lan966x_port_get_parent_id(struct net_device *dev, return 0; } +static int lan966x_port_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + struct lan966x_port *port = netdev_priv(dev); + + if (!phy_has_hwtstamp(dev->phydev) && port->lan966x->ptp) { + switch (cmd) { + case SIOCSHWTSTAMP: + return lan966x_ptp_hwtstamp_set(port, ifr); + case SIOCGHWTSTAMP: + return lan966x_ptp_hwtstamp_get(port, ifr); + } + } + + return phy_mii_ioctl(dev->phydev, ifr, cmd); +} + static const struct net_device_ops lan966x_port_netdev_ops = { .ndo_open = lan966x_port_open, .ndo_stop = lan966x_port_stop, @@ -360,6 +418,7 @@ static const struct net_device_ops lan966x_port_netdev_ops = { .ndo_get_stats64 = lan966x_stats_get, .ndo_set_mac_address = lan966x_port_set_mac_address, .ndo_get_port_parent_id = lan966x_port_get_parent_id, + .ndo_eth_ioctl = lan966x_port_ioctl, }; bool lan966x_netdevice_check(const struct net_device *dev) @@ -367,6 +426,33 @@ bool lan966x_netdevice_check(const struct net_device *dev) return dev->netdev_ops == &lan966x_port_netdev_ops; } +static bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, + struct sk_buff *skb) +{ + u32 val; + + /* The IGMP and MLD frames are not forward by the HW if + * multicast snooping is enabled, therefor don't mark as + * offload to allow the SW to forward the frames accordingly. + */ + val = lan_rd(lan966x, ANA_CPU_FWD_CFG(port)); + if (!(val & (ANA_CPU_FWD_CFG_IGMP_REDIR_ENA | + ANA_CPU_FWD_CFG_MLD_REDIR_ENA))) + return true; + + if (skb->protocol == htons(ETH_P_IP) && + ip_hdr(skb)->protocol == IPPROTO_IGMP) + return false; + + if (IS_ENABLED(CONFIG_IPV6) && + skb->protocol == htons(ETH_P_IPV6) && + ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) && + !ipv6_mc_check_mld(skb)) + return false; + + return true; +} + static int lan966x_port_xtr_status(struct lan966x *lan966x, u8 grp) { return lan_rd(lan966x, QS_XTR_RD(grp)); @@ -434,6 +520,12 @@ static void lan966x_ifh_get_len(void *ifh, u64 *len) IFH_POS_LEN, IFH_LEN * 4, UNPACK, 0); } +static void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp) +{ + packing(ifh, timestamp, IFH_POS_TIMESTAMP + IFH_WID_TIMESTAMP - 1, + IFH_POS_TIMESTAMP, IFH_LEN * 4, UNPACK, 0); +} + static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args) { struct lan966x *lan966x = args; @@ -443,10 +535,10 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args) return IRQ_NONE; do { + u64 src_port, len, timestamp; struct net_device *dev; struct sk_buff *skb; int sz = 0, buf_len; - u64 src_port, len; u32 ifh[IFH_LEN]; u32 *buf; u32 val; @@ -461,6 +553,7 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args) lan966x_ifh_get_src_port(ifh, &src_port); lan966x_ifh_get_len(ifh, &len); + lan966x_ifh_get_timestamp(ifh, ×tamp); WARN_ON(src_port >= lan966x->num_phys_ports); @@ -501,12 +594,20 @@ static irqreturn_t lan966x_xtr_irq_handler(int irq, void *args) *buf = val; } + lan966x_ptp_rxtstamp(lan966x, skb, timestamp); skb->protocol = eth_type_trans(skb, dev); - if (lan966x->bridge_mask & BIT(src_port)) + if (lan966x->bridge_mask & BIT(src_port)) { skb->offload_fwd_mark = 1; - netif_rx_ni(skb); + skb_reset_network_header(skb); + if (!lan966x_hw_offload(lan966x, src_port, skb)) + skb->offload_fwd_mark = 0; + } + + if (!skb_defer_rx_timestamp(skb)) + netif_rx(skb); + dev->stats.rx_bytes += len; dev->stats.rx_packets++; @@ -628,7 +729,6 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p, } port->phylink = phylink; - phylink_set_pcs(phylink, &port->phylink_pcs); err = register_netdev(dev); if (err) { @@ -708,7 +808,7 @@ static void lan966x_init(struct lan966x *lan966x) /* Setup flooding PGIDs */ lan_wr(ANA_FLOODING_IPMC_FLD_MC4_DATA_SET(PGID_MCIPV4) | ANA_FLOODING_IPMC_FLD_MC4_CTRL_SET(PGID_MC) | - ANA_FLOODING_IPMC_FLD_MC6_DATA_SET(PGID_MC) | + ANA_FLOODING_IPMC_FLD_MC6_DATA_SET(PGID_MCIPV6) | ANA_FLOODING_IPMC_FLD_MC6_CTRL_SET(PGID_MC), lan966x, ANA_FLOODING_IPMC); @@ -770,6 +870,10 @@ static void lan966x_init(struct lan966x *lan966x) ANA_PGID_PGID, lan966x, ANA_PGID(PGID_MCIPV4)); + lan_rmw(GENMASK(lan966x->num_phys_ports - 1, 0), + ANA_PGID_PGID, + lan966x, ANA_PGID(PGID_MCIPV6)); + /* Unicast to all other ports */ lan_rmw(GENMASK(lan966x->num_phys_ports - 1, 0), ANA_PGID_PGID, @@ -786,6 +890,8 @@ static void lan966x_init(struct lan966x *lan966x) lan_rmw(ANA_ANAINTR_INTR_ENA_SET(1), ANA_ANAINTR_INTR_ENA, lan966x, ANA_ANAINTR); + + spin_lock_init(&lan966x->tx_lock); } static int lan966x_ram_init(struct lan966x *lan966x) @@ -897,6 +1003,17 @@ static int lan966x_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, err, "Unable to use ana irq"); } + lan966x->ptp_irq = platform_get_irq_byname(pdev, "ptp"); + if (lan966x->ptp_irq > 0) { + err = devm_request_threaded_irq(&pdev->dev, lan966x->ptp_irq, NULL, + lan966x_ptp_irq_handler, IRQF_ONESHOT, + "ptp irq", lan966x); + if (err) + return dev_err_probe(&pdev->dev, err, "Unable to use ptp irq"); + + lan966x->ptp = 1; + } + /* init switch */ lan966x_init(lan966x); lan966x_stats_init(lan966x); @@ -931,8 +1048,15 @@ static int lan966x_probe(struct platform_device *pdev) if (err) goto cleanup_ports; + err = lan966x_ptp_init(lan966x); + if (err) + goto cleanup_fdb; + return 0; +cleanup_fdb: + lan966x_fdb_deinit(lan966x); + cleanup_ports: fwnode_handle_put(portnp); @@ -958,6 +1082,7 @@ static int lan966x_remove(struct platform_device *pdev) lan966x_mac_purge_entries(lan966x); lan966x_mdb_deinit(lan966x); lan966x_fdb_deinit(lan966x); + lan966x_ptp_deinit(lan966x); return 0; } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 99c6d0a9f946..ae282da1da74 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -8,6 +8,7 @@ #include <linux/jiffies.h> #include <linux/phy.h> #include <linux/phylink.h> +#include <linux/ptp_clock_kernel.h> #include <net/switchdev.h> #include "lan966x_regs.h" @@ -50,6 +51,13 @@ #define LAN966X_SPEED_100 2 #define LAN966X_SPEED_10 3 +#define LAN966X_PHC_COUNT 3 +#define LAN966X_PHC_PORT 0 + +#define IFH_REW_OP_NOOP 0x0 +#define IFH_REW_OP_ONE_STEP_PTP 0x3 +#define IFH_REW_OP_TWO_STEP_PTP 0x4 + /* MAC table entry types. * ENTRYTYPE_NORMAL is subject to aging. * ENTRYTYPE_LOCKED is not subject to aging. @@ -70,6 +78,24 @@ struct lan966x_stat_layout { char name[ETH_GSTRING_LEN]; }; +struct lan966x_phc { + struct ptp_clock *clock; + struct ptp_clock_info info; + struct hwtstamp_config hwtstamp_config; + struct lan966x *lan966x; + u8 index; +}; + +struct lan966x_skb_cb { + u8 rew_op; + u16 ts_id; + unsigned long jiffies; +}; + +#define LAN966X_PTP_TIMEOUT msecs_to_jiffies(10) +#define LAN966X_SKB_CB(skb) \ + ((struct lan966x_skb_cb *)((skb)->cb)) + struct lan966x { struct device *dev; @@ -82,6 +108,8 @@ struct lan966x { u8 base_mac[ETH_ALEN]; + spinlock_t tx_lock; /* lock for frame transmition */ + struct net_device *bridge; u16 bridge_mask; u16 bridge_fwd_mask; @@ -105,6 +133,7 @@ struct lan966x { /* interrupts */ int xtr_irq; int ana_irq; + int ptp_irq; /* worqueue for fdb */ struct workqueue_struct *fdb_work; @@ -113,6 +142,14 @@ struct lan966x { /* mdb */ struct list_head mdb_entries; struct list_head pgid_entries; + + /* ptp */ + bool ptp; + struct lan966x_phc phc[LAN966X_PHC_COUNT]; + spinlock_t ptp_clock_lock; /* lock for phc */ + spinlock_t ptp_ts_id_lock; /* lock for ts_id */ + struct mutex ptp_lock; /* lock for ptp interface state */ + u16 ptp_skbs; }; struct lan966x_port_config { @@ -135,6 +172,7 @@ struct lan966x_port { bool vlan_aware; bool learn_ena; + bool mcast_ena; struct phylink_config phylink_config; struct phylink_pcs phylink_pcs; @@ -142,6 +180,10 @@ struct lan966x_port { struct phylink *phylink; struct phy *serdes; struct fwnode_handle *fwnode; + + u8 ptp_cmd; + u16 ts_id; + struct sk_buff_head tx_skbs; }; extern const struct phylink_mac_ops lan966x_phylink_mac_ops; @@ -227,6 +269,20 @@ int lan966x_handle_port_mdb_del(struct lan966x_port *port, const struct switchdev_obj *obj); void lan966x_mdb_erase_entries(struct lan966x *lan966x, u16 vid); void lan966x_mdb_write_entries(struct lan966x *lan966x, u16 vid); +void lan966x_mdb_clear_entries(struct lan966x *lan966x); +void lan966x_mdb_restore_entries(struct lan966x *lan966x); + +int lan966x_ptp_init(struct lan966x *lan966x); +void lan966x_ptp_deinit(struct lan966x *lan966x); +int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr); +int lan966x_ptp_hwtstamp_get(struct lan966x_port *port, struct ifreq *ifr); +void lan966x_ptp_rxtstamp(struct lan966x *lan966x, struct sk_buff *skb, + u64 timestamp); +int lan966x_ptp_txtstamp_request(struct lan966x_port *port, + struct sk_buff *skb); +void lan966x_ptp_txtstamp_release(struct lan966x_port *port, + struct sk_buff *skb); +irqreturn_t lan966x_ptp_irq_handler(int irq, void *args); static inline void __iomem *lan_addr(void __iomem *base[], int id, int tinst, int tcnt, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c index c68d0a99d292..2af55268bf4d 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mdb.c @@ -504,3 +504,48 @@ void lan966x_mdb_erase_entries(struct lan966x *lan966x, u16 vid) lan966x_mdb_l2_cpu_remove(lan966x, mdb_entry, type); } } + +void lan966x_mdb_clear_entries(struct lan966x *lan966x) +{ + struct lan966x_mdb_entry *mdb_entry; + enum macaccess_entry_type type; + unsigned char mac[ETH_ALEN]; + + list_for_each_entry(mdb_entry, &lan966x->mdb_entries, list) { + type = lan966x_mdb_classify(mdb_entry->mac); + + lan966x_mdb_encode_mac(mac, mdb_entry, type); + /* Remove just the MAC entry, still keep the PGID in case of L2 + * entries because this can be restored at later point + */ + lan966x_mac_forget(lan966x, mac, mdb_entry->vid, type); + } +} + +void lan966x_mdb_restore_entries(struct lan966x *lan966x) +{ + struct lan966x_mdb_entry *mdb_entry; + enum macaccess_entry_type type; + unsigned char mac[ETH_ALEN]; + bool cpu_copy = false; + + list_for_each_entry(mdb_entry, &lan966x->mdb_entries, list) { + type = lan966x_mdb_classify(mdb_entry->mac); + + lan966x_mdb_encode_mac(mac, mdb_entry, type); + if (type == ENTRYTYPE_MACV4 || type == ENTRYTYPE_MACV6) { + /* Copy the frame to CPU only if the CPU is in the VLAN */ + if (lan966x_vlan_cpu_member_cpu_vlan_mask(lan966x, + mdb_entry->vid) && + mdb_entry->cpu_copy) + cpu_copy = true; + + lan966x_mac_ip_learn(lan966x, cpu_copy, mac, + mdb_entry->vid, type); + } else { + lan966x_mac_learn(lan966x, mdb_entry->pgid->index, + mdb_entry->mac, + mdb_entry->vid, type); + } + } +} diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c b/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c index b66a9aa00ea4..38a7e95d69b4 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c @@ -9,6 +9,14 @@ #include "lan966x_main.h" +static struct phylink_pcs *lan966x_phylink_mac_select(struct phylink_config *config, + phy_interface_t interface) +{ + struct lan966x_port *port = netdev_priv(to_net_dev(config->dev)); + + return &port->phylink_pcs; +} + static void lan966x_phylink_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) @@ -114,6 +122,7 @@ static void lan966x_pcs_aneg_restart(struct phylink_pcs *pcs) const struct phylink_mac_ops lan966x_phylink_mac_ops = { .validate = phylink_generic_validate, + .mac_select_pcs = lan966x_phylink_mac_select, .mac_config = lan966x_phylink_mac_config, .mac_prepare = lan966x_phylink_mac_prepare, .mac_link_down = lan966x_phylink_mac_link_down, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c new file mode 100644 index 000000000000..ae782778d6dd --- /dev/null +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/ptp_classify.h> + +#include "lan966x_main.h" + +#define LAN966X_MAX_PTP_ID 512 + +/* Represents 1ppm adjustment in 2^59 format with 6.037735849ns as reference + * The value is calculated as following: (1/1000000)/((2^-59)/6.037735849) + */ +#define LAN966X_1PPM_FORMAT 3480517749723LL + +/* Represents 1ppb adjustment in 2^29 format with 6.037735849ns as reference + * The value is calculated as following: (1/1000000000)/((2^59)/6.037735849) + */ +#define LAN966X_1PPB_FORMAT 3480517749LL + +#define TOD_ACC_PIN 0x5 + +enum { + PTP_PIN_ACTION_IDLE = 0, + PTP_PIN_ACTION_LOAD, + PTP_PIN_ACTION_SAVE, + PTP_PIN_ACTION_CLOCK, + PTP_PIN_ACTION_DELTA, + PTP_PIN_ACTION_TOD +}; + +static u64 lan966x_ptp_get_nominal_value(void) +{ + u64 res = 0x304d2df1; + + res <<= 32; + return res; +} + +int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr) +{ + struct lan966x *lan966x = port->lan966x; + struct hwtstamp_config cfg; + struct lan966x_phc *phc; + + /* For now don't allow to run ptp on ports that are part of a bridge, + * because in case of transparent clock the HW will still forward the + * frames, so there would be duplicate frames + */ + if (lan966x->bridge_mask & BIT(port->chip_port)) + return -EINVAL; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + switch (cfg.tx_type) { + case HWTSTAMP_TX_ON: + port->ptp_cmd = IFH_REW_OP_TWO_STEP_PTP; + break; + case HWTSTAMP_TX_ONESTEP_SYNC: + port->ptp_cmd = IFH_REW_OP_ONE_STEP_PTP; + break; + case HWTSTAMP_TX_OFF: + port->ptp_cmd = IFH_REW_OP_NOOP; + break; + default: + return -ERANGE; + } + + switch (cfg.rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + cfg.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + + /* Commit back the result & save it */ + mutex_lock(&lan966x->ptp_lock); + phc = &lan966x->phc[LAN966X_PHC_PORT]; + memcpy(&phc->hwtstamp_config, &cfg, sizeof(cfg)); + mutex_unlock(&lan966x->ptp_lock); + + return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; +} + +int lan966x_ptp_hwtstamp_get(struct lan966x_port *port, struct ifreq *ifr) +{ + struct lan966x *lan966x = port->lan966x; + struct lan966x_phc *phc; + + phc = &lan966x->phc[LAN966X_PHC_PORT]; + return copy_to_user(ifr->ifr_data, &phc->hwtstamp_config, + sizeof(phc->hwtstamp_config)) ? -EFAULT : 0; +} + +static int lan966x_ptp_classify(struct lan966x_port *port, struct sk_buff *skb) +{ + struct ptp_header *header; + u8 msgtype; + int type; + + if (port->ptp_cmd == IFH_REW_OP_NOOP) + return IFH_REW_OP_NOOP; + + type = ptp_classify_raw(skb); + if (type == PTP_CLASS_NONE) + return IFH_REW_OP_NOOP; + + header = ptp_parse_header(skb, type); + if (!header) + return IFH_REW_OP_NOOP; + + if (port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) + return IFH_REW_OP_TWO_STEP_PTP; + + /* If it is sync and run 1 step then set the correct operation, + * otherwise run as 2 step + */ + msgtype = ptp_get_msgtype(header, type); + if ((msgtype & 0xf) == 0) + return IFH_REW_OP_ONE_STEP_PTP; + + return IFH_REW_OP_TWO_STEP_PTP; +} + +static void lan966x_ptp_txtstamp_old_release(struct lan966x_port *port) +{ + struct sk_buff *skb, *skb_tmp; + unsigned long flags; + + spin_lock_irqsave(&port->tx_skbs.lock, flags); + skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { + if time_after(LAN966X_SKB_CB(skb)->jiffies + LAN966X_PTP_TIMEOUT, + jiffies) + break; + + __skb_unlink(skb, &port->tx_skbs); + dev_kfree_skb_any(skb); + } + spin_unlock_irqrestore(&port->tx_skbs.lock, flags); +} + +int lan966x_ptp_txtstamp_request(struct lan966x_port *port, + struct sk_buff *skb) +{ + struct lan966x *lan966x = port->lan966x; + unsigned long flags; + u8 rew_op; + + rew_op = lan966x_ptp_classify(port, skb); + LAN966X_SKB_CB(skb)->rew_op = rew_op; + + if (rew_op != IFH_REW_OP_TWO_STEP_PTP) + return 0; + + lan966x_ptp_txtstamp_old_release(port); + + spin_lock_irqsave(&lan966x->ptp_ts_id_lock, flags); + if (lan966x->ptp_skbs == LAN966X_MAX_PTP_ID) { + spin_unlock_irqrestore(&lan966x->ptp_ts_id_lock, flags); + return -EBUSY; + } + + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + + skb_queue_tail(&port->tx_skbs, skb); + LAN966X_SKB_CB(skb)->ts_id = port->ts_id; + LAN966X_SKB_CB(skb)->jiffies = jiffies; + + lan966x->ptp_skbs++; + port->ts_id++; + if (port->ts_id == LAN966X_MAX_PTP_ID) + port->ts_id = 0; + + spin_unlock_irqrestore(&lan966x->ptp_ts_id_lock, flags); + + return 0; +} + +void lan966x_ptp_txtstamp_release(struct lan966x_port *port, + struct sk_buff *skb) +{ + struct lan966x *lan966x = port->lan966x; + unsigned long flags; + + spin_lock_irqsave(&lan966x->ptp_ts_id_lock, flags); + port->ts_id--; + lan966x->ptp_skbs--; + skb_unlink(skb, &port->tx_skbs); + spin_unlock_irqrestore(&lan966x->ptp_ts_id_lock, flags); +} + +static void lan966x_get_hwtimestamp(struct lan966x *lan966x, + struct timespec64 *ts, + u32 nsec) +{ + /* Read current PTP time to get seconds */ + unsigned long flags; + u32 curr_nsec; + + spin_lock_irqsave(&lan966x->ptp_clock_lock, flags); + + lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_SAVE) | + PTP_PIN_CFG_PIN_DOM_SET(LAN966X_PHC_PORT) | + PTP_PIN_CFG_PIN_SYNC_SET(0), + PTP_PIN_CFG_PIN_ACTION | + PTP_PIN_CFG_PIN_DOM | + PTP_PIN_CFG_PIN_SYNC, + lan966x, PTP_PIN_CFG(TOD_ACC_PIN)); + + ts->tv_sec = lan_rd(lan966x, PTP_TOD_SEC_LSB(TOD_ACC_PIN)); + curr_nsec = lan_rd(lan966x, PTP_TOD_NSEC(TOD_ACC_PIN)); + + ts->tv_nsec = nsec; + + /* Sec has incremented since the ts was registered */ + if (curr_nsec < nsec) + ts->tv_sec--; + + spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags); +} + +irqreturn_t lan966x_ptp_irq_handler(int irq, void *args) +{ + int budget = LAN966X_MAX_PTP_ID; + struct lan966x *lan966x = args; + + while (budget--) { + struct sk_buff *skb, *skb_tmp, *skb_match = NULL; + struct skb_shared_hwtstamps shhwtstamps; + struct lan966x_port *port; + struct timespec64 ts; + unsigned long flags; + u32 val, id, txport; + u32 delay; + + val = lan_rd(lan966x, PTP_TWOSTEP_CTRL); + + /* Check if a timestamp can be retrieved */ + if (!(val & PTP_TWOSTEP_CTRL_VLD)) + break; + + WARN_ON(val & PTP_TWOSTEP_CTRL_OVFL); + + if (!(val & PTP_TWOSTEP_CTRL_STAMP_TX)) + continue; + + /* Retrieve the ts Tx port */ + txport = PTP_TWOSTEP_CTRL_STAMP_PORT_GET(val); + + /* Retrieve its associated skb */ + port = lan966x->ports[txport]; + + /* Retrieve the delay */ + delay = lan_rd(lan966x, PTP_TWOSTEP_STAMP); + delay = PTP_TWOSTEP_STAMP_STAMP_NSEC_GET(delay); + + /* Get next timestamp from fifo, which needs to be the + * rx timestamp which represents the id of the frame + */ + lan_rmw(PTP_TWOSTEP_CTRL_NXT_SET(1), + PTP_TWOSTEP_CTRL_NXT, + lan966x, PTP_TWOSTEP_CTRL); + + val = lan_rd(lan966x, PTP_TWOSTEP_CTRL); + + /* Check if a timestamp can be retried */ + if (!(val & PTP_TWOSTEP_CTRL_VLD)) + break; + + /* Read RX timestamping to get the ID */ + id = lan_rd(lan966x, PTP_TWOSTEP_STAMP); + + spin_lock_irqsave(&port->tx_skbs.lock, flags); + skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { + if (LAN966X_SKB_CB(skb)->ts_id != id) + continue; + + __skb_unlink(skb, &port->tx_skbs); + skb_match = skb; + break; + } + spin_unlock_irqrestore(&port->tx_skbs.lock, flags); + + /* Next ts */ + lan_rmw(PTP_TWOSTEP_CTRL_NXT_SET(1), + PTP_TWOSTEP_CTRL_NXT, + lan966x, PTP_TWOSTEP_CTRL); + + if (WARN_ON(!skb_match)) + continue; + + spin_lock(&lan966x->ptp_ts_id_lock); + lan966x->ptp_skbs--; + spin_unlock(&lan966x->ptp_ts_id_lock); + + /* Get the h/w timestamp */ + lan966x_get_hwtimestamp(lan966x, &ts, delay); + + /* Set the timestamp into the skb */ + shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec); + skb_tstamp_tx(skb_match, &shhwtstamps); + + dev_kfree_skb_any(skb_match); + } + + return IRQ_HANDLED; +} + +static int lan966x_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info); + struct lan966x *lan966x = phc->lan966x; + unsigned long flags; + bool neg_adj = 0; + u64 tod_inc; + u64 ref; + + if (!scaled_ppm) + return 0; + + if (scaled_ppm < 0) { + neg_adj = 1; + scaled_ppm = -scaled_ppm; + } + + tod_inc = lan966x_ptp_get_nominal_value(); + + /* The multiplication is split in 2 separate additions because of + * overflow issues. If scaled_ppm with 16bit fractional part was bigger + * than 20ppm then we got overflow. + */ + ref = LAN966X_1PPM_FORMAT * (scaled_ppm >> 16); + ref += (LAN966X_1PPM_FORMAT * (0xffff & scaled_ppm)) >> 16; + tod_inc = neg_adj ? tod_inc - ref : tod_inc + ref; + + spin_lock_irqsave(&lan966x->ptp_clock_lock, flags); + + lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(1 << BIT(phc->index)), + PTP_DOM_CFG_CLKCFG_DIS, + lan966x, PTP_DOM_CFG); + + lan_wr((u32)tod_inc & 0xFFFFFFFF, lan966x, + PTP_CLK_PER_CFG(phc->index, 0)); + lan_wr((u32)(tod_inc >> 32), lan966x, + PTP_CLK_PER_CFG(phc->index, 1)); + + lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(0), + PTP_DOM_CFG_CLKCFG_DIS, + lan966x, PTP_DOM_CFG); + + spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags); + + return 0; +} + +static int lan966x_ptp_settime64(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info); + struct lan966x *lan966x = phc->lan966x; + unsigned long flags; + + spin_lock_irqsave(&lan966x->ptp_clock_lock, flags); + + /* Must be in IDLE mode before the time can be loaded */ + lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_IDLE) | + PTP_PIN_CFG_PIN_DOM_SET(phc->index) | + PTP_PIN_CFG_PIN_SYNC_SET(0), + PTP_PIN_CFG_PIN_ACTION | + PTP_PIN_CFG_PIN_DOM | + PTP_PIN_CFG_PIN_SYNC, + lan966x, PTP_PIN_CFG(TOD_ACC_PIN)); + + /* Set new value */ + lan_wr(PTP_TOD_SEC_MSB_TOD_SEC_MSB_SET(upper_32_bits(ts->tv_sec)), + lan966x, PTP_TOD_SEC_MSB(TOD_ACC_PIN)); + lan_wr(lower_32_bits(ts->tv_sec), + lan966x, PTP_TOD_SEC_LSB(TOD_ACC_PIN)); + lan_wr(ts->tv_nsec, lan966x, PTP_TOD_NSEC(TOD_ACC_PIN)); + + /* Apply new values */ + lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_LOAD) | + PTP_PIN_CFG_PIN_DOM_SET(phc->index) | + PTP_PIN_CFG_PIN_SYNC_SET(0), + PTP_PIN_CFG_PIN_ACTION | + PTP_PIN_CFG_PIN_DOM | + PTP_PIN_CFG_PIN_SYNC, + lan966x, PTP_PIN_CFG(TOD_ACC_PIN)); + + spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags); + + return 0; +} + +static int lan966x_ptp_gettime64(struct ptp_clock_info *ptp, + struct timespec64 *ts) +{ + struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info); + struct lan966x *lan966x = phc->lan966x; + unsigned long flags; + time64_t s; + s64 ns; + + spin_lock_irqsave(&lan966x->ptp_clock_lock, flags); + + lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_SAVE) | + PTP_PIN_CFG_PIN_DOM_SET(phc->index) | + PTP_PIN_CFG_PIN_SYNC_SET(0), + PTP_PIN_CFG_PIN_ACTION | + PTP_PIN_CFG_PIN_DOM | + PTP_PIN_CFG_PIN_SYNC, + lan966x, PTP_PIN_CFG(TOD_ACC_PIN)); + + s = lan_rd(lan966x, PTP_TOD_SEC_MSB(TOD_ACC_PIN)); + s <<= 32; + s |= lan_rd(lan966x, PTP_TOD_SEC_LSB(TOD_ACC_PIN)); + ns = lan_rd(lan966x, PTP_TOD_NSEC(TOD_ACC_PIN)); + ns &= PTP_TOD_NSEC_TOD_NSEC; + + spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags); + + /* Deal with negative values */ + if ((ns & 0xFFFFFFF0) == 0x3FFFFFF0) { + s--; + ns &= 0xf; + ns += 999999984; + } + + set_normalized_timespec64(ts, s, ns); + return 0; +} + +static int lan966x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct lan966x_phc *phc = container_of(ptp, struct lan966x_phc, info); + struct lan966x *lan966x = phc->lan966x; + + if (delta > -(NSEC_PER_SEC / 2) && delta < (NSEC_PER_SEC / 2)) { + unsigned long flags; + + spin_lock_irqsave(&lan966x->ptp_clock_lock, flags); + + /* Must be in IDLE mode before the time can be loaded */ + lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_IDLE) | + PTP_PIN_CFG_PIN_DOM_SET(phc->index) | + PTP_PIN_CFG_PIN_SYNC_SET(0), + PTP_PIN_CFG_PIN_ACTION | + PTP_PIN_CFG_PIN_DOM | + PTP_PIN_CFG_PIN_SYNC, + lan966x, PTP_PIN_CFG(TOD_ACC_PIN)); + + lan_wr(PTP_TOD_NSEC_TOD_NSEC_SET(delta), + lan966x, PTP_TOD_NSEC(TOD_ACC_PIN)); + + /* Adjust time with the value of PTP_TOD_NSEC */ + lan_rmw(PTP_PIN_CFG_PIN_ACTION_SET(PTP_PIN_ACTION_DELTA) | + PTP_PIN_CFG_PIN_DOM_SET(phc->index) | + PTP_PIN_CFG_PIN_SYNC_SET(0), + PTP_PIN_CFG_PIN_ACTION | + PTP_PIN_CFG_PIN_DOM | + PTP_PIN_CFG_PIN_SYNC, + lan966x, PTP_PIN_CFG(TOD_ACC_PIN)); + + spin_unlock_irqrestore(&lan966x->ptp_clock_lock, flags); + } else { + /* Fall back using lan966x_ptp_settime64 which is not exact */ + struct timespec64 ts; + u64 now; + + lan966x_ptp_gettime64(ptp, &ts); + + now = ktime_to_ns(timespec64_to_ktime(ts)); + ts = ns_to_timespec64(now + delta); + + lan966x_ptp_settime64(ptp, &ts); + } + + return 0; +} + +static struct ptp_clock_info lan966x_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "lan966x ptp", + .max_adj = 200000, + .gettime64 = lan966x_ptp_gettime64, + .settime64 = lan966x_ptp_settime64, + .adjtime = lan966x_ptp_adjtime, + .adjfine = lan966x_ptp_adjfine, +}; + +static int lan966x_ptp_phc_init(struct lan966x *lan966x, + int index, + struct ptp_clock_info *clock_info) +{ + struct lan966x_phc *phc = &lan966x->phc[index]; + + phc->info = *clock_info; + phc->clock = ptp_clock_register(&phc->info, lan966x->dev); + if (IS_ERR(phc->clock)) + return PTR_ERR(phc->clock); + + phc->index = index; + phc->lan966x = lan966x; + + /* PTP Rx stamping is always enabled. */ + phc->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + + return 0; +} + +int lan966x_ptp_init(struct lan966x *lan966x) +{ + u64 tod_adj = lan966x_ptp_get_nominal_value(); + struct lan966x_port *port; + int err, i; + + if (!lan966x->ptp) + return 0; + + for (i = 0; i < LAN966X_PHC_COUNT; ++i) { + err = lan966x_ptp_phc_init(lan966x, i, &lan966x_ptp_clock_info); + if (err) + return err; + } + + spin_lock_init(&lan966x->ptp_clock_lock); + spin_lock_init(&lan966x->ptp_ts_id_lock); + mutex_init(&lan966x->ptp_lock); + + /* Disable master counters */ + lan_wr(PTP_DOM_CFG_ENA_SET(0), lan966x, PTP_DOM_CFG); + + /* Configure the nominal TOD increment per clock cycle */ + lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(0x7), + PTP_DOM_CFG_CLKCFG_DIS, + lan966x, PTP_DOM_CFG); + + for (i = 0; i < LAN966X_PHC_COUNT; ++i) { + lan_wr((u32)tod_adj & 0xFFFFFFFF, lan966x, + PTP_CLK_PER_CFG(i, 0)); + lan_wr((u32)(tod_adj >> 32), lan966x, + PTP_CLK_PER_CFG(i, 1)); + } + + lan_rmw(PTP_DOM_CFG_CLKCFG_DIS_SET(0), + PTP_DOM_CFG_CLKCFG_DIS, + lan966x, PTP_DOM_CFG); + + /* Enable master counters */ + lan_wr(PTP_DOM_CFG_ENA_SET(0x7), lan966x, PTP_DOM_CFG); + + for (i = 0; i < lan966x->num_phys_ports; i++) { + port = lan966x->ports[i]; + if (!port) + continue; + + skb_queue_head_init(&port->tx_skbs); + } + + return 0; +} + +void lan966x_ptp_deinit(struct lan966x *lan966x) +{ + struct lan966x_port *port; + int i; + + for (i = 0; i < lan966x->num_phys_ports; i++) { + port = lan966x->ports[i]; + if (!port) + continue; + + skb_queue_purge(&port->tx_skbs); + } + + for (i = 0; i < LAN966X_PHC_COUNT; ++i) + ptp_clock_unregister(lan966x->phc[i].clock); +} + +void lan966x_ptp_rxtstamp(struct lan966x *lan966x, struct sk_buff *skb, + u64 timestamp) +{ + struct skb_shared_hwtstamps *shhwtstamps; + struct lan966x_phc *phc; + struct timespec64 ts; + u64 full_ts_in_ns; + + if (!lan966x->ptp) + return; + + phc = &lan966x->phc[LAN966X_PHC_PORT]; + lan966x_ptp_gettime64(&phc->info, &ts); + + /* Drop the sub-ns precision */ + timestamp = timestamp >> 2; + if (ts.tv_nsec < timestamp) + ts.tv_sec--; + ts.tv_nsec = timestamp; + full_ts_in_ns = ktime_set(ts.tv_sec, ts.tv_nsec); + + shhwtstamps = skb_hwtstamps(skb); + shhwtstamps->hwtstamp = full_ts_in_ns; +} diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h index 797560172aca..0c0b3e173d53 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h @@ -19,6 +19,7 @@ enum lan966x_target { TARGET_DEV = 13, TARGET_GCB = 27, TARGET_ORG = 36, + TARGET_PTP = 41, TARGET_QS = 42, TARGET_QSYS = 46, TARGET_REW = 47, @@ -298,6 +299,24 @@ enum lan966x_target { /* ANA:PORT:CPU_FWD_CFG */ #define ANA_CPU_FWD_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 96, 0, 1, 4) +#define ANA_CPU_FWD_CFG_MLD_REDIR_ENA BIT(6) +#define ANA_CPU_FWD_CFG_MLD_REDIR_ENA_SET(x)\ + FIELD_PREP(ANA_CPU_FWD_CFG_MLD_REDIR_ENA, x) +#define ANA_CPU_FWD_CFG_MLD_REDIR_ENA_GET(x)\ + FIELD_GET(ANA_CPU_FWD_CFG_MLD_REDIR_ENA, x) + +#define ANA_CPU_FWD_CFG_IGMP_REDIR_ENA BIT(5) +#define ANA_CPU_FWD_CFG_IGMP_REDIR_ENA_SET(x)\ + FIELD_PREP(ANA_CPU_FWD_CFG_IGMP_REDIR_ENA, x) +#define ANA_CPU_FWD_CFG_IGMP_REDIR_ENA_GET(x)\ + FIELD_GET(ANA_CPU_FWD_CFG_IGMP_REDIR_ENA, x) + +#define ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA BIT(4) +#define ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA_SET(x)\ + FIELD_PREP(ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA, x) +#define ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA_GET(x)\ + FIELD_GET(ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA, x) + #define ANA_CPU_FWD_CFG_SRC_COPY_ENA BIT(3) #define ANA_CPU_FWD_CFG_SRC_COPY_ENA_SET(x)\ FIELD_PREP(ANA_CPU_FWD_CFG_SRC_COPY_ENA, x) @@ -559,6 +578,108 @@ enum lan966x_target { #define DEV_PCS1G_STICKY_LINK_DOWN_STICKY_GET(x)\ FIELD_GET(DEV_PCS1G_STICKY_LINK_DOWN_STICKY, x) +/* PTP:PTP_CFG:PTP_DOM_CFG */ +#define PTP_DOM_CFG __REG(TARGET_PTP, 0, 1, 512, 0, 1, 16, 12, 0, 1, 4) + +#define PTP_DOM_CFG_ENA GENMASK(11, 9) +#define PTP_DOM_CFG_ENA_SET(x)\ + FIELD_PREP(PTP_DOM_CFG_ENA, x) +#define PTP_DOM_CFG_ENA_GET(x)\ + FIELD_GET(PTP_DOM_CFG_ENA, x) + +#define PTP_DOM_CFG_CLKCFG_DIS GENMASK(2, 0) +#define PTP_DOM_CFG_CLKCFG_DIS_SET(x)\ + FIELD_PREP(PTP_DOM_CFG_CLKCFG_DIS, x) +#define PTP_DOM_CFG_CLKCFG_DIS_GET(x)\ + FIELD_GET(PTP_DOM_CFG_CLKCFG_DIS, x) + +/* PTP:PTP_TOD_DOMAINS:CLK_PER_CFG */ +#define PTP_CLK_PER_CFG(g, r) __REG(TARGET_PTP, 0, 1, 528, g, 3, 28, 0, r, 2, 4) + +/* PTP:PTP_PINS:PTP_PIN_CFG */ +#define PTP_PIN_CFG(g) __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 0, 0, 1, 4) + +#define PTP_PIN_CFG_PIN_ACTION GENMASK(29, 27) +#define PTP_PIN_CFG_PIN_ACTION_SET(x)\ + FIELD_PREP(PTP_PIN_CFG_PIN_ACTION, x) +#define PTP_PIN_CFG_PIN_ACTION_GET(x)\ + FIELD_GET(PTP_PIN_CFG_PIN_ACTION, x) + +#define PTP_PIN_CFG_PIN_SYNC GENMASK(26, 25) +#define PTP_PIN_CFG_PIN_SYNC_SET(x)\ + FIELD_PREP(PTP_PIN_CFG_PIN_SYNC, x) +#define PTP_PIN_CFG_PIN_SYNC_GET(x)\ + FIELD_GET(PTP_PIN_CFG_PIN_SYNC, x) + +#define PTP_PIN_CFG_PIN_DOM GENMASK(17, 16) +#define PTP_PIN_CFG_PIN_DOM_SET(x)\ + FIELD_PREP(PTP_PIN_CFG_PIN_DOM, x) +#define PTP_PIN_CFG_PIN_DOM_GET(x)\ + FIELD_GET(PTP_PIN_CFG_PIN_DOM, x) + +/* PTP:PTP_PINS:PTP_TOD_SEC_MSB */ +#define PTP_TOD_SEC_MSB(g) __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 4, 0, 1, 4) + +#define PTP_TOD_SEC_MSB_TOD_SEC_MSB GENMASK(15, 0) +#define PTP_TOD_SEC_MSB_TOD_SEC_MSB_SET(x)\ + FIELD_PREP(PTP_TOD_SEC_MSB_TOD_SEC_MSB, x) +#define PTP_TOD_SEC_MSB_TOD_SEC_MSB_GET(x)\ + FIELD_GET(PTP_TOD_SEC_MSB_TOD_SEC_MSB, x) + +/* PTP:PTP_PINS:PTP_TOD_SEC_LSB */ +#define PTP_TOD_SEC_LSB(g) __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 8, 0, 1, 4) + +/* PTP:PTP_PINS:PTP_TOD_NSEC */ +#define PTP_TOD_NSEC(g) __REG(TARGET_PTP, 0, 1, 0, g, 8, 64, 12, 0, 1, 4) + +#define PTP_TOD_NSEC_TOD_NSEC GENMASK(29, 0) +#define PTP_TOD_NSEC_TOD_NSEC_SET(x)\ + FIELD_PREP(PTP_TOD_NSEC_TOD_NSEC, x) +#define PTP_TOD_NSEC_TOD_NSEC_GET(x)\ + FIELD_GET(PTP_TOD_NSEC_TOD_NSEC, x) + +/* PTP:PTP_TS_FIFO:PTP_TWOSTEP_CTRL */ +#define PTP_TWOSTEP_CTRL __REG(TARGET_PTP, 0, 1, 612, 0, 1, 12, 0, 0, 1, 4) + +#define PTP_TWOSTEP_CTRL_NXT BIT(11) +#define PTP_TWOSTEP_CTRL_NXT_SET(x)\ + FIELD_PREP(PTP_TWOSTEP_CTRL_NXT, x) +#define PTP_TWOSTEP_CTRL_NXT_GET(x)\ + FIELD_GET(PTP_TWOSTEP_CTRL_NXT, x) + +#define PTP_TWOSTEP_CTRL_VLD BIT(10) +#define PTP_TWOSTEP_CTRL_VLD_SET(x)\ + FIELD_PREP(PTP_TWOSTEP_CTRL_VLD, x) +#define PTP_TWOSTEP_CTRL_VLD_GET(x)\ + FIELD_GET(PTP_TWOSTEP_CTRL_VLD, x) + +#define PTP_TWOSTEP_CTRL_STAMP_TX BIT(9) +#define PTP_TWOSTEP_CTRL_STAMP_TX_SET(x)\ + FIELD_PREP(PTP_TWOSTEP_CTRL_STAMP_TX, x) +#define PTP_TWOSTEP_CTRL_STAMP_TX_GET(x)\ + FIELD_GET(PTP_TWOSTEP_CTRL_STAMP_TX, x) + +#define PTP_TWOSTEP_CTRL_STAMP_PORT GENMASK(8, 1) +#define PTP_TWOSTEP_CTRL_STAMP_PORT_SET(x)\ + FIELD_PREP(PTP_TWOSTEP_CTRL_STAMP_PORT, x) +#define PTP_TWOSTEP_CTRL_STAMP_PORT_GET(x)\ + FIELD_GET(PTP_TWOSTEP_CTRL_STAMP_PORT, x) + +#define PTP_TWOSTEP_CTRL_OVFL BIT(0) +#define PTP_TWOSTEP_CTRL_OVFL_SET(x)\ + FIELD_PREP(PTP_TWOSTEP_CTRL_OVFL, x) +#define PTP_TWOSTEP_CTRL_OVFL_GET(x)\ + FIELD_GET(PTP_TWOSTEP_CTRL_OVFL, x) + +/* PTP:PTP_TS_FIFO:PTP_TWOSTEP_STAMP */ +#define PTP_TWOSTEP_STAMP __REG(TARGET_PTP, 0, 1, 612, 0, 1, 12, 4, 0, 1, 4) + +#define PTP_TWOSTEP_STAMP_STAMP_NSEC GENMASK(31, 2) +#define PTP_TWOSTEP_STAMP_STAMP_NSEC_SET(x)\ + FIELD_PREP(PTP_TWOSTEP_STAMP_STAMP_NSEC, x) +#define PTP_TWOSTEP_STAMP_STAMP_NSEC_GET(x)\ + FIELD_GET(PTP_TWOSTEP_STAMP_STAMP_NSEC, x) + /* DEVCPU_QS:XTR:XTR_GRP_CFG */ #define QS_XTR_GRP_CFG(r) __REG(TARGET_QS, 0, 1, 0, 0, 1, 36, 0, r, 2, 4) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c index 7de55f6a4da8..e3555c94294d 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c @@ -9,6 +9,37 @@ static struct notifier_block lan966x_netdevice_nb __read_mostly; static struct notifier_block lan966x_switchdev_nb __read_mostly; static struct notifier_block lan966x_switchdev_blocking_nb __read_mostly; +static void lan966x_port_set_mcast_ip_flood(struct lan966x_port *port, + u32 pgid_ip) +{ + struct lan966x *lan966x = port->lan966x; + u32 flood_mask_ip; + + flood_mask_ip = lan_rd(lan966x, ANA_PGID(pgid_ip)); + flood_mask_ip = ANA_PGID_PGID_GET(flood_mask_ip); + + /* If mcast snooping is not enabled then use mcast flood mask + * to decide to enable multicast flooding or not. + */ + if (!port->mcast_ena) { + u32 flood_mask; + + flood_mask = lan_rd(lan966x, ANA_PGID(PGID_MC)); + flood_mask = ANA_PGID_PGID_GET(flood_mask); + + if (flood_mask & BIT(port->chip_port)) + flood_mask_ip |= BIT(port->chip_port); + else + flood_mask_ip &= ~BIT(port->chip_port); + } else { + flood_mask_ip &= ~BIT(port->chip_port); + } + + lan_rmw(ANA_PGID_PGID_SET(flood_mask_ip), + ANA_PGID_PGID, + lan966x, ANA_PGID(pgid_ip)); +} + static void lan966x_port_set_mcast_flood(struct lan966x_port *port, bool enabled) { @@ -23,6 +54,11 @@ static void lan966x_port_set_mcast_flood(struct lan966x_port *port, lan_rmw(ANA_PGID_PGID_SET(val), ANA_PGID_PGID, port->lan966x, ANA_PGID(PGID_MC)); + + if (!port->mcast_ena) { + lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV4); + lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV6); + } } static void lan966x_port_set_ucast_flood(struct lan966x_port *port, @@ -144,6 +180,28 @@ static void lan966x_port_ageing_set(struct lan966x_port *port, lan966x_mac_set_ageing(port->lan966x, ageing_time); } +static void lan966x_port_mc_set(struct lan966x_port *port, bool mcast_ena) +{ + struct lan966x *lan966x = port->lan966x; + + port->mcast_ena = mcast_ena; + if (mcast_ena) + lan966x_mdb_restore_entries(lan966x); + else + lan966x_mdb_clear_entries(lan966x); + + lan_rmw(ANA_CPU_FWD_CFG_IGMP_REDIR_ENA_SET(mcast_ena) | + ANA_CPU_FWD_CFG_MLD_REDIR_ENA_SET(mcast_ena) | + ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA_SET(mcast_ena), + ANA_CPU_FWD_CFG_IGMP_REDIR_ENA | + ANA_CPU_FWD_CFG_MLD_REDIR_ENA | + ANA_CPU_FWD_CFG_IPMC_CTRL_COPY_ENA, + lan966x, ANA_CPU_FWD_CFG(port->chip_port)); + + lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV4); + lan966x_port_set_mcast_ip_flood(port, PGID_MCIPV6); +} + static int lan966x_port_attr_set(struct net_device *dev, const void *ctx, const struct switchdev_attr *attr, struct netlink_ext_ack *extack) @@ -171,6 +229,9 @@ static int lan966x_port_attr_set(struct net_device *dev, const void *ctx, lan966x_vlan_port_set_vlan_aware(port, attr->u.vlan_filtering); lan966x_vlan_port_apply(port); break; + case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: + lan966x_port_mc_set(port, !attr->u.mc_disabled); + break; default: err = -EOPNOTSUPP; break; @@ -358,6 +419,9 @@ static int lan966x_netdevice_event(struct notifier_block *nb, return notifier_from_errno(ret); } +/* We don't offload uppers such as LAG as bridge ports, so every device except + * the bridge itself is foreign. + */ static bool lan966x_foreign_dev_check(const struct net_device *dev, const struct net_device *foreign_dev) { @@ -365,10 +429,10 @@ static bool lan966x_foreign_dev_check(const struct net_device *dev, struct lan966x *lan966x = port->lan966x; if (netif_is_bridge_master(foreign_dev)) - if (lan966x->bridge != foreign_dev) - return true; + if (lan966x->bridge == foreign_dev) + return false; - return false; + return true; } static int lan966x_switchdev_event(struct notifier_block *nb, @@ -388,8 +452,7 @@ static int lan966x_switchdev_event(struct notifier_block *nb, err = switchdev_handle_fdb_event_to_device(dev, event, ptr, lan966x_netdevice_check, lan966x_foreign_dev_check, - lan966x_handle_fdb, - NULL); + lan966x_handle_fdb); return notifier_from_errno(err); } @@ -402,18 +465,6 @@ static int lan966x_handle_port_vlan_add(struct lan966x_port *port, const struct switchdev_obj_port_vlan *v = SWITCHDEV_OBJ_PORT_VLAN(obj); struct lan966x *lan966x = port->lan966x; - /* When adding a port to a vlan, we get a callback for the port but - * also for the bridge. When get the callback for the bridge just bail - * out. Then when the bridge is added to the vlan, then we get a - * callback here but in this case the flags has set: - * BRIDGE_VLAN_INFO_BRENTRY. In this case it means that the CPU - * port is added to the vlan, so the broadcast frames and unicast frames - * with dmac of the bridge should be foward to CPU. - */ - if (netif_is_bridge_master(obj->orig_dev) && - !(v->flags & BRIDGE_VLAN_INFO_BRENTRY)) - return 0; - if (!netif_is_bridge_master(obj->orig_dev)) lan966x_vlan_port_add_vlan(port, v->vid, v->flags & BRIDGE_VLAN_INFO_PVID, diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile index c271e86ee292..e9dd348a6ebb 100644 --- a/drivers/net/ethernet/microchip/sparx5/Makefile +++ b/drivers/net/ethernet/microchip/sparx5/Makefile @@ -7,4 +7,5 @@ obj-$(CONFIG_SPARX5_SWITCH) += sparx5-switch.o sparx5-switch-objs := sparx5_main.o sparx5_packet.o \ sparx5_netdev.o sparx5_phylink.o sparx5_port.o sparx5_mactable.o sparx5_vlan.o \ - sparx5_switchdev.o sparx5_calendar.o sparx5_ethtool.o sparx5_fdma.o + sparx5_switchdev.o sparx5_calendar.o sparx5_ethtool.o sparx5_fdma.o \ + sparx5_ptp.o diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c index 10b866e9f726..6b0febcb7fa9 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c @@ -1183,6 +1183,39 @@ static void sparx5_config_port_stats(struct sparx5 *sparx5, int portno) sparx5, ANA_AC_PORT_STAT_CFG(portno, SPX5_PORT_POLICER_DROPS)); } +static int sparx5_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct sparx5_port *port = netdev_priv(dev); + struct sparx5 *sparx5 = port->sparx5; + struct sparx5_phc *phc; + + if (!sparx5->ptp) + return ethtool_op_get_ts_info(dev, info); + + phc = &sparx5->phc[SPARX5_PHC_PORT]; + + info->phc_index = phc->clock ? ptp_clock_index(phc->clock) : -1; + if (info->phc_index == -1) { + info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + return 0; + } + info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) | + BIT(HWTSTAMP_TX_ONESTEP_SYNC); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} + const struct ethtool_ops sparx5_ethtool_ops = { .get_sset_count = sparx5_get_sset_count, .get_strings = sparx5_get_sset_strings, @@ -1194,6 +1227,7 @@ const struct ethtool_ops sparx5_ethtool_ops = { .get_eth_mac_stats = sparx5_get_eth_mac_stats, .get_eth_ctrl_stats = sparx5_get_eth_mac_ctrl_stats, .get_rmon_stats = sparx5_get_eth_rmon_stats, + .get_ts_info = sparx5_get_ts_info, }; int sparx_stats_init(struct sparx5 *sparx5) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c index 7436f62fa152..2dc87584023a 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c @@ -240,6 +240,8 @@ static bool sparx5_fdma_rx_get_frame(struct sparx5 *sparx5, struct sparx5_rx *rx skb_pull(skb, IFH_LEN * sizeof(u32)); if (likely(!(skb->dev->features & NETIF_F_RXFCS))) skb_trim(skb, skb->len - ETH_FCS_LEN); + + sparx5_ptp_rxtstamp(sparx5, skb, fi.timestamp); skb->protocol = eth_type_trans(skb, skb->dev); /* Everything we see on an interface that is in the HW bridge * has already been forwarded diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 16266275dd36..5f7c7030ce03 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -190,6 +190,7 @@ static const struct sparx5_main_io_resource sparx5_main_iomap[] = { { TARGET_ASM, 0x10600000, 1 }, /* 0x610600000 */ { TARGET_GCB, 0x11010000, 2 }, /* 0x611010000 */ { TARGET_QS, 0x11030000, 2 }, /* 0x611030000 */ + { TARGET_PTP, 0x11040000, 2 }, /* 0x611040000 */ { TARGET_ANA_ACL, 0x11050000, 2 }, /* 0x611050000 */ { TARGET_LRN, 0x11060000, 2 }, /* 0x611060000 */ { TARGET_VCAP_SUPER, 0x11080000, 2 }, /* 0x611080000 */ @@ -291,7 +292,6 @@ static int sparx5_create_port(struct sparx5 *sparx5, /* Create a phylink for PHY management. Also handles SFPs */ spx5_port->phylink_config.dev = &spx5_port->ndev->dev; spx5_port->phylink_config.type = PHYLINK_NETDEV; - spx5_port->phylink_config.pcs_poll = true; spx5_port->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD | MAC_2500FD | MAC_5000FD | MAC_10000FD | MAC_25000FD; @@ -328,7 +328,6 @@ static int sparx5_create_port(struct sparx5 *sparx5, return PTR_ERR(phylink); spx5_port->phylink = phylink; - phylink_set_pcs(phylink, &spx5_port->phylink_pcs); return 0; } @@ -694,6 +693,18 @@ static int sparx5_start(struct sparx5 *sparx5) } else { sparx5->xtr_irq = -ENXIO; } + + if (sparx5->ptp_irq >= 0) { + err = devm_request_threaded_irq(sparx5->dev, sparx5->ptp_irq, + NULL, sparx5_ptp_irq_handler, + IRQF_ONESHOT, "sparx5-ptp", + sparx5); + if (err) + sparx5->ptp_irq = -ENXIO; + + sparx5->ptp = 1; + } + return err; } @@ -810,6 +821,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) sparx5->fdma_irq = platform_get_irq_byname(sparx5->pdev, "fdma"); sparx5->xtr_irq = platform_get_irq_byname(sparx5->pdev, "xtr"); + sparx5->ptp_irq = platform_get_irq_byname(sparx5->pdev, "ptp"); /* Read chip ID to check CPU interface */ sparx5->chip_id = spx5_rd(sparx5, GCB_CHIP_ID); @@ -848,6 +860,12 @@ static int mchp_sparx5_probe(struct platform_device *pdev) dev_err(sparx5->dev, "Start failed\n"); goto cleanup_ports; } + + err = sparx5_ptp_init(sparx5); + if (err) { + dev_err(sparx5->dev, "PTP failed\n"); + goto cleanup_ports; + } goto cleanup_config; cleanup_ports: @@ -871,6 +889,7 @@ static int mchp_sparx5_remove(struct platform_device *pdev) disable_irq(sparx5->fdma_irq); sparx5->fdma_irq = -ENXIO; } + sparx5_ptp_deinit(sparx5); sparx5_fdma_stop(sparx5); sparx5_cleanup_ports(sparx5); /* Unregister netdevs */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index d40e18ce3293..33892dfc3b2f 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -14,6 +14,8 @@ #include <linux/if_vlan.h> #include <linux/bitmap.h> #include <linux/phylink.h> +#include <linux/net_tstamp.h> +#include <linux/ptp_clock_kernel.h> #include <linux/hrtimer.h> #include "sparx5_main_regs.h" @@ -79,6 +81,18 @@ enum sparx5_vlan_port_type { #define FDMA_RX_DCB_MAX_DBS 15 #define FDMA_TX_DCB_MAX_DBS 1 +#define SPARX5_PHC_COUNT 3 +#define SPARX5_PHC_PORT 0 + +#define IFH_REW_OP_NOOP 0x0 +#define IFH_REW_OP_ONE_STEP_PTP 0x3 +#define IFH_REW_OP_TWO_STEP_PTP 0x4 + +#define IFH_PDU_TYPE_NONE 0x0 +#define IFH_PDU_TYPE_PTP 0x5 +#define IFH_PDU_TYPE_IPV4_UDP_PTP 0x6 +#define IFH_PDU_TYPE_IPV6_UDP_PTP 0x7 + struct sparx5; struct sparx5_db_hw { @@ -167,9 +181,12 @@ struct sparx5_port { enum sparx5_port_max_tags max_vlan_tags; enum sparx5_vlan_port_type vlan_type; u32 custom_etype; - u32 ifh[IFH_LEN]; bool vlan_aware; struct hrtimer inj_timer; + /* ptp */ + u8 ptp_cmd; + u16 ts_id; + struct sk_buff_head tx_skbs; }; enum sparx5_core_clockfreq { @@ -179,6 +196,26 @@ enum sparx5_core_clockfreq { SPX5_CORE_CLOCK_625MHZ, /* 625MHZ core clock frequency */ }; +struct sparx5_phc { + struct ptp_clock *clock; + struct ptp_clock_info info; + struct hwtstamp_config hwtstamp_config; + struct sparx5 *sparx5; + u8 index; +}; + +struct sparx5_skb_cb { + u8 rew_op; + u8 pdu_type; + u8 pdu_w16_offset; + u16 ts_id; + unsigned long jiffies; +}; + +#define SPARX5_PTP_TIMEOUT msecs_to_jiffies(10) +#define SPARX5_SKB_CB(skb) \ + ((struct sparx5_skb_cb *)((skb)->cb)) + struct sparx5 { struct platform_device *pdev; struct device *dev; @@ -226,6 +263,14 @@ struct sparx5 { int fdma_irq; struct sparx5_rx rx; struct sparx5_tx tx; + /* PTP */ + bool ptp; + struct sparx5_phc phc[SPARX5_PHC_COUNT]; + spinlock_t ptp_clock_lock; /* lock for phc */ + spinlock_t ptp_ts_id_lock; /* lock for ts_id */ + struct mutex ptp_lock; /* lock for ptp interface state */ + u16 ptp_skbs; + int ptp_irq; }; /* sparx5_switchdev.c */ @@ -235,6 +280,7 @@ void sparx5_unregister_notifier_blocks(struct sparx5 *sparx5); /* sparx5_packet.c */ struct frame_info { int src_port; + u32 timestamp; }; void sparx5_xtr_flush(struct sparx5 *sparx5, u8 grp); @@ -288,12 +334,30 @@ void sparx5_get_stats64(struct net_device *ndev, struct rtnl_link_stats64 *stats int sparx_stats_init(struct sparx5 *sparx5); /* sparx5_netdev.c */ +void sparx5_set_port_ifh_timestamp(void *ifh_hdr, u64 timestamp); +void sparx5_set_port_ifh_rew_op(void *ifh_hdr, u32 rew_op); +void sparx5_set_port_ifh_pdu_type(void *ifh_hdr, u32 pdu_type); +void sparx5_set_port_ifh_pdu_w16_offset(void *ifh_hdr, u32 pdu_w16_offset); +void sparx5_set_port_ifh(void *ifh_hdr, u16 portno); bool sparx5_netdevice_check(const struct net_device *dev); struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno); int sparx5_register_netdevs(struct sparx5 *sparx5); void sparx5_destroy_netdevs(struct sparx5 *sparx5); void sparx5_unregister_netdevs(struct sparx5 *sparx5); +/* sparx5_ptp.c */ +int sparx5_ptp_init(struct sparx5 *sparx5); +void sparx5_ptp_deinit(struct sparx5 *sparx5); +int sparx5_ptp_hwtstamp_set(struct sparx5_port *port, struct ifreq *ifr); +int sparx5_ptp_hwtstamp_get(struct sparx5_port *port, struct ifreq *ifr); +void sparx5_ptp_rxtstamp(struct sparx5 *sparx5, struct sk_buff *skb, + u64 timestamp); +int sparx5_ptp_txtstamp_request(struct sparx5_port *port, + struct sk_buff *skb); +void sparx5_ptp_txtstamp_release(struct sparx5_port *port, + struct sk_buff *skb); +irqreturn_t sparx5_ptp_irq_handler(int irq, void *args); + /* Clock period in picoseconds */ static inline u32 sparx5_clk_period(enum sparx5_core_clockfreq cclock) { diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h index 5ab2373a7178..c94de436b281 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h @@ -4,8 +4,8 @@ * Copyright (c) 2021 Microchip Technology Inc. */ -/* This file is autogenerated by cml-utils 2021-05-06 13:06:37 +0200. - * Commit ID: 9ae4ec441e25e4b9003f4e514df5cb12a36b84d3 +/* This file is autogenerated by cml-utils 2022-02-26 14:15:01 +0100. + * Commit ID: 98bdd3d171cc2a1afd30d241d41a4281d471a48c (dirty) */ #ifndef _SPARX5_MAIN_REGS_H_ @@ -40,6 +40,7 @@ enum sparx5_target { TARGET_PCS25G_BR = 144, TARGET_PCS5G_BR = 160, TARGET_PORT_CONF = 173, + TARGET_PTP = 174, TARGET_QFWD = 175, TARGET_QRES = 176, TARGET_QS = 177, @@ -4156,6 +4157,249 @@ enum sparx5_target { #define PORT_CONF_USGMII_CFG_QUAD_MODE_GET(x)\ FIELD_GET(PORT_CONF_USGMII_CFG_QUAD_MODE, x) +/* DEVCPU_PTP:PTP_CFG:PTP_PIN_INTR */ +#define PTP_PTP_PIN_INTR __REG(TARGET_PTP, 0, 1, 320, 0, 1, 16, 0, 0, 1, 4) + +#define PTP_PTP_PIN_INTR_INTR_PTP GENMASK(4, 0) +#define PTP_PTP_PIN_INTR_INTR_PTP_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_INTR_INTR_PTP, x) +#define PTP_PTP_PIN_INTR_INTR_PTP_GET(x)\ + FIELD_GET(PTP_PTP_PIN_INTR_INTR_PTP, x) + +/* DEVCPU_PTP:PTP_CFG:PTP_PIN_INTR_ENA */ +#define PTP_PTP_PIN_INTR_ENA __REG(TARGET_PTP, 0, 1, 320, 0, 1, 16, 4, 0, 1, 4) + +#define PTP_PTP_PIN_INTR_ENA_INTR_PTP_ENA GENMASK(4, 0) +#define PTP_PTP_PIN_INTR_ENA_INTR_PTP_ENA_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_INTR_ENA_INTR_PTP_ENA, x) +#define PTP_PTP_PIN_INTR_ENA_INTR_PTP_ENA_GET(x)\ + FIELD_GET(PTP_PTP_PIN_INTR_ENA_INTR_PTP_ENA, x) + +/* DEVCPU_PTP:PTP_CFG:PTP_INTR_IDENT */ +#define PTP_PTP_INTR_IDENT __REG(TARGET_PTP, 0, 1, 320, 0, 1, 16, 8, 0, 1, 4) + +#define PTP_PTP_INTR_IDENT_INTR_PTP_IDENT GENMASK(4, 0) +#define PTP_PTP_INTR_IDENT_INTR_PTP_IDENT_SET(x)\ + FIELD_PREP(PTP_PTP_INTR_IDENT_INTR_PTP_IDENT, x) +#define PTP_PTP_INTR_IDENT_INTR_PTP_IDENT_GET(x)\ + FIELD_GET(PTP_PTP_INTR_IDENT_INTR_PTP_IDENT, x) + +/* DEVCPU_PTP:PTP_CFG:PTP_DOM_CFG */ +#define PTP_PTP_DOM_CFG __REG(TARGET_PTP, 0, 1, 320, 0, 1, 16, 12, 0, 1, 4) + +#define PTP_PTP_DOM_CFG_PTP_ENA GENMASK(11, 9) +#define PTP_PTP_DOM_CFG_PTP_ENA_SET(x)\ + FIELD_PREP(PTP_PTP_DOM_CFG_PTP_ENA, x) +#define PTP_PTP_DOM_CFG_PTP_ENA_GET(x)\ + FIELD_GET(PTP_PTP_DOM_CFG_PTP_ENA, x) + +#define PTP_PTP_DOM_CFG_PTP_HOLD GENMASK(8, 6) +#define PTP_PTP_DOM_CFG_PTP_HOLD_SET(x)\ + FIELD_PREP(PTP_PTP_DOM_CFG_PTP_HOLD, x) +#define PTP_PTP_DOM_CFG_PTP_HOLD_GET(x)\ + FIELD_GET(PTP_PTP_DOM_CFG_PTP_HOLD, x) + +#define PTP_PTP_DOM_CFG_PTP_TOD_FREEZE GENMASK(5, 3) +#define PTP_PTP_DOM_CFG_PTP_TOD_FREEZE_SET(x)\ + FIELD_PREP(PTP_PTP_DOM_CFG_PTP_TOD_FREEZE, x) +#define PTP_PTP_DOM_CFG_PTP_TOD_FREEZE_GET(x)\ + FIELD_GET(PTP_PTP_DOM_CFG_PTP_TOD_FREEZE, x) + +#define PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS GENMASK(2, 0) +#define PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS_SET(x)\ + FIELD_PREP(PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS, x) +#define PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS_GET(x)\ + FIELD_GET(PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS, x) + +/* DEVCPU_PTP:PTP_TOD_DOMAINS:CLK_PER_CFG */ +#define PTP_CLK_PER_CFG(g, r) __REG(TARGET_PTP, 0, 1, 336, g, 3, 28, 0, r, 2, 4) + +/* DEVCPU_PTP:PTP_TOD_DOMAINS:PTP_CUR_NSEC */ +#define PTP_PTP_CUR_NSEC(g) __REG(TARGET_PTP, 0, 1, 336, g, 3, 28, 8, 0, 1, 4) + +#define PTP_PTP_CUR_NSEC_PTP_CUR_NSEC GENMASK(29, 0) +#define PTP_PTP_CUR_NSEC_PTP_CUR_NSEC_SET(x)\ + FIELD_PREP(PTP_PTP_CUR_NSEC_PTP_CUR_NSEC, x) +#define PTP_PTP_CUR_NSEC_PTP_CUR_NSEC_GET(x)\ + FIELD_GET(PTP_PTP_CUR_NSEC_PTP_CUR_NSEC, x) + +/* DEVCPU_PTP:PTP_TOD_DOMAINS:PTP_CUR_NSEC_FRAC */ +#define PTP_PTP_CUR_NSEC_FRAC(g) __REG(TARGET_PTP, 0, 1, 336, g, 3, 28, 12, 0, 1, 4) + +#define PTP_PTP_CUR_NSEC_FRAC_PTP_CUR_NSEC_FRAC GENMASK(7, 0) +#define PTP_PTP_CUR_NSEC_FRAC_PTP_CUR_NSEC_FRAC_SET(x)\ + FIELD_PREP(PTP_PTP_CUR_NSEC_FRAC_PTP_CUR_NSEC_FRAC, x) +#define PTP_PTP_CUR_NSEC_FRAC_PTP_CUR_NSEC_FRAC_GET(x)\ + FIELD_GET(PTP_PTP_CUR_NSEC_FRAC_PTP_CUR_NSEC_FRAC, x) + +/* DEVCPU_PTP:PTP_TOD_DOMAINS:PTP_CUR_SEC_LSB */ +#define PTP_PTP_CUR_SEC_LSB(g) __REG(TARGET_PTP, 0, 1, 336, g, 3, 28, 16, 0, 1, 4) + +/* DEVCPU_PTP:PTP_TOD_DOMAINS:PTP_CUR_SEC_MSB */ +#define PTP_PTP_CUR_SEC_MSB(g) __REG(TARGET_PTP, 0, 1, 336, g, 3, 28, 20, 0, 1, 4) + +#define PTP_PTP_CUR_SEC_MSB_PTP_CUR_SEC_MSB GENMASK(15, 0) +#define PTP_PTP_CUR_SEC_MSB_PTP_CUR_SEC_MSB_SET(x)\ + FIELD_PREP(PTP_PTP_CUR_SEC_MSB_PTP_CUR_SEC_MSB, x) +#define PTP_PTP_CUR_SEC_MSB_PTP_CUR_SEC_MSB_GET(x)\ + FIELD_GET(PTP_PTP_CUR_SEC_MSB_PTP_CUR_SEC_MSB, x) + +/* DEVCPU_PTP:PTP_TOD_DOMAINS:NTP_CUR_NSEC */ +#define PTP_NTP_CUR_NSEC(g) __REG(TARGET_PTP, 0, 1, 336, g, 3, 28, 24, 0, 1, 4) + +/* DEVCPU_PTP:PTP_PINS:PTP_PIN_CFG */ +#define PTP_PTP_PIN_CFG(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 0, 0, 1, 4) + +#define PTP_PTP_PIN_CFG_PTP_PIN_ACTION GENMASK(28, 26) +#define PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_ACTION, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_ACTION_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_ACTION, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_SYNC GENMASK(25, 24) +#define PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_SYNC, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_SYNC_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_SYNC, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_INV_POL BIT(23) +#define PTP_PTP_PIN_CFG_PTP_PIN_INV_POL_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_INV_POL, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_INV_POL_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_INV_POL, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_SELECT GENMASK(22, 21) +#define PTP_PTP_PIN_CFG_PTP_PIN_SELECT_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_SELECT, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_SELECT_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_SELECT, x) + +#define PTP_PTP_PIN_CFG_PTP_CLK_SELECT GENMASK(20, 18) +#define PTP_PTP_PIN_CFG_PTP_CLK_SELECT_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_CLK_SELECT, x) +#define PTP_PTP_PIN_CFG_PTP_CLK_SELECT_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_CLK_SELECT, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_DOM GENMASK(17, 16) +#define PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_DOM, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_DOM_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_DOM, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_OPT GENMASK(15, 14) +#define PTP_PTP_PIN_CFG_PTP_PIN_OPT_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_OPT, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_OPT_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_OPT, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_EMBEDDED_CLK BIT(13) +#define PTP_PTP_PIN_CFG_PTP_PIN_EMBEDDED_CLK_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_EMBEDDED_CLK, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_EMBEDDED_CLK_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_EMBEDDED_CLK, x) + +#define PTP_PTP_PIN_CFG_PTP_PIN_OUTP_OFS GENMASK(12, 0) +#define PTP_PTP_PIN_CFG_PTP_PIN_OUTP_OFS_SET(x)\ + FIELD_PREP(PTP_PTP_PIN_CFG_PTP_PIN_OUTP_OFS, x) +#define PTP_PTP_PIN_CFG_PTP_PIN_OUTP_OFS_GET(x)\ + FIELD_GET(PTP_PTP_PIN_CFG_PTP_PIN_OUTP_OFS, x) + +/* DEVCPU_PTP:PTP_PINS:PTP_TOD_SEC_MSB */ +#define PTP_PTP_TOD_SEC_MSB(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 4, 0, 1, 4) + +#define PTP_PTP_TOD_SEC_MSB_PTP_TOD_SEC_MSB GENMASK(15, 0) +#define PTP_PTP_TOD_SEC_MSB_PTP_TOD_SEC_MSB_SET(x)\ + FIELD_PREP(PTP_PTP_TOD_SEC_MSB_PTP_TOD_SEC_MSB, x) +#define PTP_PTP_TOD_SEC_MSB_PTP_TOD_SEC_MSB_GET(x)\ + FIELD_GET(PTP_PTP_TOD_SEC_MSB_PTP_TOD_SEC_MSB, x) + +/* DEVCPU_PTP:PTP_PINS:PTP_TOD_SEC_LSB */ +#define PTP_PTP_TOD_SEC_LSB(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 8, 0, 1, 4) + +/* DEVCPU_PTP:PTP_PINS:PTP_TOD_NSEC */ +#define PTP_PTP_TOD_NSEC(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 12, 0, 1, 4) + +#define PTP_PTP_TOD_NSEC_PTP_TOD_NSEC GENMASK(29, 0) +#define PTP_PTP_TOD_NSEC_PTP_TOD_NSEC_SET(x)\ + FIELD_PREP(PTP_PTP_TOD_NSEC_PTP_TOD_NSEC, x) +#define PTP_PTP_TOD_NSEC_PTP_TOD_NSEC_GET(x)\ + FIELD_GET(PTP_PTP_TOD_NSEC_PTP_TOD_NSEC, x) + +/* DEVCPU_PTP:PTP_PINS:PTP_TOD_NSEC_FRAC */ +#define PTP_PTP_TOD_NSEC_FRAC(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 16, 0, 1, 4) + +#define PTP_PTP_TOD_NSEC_FRAC_PTP_TOD_NSEC_FRAC GENMASK(7, 0) +#define PTP_PTP_TOD_NSEC_FRAC_PTP_TOD_NSEC_FRAC_SET(x)\ + FIELD_PREP(PTP_PTP_TOD_NSEC_FRAC_PTP_TOD_NSEC_FRAC, x) +#define PTP_PTP_TOD_NSEC_FRAC_PTP_TOD_NSEC_FRAC_GET(x)\ + FIELD_GET(PTP_PTP_TOD_NSEC_FRAC_PTP_TOD_NSEC_FRAC, x) + +/* DEVCPU_PTP:PTP_PINS:NTP_NSEC */ +#define PTP_NTP_NSEC(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 20, 0, 1, 4) + +/* DEVCPU_PTP:PTP_PINS:PIN_WF_HIGH_PERIOD */ +#define PTP_PIN_WF_HIGH_PERIOD(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 24, 0, 1, 4) + +#define PTP_PIN_WF_HIGH_PERIOD_PIN_WFH GENMASK(29, 0) +#define PTP_PIN_WF_HIGH_PERIOD_PIN_WFH_SET(x)\ + FIELD_PREP(PTP_PIN_WF_HIGH_PERIOD_PIN_WFH, x) +#define PTP_PIN_WF_HIGH_PERIOD_PIN_WFH_GET(x)\ + FIELD_GET(PTP_PIN_WF_HIGH_PERIOD_PIN_WFH, x) + +/* DEVCPU_PTP:PTP_PINS:PIN_WF_LOW_PERIOD */ +#define PTP_PIN_WF_LOW_PERIOD(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 28, 0, 1, 4) + +#define PTP_PIN_WF_LOW_PERIOD_PIN_WFL GENMASK(29, 0) +#define PTP_PIN_WF_LOW_PERIOD_PIN_WFL_SET(x)\ + FIELD_PREP(PTP_PIN_WF_LOW_PERIOD_PIN_WFL, x) +#define PTP_PIN_WF_LOW_PERIOD_PIN_WFL_GET(x)\ + FIELD_GET(PTP_PIN_WF_LOW_PERIOD_PIN_WFL, x) + +/* DEVCPU_PTP:PTP_PINS:PIN_IOBOUNCH_DELAY */ +#define PTP_PIN_IOBOUNCH_DELAY(g) __REG(TARGET_PTP, 0, 1, 0, g, 5, 64, 32, 0, 1, 4) + +#define PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_VAL GENMASK(18, 3) +#define PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_VAL_SET(x)\ + FIELD_PREP(PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_VAL, x) +#define PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_VAL_GET(x)\ + FIELD_GET(PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_VAL, x) + +#define PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_CFG GENMASK(2, 0) +#define PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_CFG_SET(x)\ + FIELD_PREP(PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_CFG, x) +#define PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_CFG_GET(x)\ + FIELD_GET(PTP_PIN_IOBOUNCH_DELAY_PIN_IOBOUNCH_CFG, x) + +/* DEVCPU_PTP:PHASE_DETECTOR_CTRL:PHAD_CTRL */ +#define PTP_PHAD_CTRL(g) __REG(TARGET_PTP, 0, 1, 420, g, 5, 8, 0, 0, 1, 4) + +#define PTP_PHAD_CTRL_PHAD_ENA BIT(7) +#define PTP_PHAD_CTRL_PHAD_ENA_SET(x)\ + FIELD_PREP(PTP_PHAD_CTRL_PHAD_ENA, x) +#define PTP_PHAD_CTRL_PHAD_ENA_GET(x)\ + FIELD_GET(PTP_PHAD_CTRL_PHAD_ENA, x) + +#define PTP_PHAD_CTRL_PHAD_FAILED BIT(6) +#define PTP_PHAD_CTRL_PHAD_FAILED_SET(x)\ + FIELD_PREP(PTP_PHAD_CTRL_PHAD_FAILED, x) +#define PTP_PHAD_CTRL_PHAD_FAILED_GET(x)\ + FIELD_GET(PTP_PHAD_CTRL_PHAD_FAILED, x) + +#define PTP_PHAD_CTRL_REDUCED_RES GENMASK(5, 3) +#define PTP_PHAD_CTRL_REDUCED_RES_SET(x)\ + FIELD_PREP(PTP_PHAD_CTRL_REDUCED_RES, x) +#define PTP_PHAD_CTRL_REDUCED_RES_GET(x)\ + FIELD_GET(PTP_PHAD_CTRL_REDUCED_RES, x) + +#define PTP_PHAD_CTRL_LOCK_ACC GENMASK(2, 0) +#define PTP_PHAD_CTRL_LOCK_ACC_SET(x)\ + FIELD_PREP(PTP_PHAD_CTRL_LOCK_ACC, x) +#define PTP_PHAD_CTRL_LOCK_ACC_GET(x)\ + FIELD_GET(PTP_PHAD_CTRL_LOCK_ACC, x) + +/* DEVCPU_PTP:PHASE_DETECTOR_CTRL:PHAD_CYC_STAT */ +#define PTP_PHAD_CYC_STAT(g) __REG(TARGET_PTP, 0, 1, 420, g, 5, 8, 4, 0, 1, 4) + /* QFWD:SYSTEM:SWITCH_PORT_MODE */ #define QFWD_SWITCH_PORT_MODE(r) __REG(TARGET_QFWD, 0, 1, 0, 0, 1, 340, 0, r, 70, 4) @@ -4528,6 +4772,93 @@ enum sparx5_target { #define REW_TAG_CTRL_TAG_DEI_CFG_GET(x)\ FIELD_GET(REW_TAG_CTRL_TAG_DEI_CFG, x) +/* REW:PTP_CTRL:PTP_TWOSTEP_CTRL */ +#define REW_PTP_TWOSTEP_CTRL __REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 0, 0, 1, 4) + +#define REW_PTP_TWOSTEP_CTRL_PTP_OVWR_ENA BIT(12) +#define REW_PTP_TWOSTEP_CTRL_PTP_OVWR_ENA_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_CTRL_PTP_OVWR_ENA, x) +#define REW_PTP_TWOSTEP_CTRL_PTP_OVWR_ENA_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_CTRL_PTP_OVWR_ENA, x) + +#define REW_PTP_TWOSTEP_CTRL_PTP_NXT BIT(11) +#define REW_PTP_TWOSTEP_CTRL_PTP_NXT_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_CTRL_PTP_NXT, x) +#define REW_PTP_TWOSTEP_CTRL_PTP_NXT_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_CTRL_PTP_NXT, x) + +#define REW_PTP_TWOSTEP_CTRL_PTP_VLD BIT(10) +#define REW_PTP_TWOSTEP_CTRL_PTP_VLD_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_CTRL_PTP_VLD, x) +#define REW_PTP_TWOSTEP_CTRL_PTP_VLD_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_CTRL_PTP_VLD, x) + +#define REW_PTP_TWOSTEP_CTRL_STAMP_TX BIT(9) +#define REW_PTP_TWOSTEP_CTRL_STAMP_TX_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_CTRL_STAMP_TX, x) +#define REW_PTP_TWOSTEP_CTRL_STAMP_TX_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_CTRL_STAMP_TX, x) + +#define REW_PTP_TWOSTEP_CTRL_STAMP_PORT GENMASK(8, 1) +#define REW_PTP_TWOSTEP_CTRL_STAMP_PORT_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_CTRL_STAMP_PORT, x) +#define REW_PTP_TWOSTEP_CTRL_STAMP_PORT_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_CTRL_STAMP_PORT, x) + +#define REW_PTP_TWOSTEP_CTRL_PTP_OVFL BIT(0) +#define REW_PTP_TWOSTEP_CTRL_PTP_OVFL_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_CTRL_PTP_OVFL, x) +#define REW_PTP_TWOSTEP_CTRL_PTP_OVFL_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_CTRL_PTP_OVFL, x) + +/* REW:PTP_CTRL:PTP_TWOSTEP_STAMP */ +#define REW_PTP_TWOSTEP_STAMP __REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 4, 0, 1, 4) + +#define REW_PTP_TWOSTEP_STAMP_STAMP_NSEC GENMASK(29, 0) +#define REW_PTP_TWOSTEP_STAMP_STAMP_NSEC_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_STAMP_STAMP_NSEC, x) +#define REW_PTP_TWOSTEP_STAMP_STAMP_NSEC_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_STAMP_STAMP_NSEC, x) + +/* REW:PTP_CTRL:PTP_TWOSTEP_STAMP_SUBNS */ +#define REW_PTP_TWOSTEP_STAMP_SUBNS __REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 8, 0, 1, 4) + +#define REW_PTP_TWOSTEP_STAMP_SUBNS_STAMP_SUB_NSEC GENMASK(7, 0) +#define REW_PTP_TWOSTEP_STAMP_SUBNS_STAMP_SUB_NSEC_SET(x)\ + FIELD_PREP(REW_PTP_TWOSTEP_STAMP_SUBNS_STAMP_SUB_NSEC, x) +#define REW_PTP_TWOSTEP_STAMP_SUBNS_STAMP_SUB_NSEC_GET(x)\ + FIELD_GET(REW_PTP_TWOSTEP_STAMP_SUBNS_STAMP_SUB_NSEC, x) + +/* REW:PTP_CTRL:PTP_RSRV_NOT_ZERO */ +#define REW_PTP_RSRV_NOT_ZERO __REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 12, 0, 1, 4) + +/* REW:PTP_CTRL:PTP_RSRV_NOT_ZERO1 */ +#define REW_PTP_RSRV_NOT_ZERO1 __REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 16, 0, 1, 4) + +/* REW:PTP_CTRL:PTP_RSRV_NOT_ZERO2 */ +#define REW_PTP_RSRV_NOT_ZERO2 __REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 20, 0, 1, 4) + +#define REW_PTP_RSRV_NOT_ZERO2_PTP_RSRV_NOT_ZERO2 GENMASK(5, 0) +#define REW_PTP_RSRV_NOT_ZERO2_PTP_RSRV_NOT_ZERO2_SET(x)\ + FIELD_PREP(REW_PTP_RSRV_NOT_ZERO2_PTP_RSRV_NOT_ZERO2, x) +#define REW_PTP_RSRV_NOT_ZERO2_PTP_RSRV_NOT_ZERO2_GET(x)\ + FIELD_GET(REW_PTP_RSRV_NOT_ZERO2_PTP_RSRV_NOT_ZERO2, x) + +/* REW:PTP_CTRL:PTP_GEN_STAMP_FMT */ +#define REW_PTP_GEN_STAMP_FMT(r) __REG(TARGET_REW, 0, 1, 378368, 0, 1, 40, 24, r, 4, 4) + +#define REW_PTP_GEN_STAMP_FMT_RT_OFS GENMASK(6, 2) +#define REW_PTP_GEN_STAMP_FMT_RT_OFS_SET(x)\ + FIELD_PREP(REW_PTP_GEN_STAMP_FMT_RT_OFS, x) +#define REW_PTP_GEN_STAMP_FMT_RT_OFS_GET(x)\ + FIELD_GET(REW_PTP_GEN_STAMP_FMT_RT_OFS, x) + +#define REW_PTP_GEN_STAMP_FMT_RT_FMT GENMASK(1, 0) +#define REW_PTP_GEN_STAMP_FMT_RT_FMT_SET(x)\ + FIELD_PREP(REW_PTP_GEN_STAMP_FMT_RT_FMT, x) +#define REW_PTP_GEN_STAMP_FMT_RT_FMT_GET(x)\ + FIELD_GET(REW_PTP_GEN_STAMP_FMT_RT_FMT, x) + /* REW:RAM_CTRL:RAM_INIT */ #define REW_RAM_INIT __REG(TARGET_REW, 0, 1, 378696, 0, 1, 4, 0, 0, 1, 4) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index e042f117dc7a..af4d3e1f1a6d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -54,7 +54,7 @@ static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) ifh_hdr[byte - 5] |= (u8)((encode & 0xFF0000000000) >> 40); } -static void sparx5_set_port_ifh(void *ifh_hdr, u16 portno) +void sparx5_set_port_ifh(void *ifh_hdr, u16 portno) { /* VSTAX.RSV = 1. MSBit must be 1 */ ifh_encode_bitfield(ifh_hdr, 1, VSTAX + 79, 1); @@ -74,6 +74,26 @@ static void sparx5_set_port_ifh(void *ifh_hdr, u16 portno) ifh_encode_bitfield(ifh_hdr, 1, 67, 1); } +void sparx5_set_port_ifh_rew_op(void *ifh_hdr, u32 rew_op) +{ + ifh_encode_bitfield(ifh_hdr, rew_op, VSTAX + 32, 10); +} + +void sparx5_set_port_ifh_pdu_type(void *ifh_hdr, u32 pdu_type) +{ + ifh_encode_bitfield(ifh_hdr, pdu_type, 191, 4); +} + +void sparx5_set_port_ifh_pdu_w16_offset(void *ifh_hdr, u32 pdu_w16_offset) +{ + ifh_encode_bitfield(ifh_hdr, pdu_w16_offset, 195, 6); +} + +void sparx5_set_port_ifh_timestamp(void *ifh_hdr, u64 timestamp) +{ + ifh_encode_bitfield(ifh_hdr, timestamp, 232, 40); +} + static int sparx5_port_open(struct net_device *ndev) { struct sparx5_port *port = netdev_priv(ndev); @@ -179,6 +199,24 @@ static int sparx5_get_port_parent_id(struct net_device *dev, return 0; } +static int sparx5_port_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + struct sparx5_port *sparx5_port = netdev_priv(dev); + struct sparx5 *sparx5 = sparx5_port->sparx5; + + if (!phy_has_hwtstamp(dev->phydev) && sparx5->ptp) { + switch (cmd) { + case SIOCSHWTSTAMP: + return sparx5_ptp_hwtstamp_set(sparx5_port, ifr); + case SIOCGHWTSTAMP: + return sparx5_ptp_hwtstamp_get(sparx5_port, ifr); + } + } + + return phy_mii_ioctl(dev->phydev, ifr, cmd); +} + static const struct net_device_ops sparx5_port_netdev_ops = { .ndo_open = sparx5_port_open, .ndo_stop = sparx5_port_stop, @@ -189,6 +227,7 @@ static const struct net_device_ops sparx5_port_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_get_stats64 = sparx5_get_stats64, .ndo_get_port_parent_id = sparx5_get_port_parent_id, + .ndo_eth_ioctl = sparx5_port_ioctl, }; bool sparx5_netdevice_check(const struct net_device *dev) @@ -210,7 +249,6 @@ struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno) spx5_port->ndev = ndev; spx5_port->sparx5 = sparx5; spx5_port->portno = portno; - sparx5_set_port_ifh(spx5_port->ifh, portno); ndev->netdev_ops = &sparx5_port_netdev_ops; ndev->ethtool_ops = &sparx5_ethtool_ops; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 148d431fcde4..304f84aadc36 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -44,6 +44,12 @@ void sparx5_ifh_parse(u32 *ifh, struct frame_info *info) ((u32)xtr_hdr[30] << 0); fwd = (fwd >> 5); info->src_port = FIELD_GET(GENMASK(7, 1), fwd); + + info->timestamp = + ((u64)xtr_hdr[2] << 24) | + ((u64)xtr_hdr[3] << 16) | + ((u64)xtr_hdr[4] << 8) | + ((u64)xtr_hdr[5] << 0); } static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap) @@ -144,6 +150,7 @@ static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap) /* Finish up skb */ skb_put(skb, byte_cnt - ETH_FCS_LEN); eth_skb_pad(skb); + sparx5_ptp_rxtstamp(sparx5, skb, fi.timestamp); skb->protocol = eth_type_trans(skb, netdev); netdev->stats.rx_bytes += skb->len; netdev->stats.rx_packets++; @@ -218,20 +225,44 @@ int sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) struct net_device_stats *stats = &dev->stats; struct sparx5_port *port = netdev_priv(dev); struct sparx5 *sparx5 = port->sparx5; + u32 ifh[IFH_LEN]; int ret; + memset(ifh, 0, IFH_LEN * 4); + sparx5_set_port_ifh(ifh, port->portno); + + if (sparx5->ptp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { + ret = sparx5_ptp_txtstamp_request(port, skb); + if (ret) + return ret; + + sparx5_set_port_ifh_rew_op(ifh, SPARX5_SKB_CB(skb)->rew_op); + sparx5_set_port_ifh_pdu_type(ifh, SPARX5_SKB_CB(skb)->pdu_type); + sparx5_set_port_ifh_pdu_w16_offset(ifh, SPARX5_SKB_CB(skb)->pdu_w16_offset); + sparx5_set_port_ifh_timestamp(ifh, SPARX5_SKB_CB(skb)->ts_id); + } + + skb_tx_timestamp(skb); if (sparx5->fdma_irq > 0) - ret = sparx5_fdma_xmit(sparx5, port->ifh, skb); + ret = sparx5_fdma_xmit(sparx5, ifh, skb); else - ret = sparx5_inject(sparx5, port->ifh, skb, dev); + ret = sparx5_inject(sparx5, ifh, skb, dev); if (ret == NETDEV_TX_OK) { stats->tx_bytes += skb->len; stats->tx_packets++; - skb_tx_timestamp(skb); + + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + return ret; + dev_kfree_skb_any(skb); } else { stats->tx_dropped++; + + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + sparx5_ptp_txtstamp_release(port, skb); } return ret; } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c index 8ba33bc1a001..830da0e5ff27 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c @@ -26,6 +26,15 @@ static bool port_conf_has_changed(struct sparx5_port_config *a, struct sparx5_po return false; } +static struct phylink_pcs * +sparx5_phylink_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) +{ + struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); + + return &port->phylink_pcs; +} + static void sparx5_phylink_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) @@ -130,6 +139,7 @@ const struct phylink_pcs_ops sparx5_phylink_pcs_ops = { const struct phylink_mac_ops sparx5_phylink_mac_ops = { .validate = phylink_generic_validate, + .mac_select_pcs = sparx5_phylink_mac_select_pcs, .mac_config = sparx5_phylink_mac_config, .mac_link_down = sparx5_phylink_mac_link_down, .mac_link_up = sparx5_phylink_mac_link_up, diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c new file mode 100644 index 000000000000..cd110c31e5a4 --- /dev/null +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c @@ -0,0 +1,685 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Microchip Sparx5 Switch driver + * + * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries. + * + * The Sparx5 Chip Register Model can be browsed at this location: + * https://github.com/microchip-ung/sparx-5_reginfo + */ +#include <linux/ptp_classify.h> + +#include "sparx5_main_regs.h" +#include "sparx5_main.h" + +#define SPARX5_MAX_PTP_ID 512 + +#define TOD_ACC_PIN 0x4 + +enum { + PTP_PIN_ACTION_IDLE = 0, + PTP_PIN_ACTION_LOAD, + PTP_PIN_ACTION_SAVE, + PTP_PIN_ACTION_CLOCK, + PTP_PIN_ACTION_DELTA, + PTP_PIN_ACTION_TOD +}; + +static u64 sparx5_ptp_get_1ppm(struct sparx5 *sparx5) +{ + /* Represents 1ppm adjustment in 2^59 format with 1.59687500000(625) + * 1.99609375000(500), 3.99218750000(250) as reference + * The value is calculated as following: + * (1/1000000)/((2^-59)/X) + */ + + u64 res = 0; + + switch (sparx5->coreclock) { + case SPX5_CORE_CLOCK_250MHZ: + res = 2301339409586; + break; + case SPX5_CORE_CLOCK_500MHZ: + res = 1150669704793; + break; + case SPX5_CORE_CLOCK_625MHZ: + res = 920535763834; + break; + default: + WARN_ON("Invalid core clock"); + break; + } + + return res; +} + +static u64 sparx5_ptp_get_nominal_value(struct sparx5 *sparx5) +{ + u64 res = 0; + + switch (sparx5->coreclock) { + case SPX5_CORE_CLOCK_250MHZ: + res = 0x1FF0000000000000; + break; + case SPX5_CORE_CLOCK_500MHZ: + res = 0x0FF8000000000000; + break; + case SPX5_CORE_CLOCK_625MHZ: + res = 0x0CC6666666666666; + break; + default: + WARN_ON("Invalid core clock"); + break; + } + + return res; +} + +int sparx5_ptp_hwtstamp_set(struct sparx5_port *port, struct ifreq *ifr) +{ + struct sparx5 *sparx5 = port->sparx5; + struct hwtstamp_config cfg; + struct sparx5_phc *phc; + + /* For now don't allow to run ptp on ports that are part of a bridge, + * because in case of transparent clock the HW will still forward the + * frames, so there would be duplicate frames + */ + + if (test_bit(port->portno, sparx5->bridge_mask)) + return -EINVAL; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + switch (cfg.tx_type) { + case HWTSTAMP_TX_ON: + port->ptp_cmd = IFH_REW_OP_TWO_STEP_PTP; + break; + case HWTSTAMP_TX_ONESTEP_SYNC: + port->ptp_cmd = IFH_REW_OP_ONE_STEP_PTP; + break; + case HWTSTAMP_TX_OFF: + port->ptp_cmd = IFH_REW_OP_NOOP; + break; + default: + return -ERANGE; + } + + switch (cfg.rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + cfg.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + + /* Commit back the result & save it */ + mutex_lock(&sparx5->ptp_lock); + phc = &sparx5->phc[SPARX5_PHC_PORT]; + memcpy(&phc->hwtstamp_config, &cfg, sizeof(cfg)); + mutex_unlock(&sparx5->ptp_lock); + + return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; +} + +int sparx5_ptp_hwtstamp_get(struct sparx5_port *port, struct ifreq *ifr) +{ + struct sparx5 *sparx5 = port->sparx5; + struct sparx5_phc *phc; + + phc = &sparx5->phc[SPARX5_PHC_PORT]; + return copy_to_user(ifr->ifr_data, &phc->hwtstamp_config, + sizeof(phc->hwtstamp_config)) ? -EFAULT : 0; +} + +static void sparx5_ptp_classify(struct sparx5_port *port, struct sk_buff *skb, + u8 *rew_op, u8 *pdu_type, u8 *pdu_w16_offset) +{ + struct ptp_header *header; + u8 msgtype; + int type; + + if (port->ptp_cmd == IFH_REW_OP_NOOP) { + *rew_op = IFH_REW_OP_NOOP; + *pdu_type = IFH_PDU_TYPE_NONE; + *pdu_w16_offset = 0; + return; + } + + type = ptp_classify_raw(skb); + if (type == PTP_CLASS_NONE) { + *rew_op = IFH_REW_OP_NOOP; + *pdu_type = IFH_PDU_TYPE_NONE; + *pdu_w16_offset = 0; + return; + } + + header = ptp_parse_header(skb, type); + if (!header) { + *rew_op = IFH_REW_OP_NOOP; + *pdu_type = IFH_PDU_TYPE_NONE; + *pdu_w16_offset = 0; + return; + } + + *pdu_w16_offset = 7; + if (type & PTP_CLASS_L2) + *pdu_type = IFH_PDU_TYPE_PTP; + if (type & PTP_CLASS_IPV4) + *pdu_type = IFH_PDU_TYPE_IPV4_UDP_PTP; + if (type & PTP_CLASS_IPV6) + *pdu_type = IFH_PDU_TYPE_IPV6_UDP_PTP; + + if (port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { + *rew_op = IFH_REW_OP_TWO_STEP_PTP; + return; + } + + /* If it is sync and run 1 step then set the correct operation, + * otherwise run as 2 step + */ + msgtype = ptp_get_msgtype(header, type); + if ((msgtype & 0xf) == 0) { + *rew_op = IFH_REW_OP_ONE_STEP_PTP; + return; + } + + *rew_op = IFH_REW_OP_TWO_STEP_PTP; +} + +static void sparx5_ptp_txtstamp_old_release(struct sparx5_port *port) +{ + struct sk_buff *skb, *skb_tmp; + unsigned long flags; + + spin_lock_irqsave(&port->tx_skbs.lock, flags); + skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { + if time_after(SPARX5_SKB_CB(skb)->jiffies + SPARX5_PTP_TIMEOUT, + jiffies) + break; + + __skb_unlink(skb, &port->tx_skbs); + dev_kfree_skb_any(skb); + } + spin_unlock_irqrestore(&port->tx_skbs.lock, flags); +} + +int sparx5_ptp_txtstamp_request(struct sparx5_port *port, + struct sk_buff *skb) +{ + struct sparx5 *sparx5 = port->sparx5; + u8 rew_op, pdu_type, pdu_w16_offset; + unsigned long flags; + + sparx5_ptp_classify(port, skb, &rew_op, &pdu_type, &pdu_w16_offset); + SPARX5_SKB_CB(skb)->rew_op = rew_op; + SPARX5_SKB_CB(skb)->pdu_type = pdu_type; + SPARX5_SKB_CB(skb)->pdu_w16_offset = pdu_w16_offset; + + if (rew_op != IFH_REW_OP_TWO_STEP_PTP) + return 0; + + sparx5_ptp_txtstamp_old_release(port); + + spin_lock_irqsave(&sparx5->ptp_ts_id_lock, flags); + if (sparx5->ptp_skbs == SPARX5_MAX_PTP_ID) { + spin_unlock_irqrestore(&sparx5->ptp_ts_id_lock, flags); + return -EBUSY; + } + + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + + skb_queue_tail(&port->tx_skbs, skb); + SPARX5_SKB_CB(skb)->ts_id = port->ts_id; + SPARX5_SKB_CB(skb)->jiffies = jiffies; + + sparx5->ptp_skbs++; + port->ts_id++; + if (port->ts_id == SPARX5_MAX_PTP_ID) + port->ts_id = 0; + + spin_unlock_irqrestore(&sparx5->ptp_ts_id_lock, flags); + + return 0; +} + +void sparx5_ptp_txtstamp_release(struct sparx5_port *port, + struct sk_buff *skb) +{ + struct sparx5 *sparx5 = port->sparx5; + unsigned long flags; + + spin_lock_irqsave(&sparx5->ptp_ts_id_lock, flags); + port->ts_id--; + sparx5->ptp_skbs--; + skb_unlink(skb, &port->tx_skbs); + spin_unlock_irqrestore(&sparx5->ptp_ts_id_lock, flags); +} + +static void sparx5_get_hwtimestamp(struct sparx5 *sparx5, + struct timespec64 *ts, + u32 nsec) +{ + /* Read current PTP time to get seconds */ + unsigned long flags; + u32 curr_nsec; + + spin_lock_irqsave(&sparx5->ptp_clock_lock, flags); + + spx5_rmw(PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(PTP_PIN_ACTION_SAVE) | + PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(SPARX5_PHC_PORT) | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(0), + PTP_PTP_PIN_CFG_PTP_PIN_ACTION | + PTP_PTP_PIN_CFG_PTP_PIN_DOM | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC, + sparx5, PTP_PTP_PIN_CFG(TOD_ACC_PIN)); + + ts->tv_sec = spx5_rd(sparx5, PTP_PTP_TOD_SEC_LSB(TOD_ACC_PIN)); + curr_nsec = spx5_rd(sparx5, PTP_PTP_TOD_NSEC(TOD_ACC_PIN)); + + ts->tv_nsec = nsec; + + /* Sec has incremented since the ts was registered */ + if (curr_nsec < nsec) + ts->tv_sec--; + + spin_unlock_irqrestore(&sparx5->ptp_clock_lock, flags); +} + +irqreturn_t sparx5_ptp_irq_handler(int irq, void *args) +{ + int budget = SPARX5_MAX_PTP_ID; + struct sparx5 *sparx5 = args; + + while (budget--) { + struct sk_buff *skb, *skb_tmp, *skb_match = NULL; + struct skb_shared_hwtstamps shhwtstamps; + struct sparx5_port *port; + struct timespec64 ts; + unsigned long flags; + u32 val, id, txport; + u32 delay; + + val = spx5_rd(sparx5, REW_PTP_TWOSTEP_CTRL); + + /* Check if a timestamp can be retrieved */ + if (!(val & REW_PTP_TWOSTEP_CTRL_PTP_VLD)) + break; + + WARN_ON(val & REW_PTP_TWOSTEP_CTRL_PTP_OVFL); + + if (!(val & REW_PTP_TWOSTEP_CTRL_STAMP_TX)) + continue; + + /* Retrieve the ts Tx port */ + txport = REW_PTP_TWOSTEP_CTRL_STAMP_PORT_GET(val); + + /* Retrieve its associated skb */ + port = sparx5->ports[txport]; + + /* Retrieve the delay */ + delay = spx5_rd(sparx5, REW_PTP_TWOSTEP_STAMP); + delay = REW_PTP_TWOSTEP_STAMP_STAMP_NSEC_GET(delay); + + /* Get next timestamp from fifo, which needs to be the + * rx timestamp which represents the id of the frame + */ + spx5_rmw(REW_PTP_TWOSTEP_CTRL_PTP_NXT_SET(1), + REW_PTP_TWOSTEP_CTRL_PTP_NXT, + sparx5, REW_PTP_TWOSTEP_CTRL); + + val = spx5_rd(sparx5, REW_PTP_TWOSTEP_CTRL); + + /* Check if a timestamp can be retried */ + if (!(val & REW_PTP_TWOSTEP_CTRL_PTP_VLD)) + break; + + /* Read RX timestamping to get the ID */ + id = spx5_rd(sparx5, REW_PTP_TWOSTEP_STAMP); + id <<= 8; + id |= spx5_rd(sparx5, REW_PTP_TWOSTEP_STAMP_SUBNS); + + spin_lock_irqsave(&port->tx_skbs.lock, flags); + skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { + if (SPARX5_SKB_CB(skb)->ts_id != id) + continue; + + __skb_unlink(skb, &port->tx_skbs); + skb_match = skb; + break; + } + spin_unlock_irqrestore(&port->tx_skbs.lock, flags); + + /* Next ts */ + spx5_rmw(REW_PTP_TWOSTEP_CTRL_PTP_NXT_SET(1), + REW_PTP_TWOSTEP_CTRL_PTP_NXT, + sparx5, REW_PTP_TWOSTEP_CTRL); + + if (WARN_ON(!skb_match)) + continue; + + spin_lock(&sparx5->ptp_ts_id_lock); + sparx5->ptp_skbs--; + spin_unlock(&sparx5->ptp_ts_id_lock); + + /* Get the h/w timestamp */ + sparx5_get_hwtimestamp(sparx5, &ts, delay); + + /* Set the timestamp into the skb */ + shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec); + skb_tstamp_tx(skb_match, &shhwtstamps); + + dev_kfree_skb_any(skb_match); + } + + return IRQ_HANDLED; +} + +static int sparx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct sparx5_phc *phc = container_of(ptp, struct sparx5_phc, info); + struct sparx5 *sparx5 = phc->sparx5; + unsigned long flags; + bool neg_adj = 0; + u64 tod_inc; + u64 ref; + + if (!scaled_ppm) + return 0; + + if (scaled_ppm < 0) { + neg_adj = 1; + scaled_ppm = -scaled_ppm; + } + + tod_inc = sparx5_ptp_get_nominal_value(sparx5); + + /* The multiplication is split in 2 separate additions because of + * overflow issues. If scaled_ppm with 16bit fractional part was bigger + * than 20ppm then we got overflow. + */ + ref = sparx5_ptp_get_1ppm(sparx5) * (scaled_ppm >> 16); + ref += (sparx5_ptp_get_1ppm(sparx5) * (0xffff & scaled_ppm)) >> 16; + tod_inc = neg_adj ? tod_inc - ref : tod_inc + ref; + + spin_lock_irqsave(&sparx5->ptp_clock_lock, flags); + + spx5_rmw(PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS_SET(1 << BIT(phc->index)), + PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS, + sparx5, PTP_PTP_DOM_CFG); + + spx5_wr((u32)tod_inc & 0xFFFFFFFF, sparx5, + PTP_CLK_PER_CFG(phc->index, 0)); + spx5_wr((u32)(tod_inc >> 32), sparx5, + PTP_CLK_PER_CFG(phc->index, 1)); + + spx5_rmw(PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS_SET(0), + PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS, sparx5, + PTP_PTP_DOM_CFG); + + spin_unlock_irqrestore(&sparx5->ptp_clock_lock, flags); + + return 0; +} + +static int sparx5_ptp_settime64(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct sparx5_phc *phc = container_of(ptp, struct sparx5_phc, info); + struct sparx5 *sparx5 = phc->sparx5; + unsigned long flags; + + spin_lock_irqsave(&sparx5->ptp_clock_lock, flags); + + /* Must be in IDLE mode before the time can be loaded */ + spx5_rmw(PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(PTP_PIN_ACTION_IDLE) | + PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(phc->index) | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(0), + PTP_PTP_PIN_CFG_PTP_PIN_ACTION | + PTP_PTP_PIN_CFG_PTP_PIN_DOM | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC, + sparx5, PTP_PTP_PIN_CFG(TOD_ACC_PIN)); + + /* Set new value */ + spx5_wr(PTP_PTP_TOD_SEC_MSB_PTP_TOD_SEC_MSB_SET(upper_32_bits(ts->tv_sec)), + sparx5, PTP_PTP_TOD_SEC_MSB(TOD_ACC_PIN)); + spx5_wr(lower_32_bits(ts->tv_sec), + sparx5, PTP_PTP_TOD_SEC_LSB(TOD_ACC_PIN)); + spx5_wr(ts->tv_nsec, sparx5, PTP_PTP_TOD_NSEC(TOD_ACC_PIN)); + + /* Apply new values */ + spx5_rmw(PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(PTP_PIN_ACTION_LOAD) | + PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(phc->index) | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(0), + PTP_PTP_PIN_CFG_PTP_PIN_ACTION | + PTP_PTP_PIN_CFG_PTP_PIN_DOM | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC, + sparx5, PTP_PTP_PIN_CFG(TOD_ACC_PIN)); + + spin_unlock_irqrestore(&sparx5->ptp_clock_lock, flags); + + return 0; +} + +static int sparx5_ptp_gettime64(struct ptp_clock_info *ptp, + struct timespec64 *ts) +{ + struct sparx5_phc *phc = container_of(ptp, struct sparx5_phc, info); + struct sparx5 *sparx5 = phc->sparx5; + unsigned long flags; + time64_t s; + s64 ns; + + spin_lock_irqsave(&sparx5->ptp_clock_lock, flags); + + spx5_rmw(PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(PTP_PIN_ACTION_SAVE) | + PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(phc->index) | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(0), + PTP_PTP_PIN_CFG_PTP_PIN_ACTION | + PTP_PTP_PIN_CFG_PTP_PIN_DOM | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC, + sparx5, PTP_PTP_PIN_CFG(TOD_ACC_PIN)); + + s = spx5_rd(sparx5, PTP_PTP_TOD_SEC_MSB(TOD_ACC_PIN)); + s <<= 32; + s |= spx5_rd(sparx5, PTP_PTP_TOD_SEC_LSB(TOD_ACC_PIN)); + ns = spx5_rd(sparx5, PTP_PTP_TOD_NSEC(TOD_ACC_PIN)); + ns &= PTP_PTP_TOD_NSEC_PTP_TOD_NSEC; + + spin_unlock_irqrestore(&sparx5->ptp_clock_lock, flags); + + /* Deal with negative values */ + if ((ns & 0xFFFFFFF0) == 0x3FFFFFF0) { + s--; + ns &= 0xf; + ns += 999999984; + } + + set_normalized_timespec64(ts, s, ns); + return 0; +} + +static int sparx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct sparx5_phc *phc = container_of(ptp, struct sparx5_phc, info); + struct sparx5 *sparx5 = phc->sparx5; + + if (delta > -(NSEC_PER_SEC / 2) && delta < (NSEC_PER_SEC / 2)) { + unsigned long flags; + + spin_lock_irqsave(&sparx5->ptp_clock_lock, flags); + + /* Must be in IDLE mode before the time can be loaded */ + spx5_rmw(PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(PTP_PIN_ACTION_IDLE) | + PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(phc->index) | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(0), + PTP_PTP_PIN_CFG_PTP_PIN_ACTION | + PTP_PTP_PIN_CFG_PTP_PIN_DOM | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC, + sparx5, PTP_PTP_PIN_CFG(TOD_ACC_PIN)); + + spx5_wr(PTP_PTP_TOD_NSEC_PTP_TOD_NSEC_SET(delta), + sparx5, PTP_PTP_TOD_NSEC(TOD_ACC_PIN)); + + /* Adjust time with the value of PTP_TOD_NSEC */ + spx5_rmw(PTP_PTP_PIN_CFG_PTP_PIN_ACTION_SET(PTP_PIN_ACTION_DELTA) | + PTP_PTP_PIN_CFG_PTP_PIN_DOM_SET(phc->index) | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC_SET(0), + PTP_PTP_PIN_CFG_PTP_PIN_ACTION | + PTP_PTP_PIN_CFG_PTP_PIN_DOM | + PTP_PTP_PIN_CFG_PTP_PIN_SYNC, + sparx5, PTP_PTP_PIN_CFG(TOD_ACC_PIN)); + + spin_unlock_irqrestore(&sparx5->ptp_clock_lock, flags); + } else { + /* Fall back using sparx5_ptp_settime64 which is not exact */ + struct timespec64 ts; + u64 now; + + sparx5_ptp_gettime64(ptp, &ts); + + now = ktime_to_ns(timespec64_to_ktime(ts)); + ts = ns_to_timespec64(now + delta); + + sparx5_ptp_settime64(ptp, &ts); + } + + return 0; +} + +static struct ptp_clock_info sparx5_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "sparx5 ptp", + .max_adj = 200000, + .gettime64 = sparx5_ptp_gettime64, + .settime64 = sparx5_ptp_settime64, + .adjtime = sparx5_ptp_adjtime, + .adjfine = sparx5_ptp_adjfine, +}; + +static int sparx5_ptp_phc_init(struct sparx5 *sparx5, + int index, + struct ptp_clock_info *clock_info) +{ + struct sparx5_phc *phc = &sparx5->phc[index]; + + phc->info = *clock_info; + phc->clock = ptp_clock_register(&phc->info, sparx5->dev); + if (IS_ERR(phc->clock)) + return PTR_ERR(phc->clock); + + phc->index = index; + phc->sparx5 = sparx5; + + /* PTP Rx stamping is always enabled. */ + phc->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + + return 0; +} + +int sparx5_ptp_init(struct sparx5 *sparx5) +{ + u64 tod_adj = sparx5_ptp_get_nominal_value(sparx5); + struct sparx5_port *port; + int err, i; + + if (!sparx5->ptp) + return 0; + + for (i = 0; i < SPARX5_PHC_COUNT; ++i) { + err = sparx5_ptp_phc_init(sparx5, i, &sparx5_ptp_clock_info); + if (err) + return err; + } + + spin_lock_init(&sparx5->ptp_clock_lock); + spin_lock_init(&sparx5->ptp_ts_id_lock); + mutex_init(&sparx5->ptp_lock); + + /* Disable master counters */ + spx5_wr(PTP_PTP_DOM_CFG_PTP_ENA_SET(0), sparx5, PTP_PTP_DOM_CFG); + + /* Configure the nominal TOD increment per clock cycle */ + spx5_rmw(PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS_SET(0x7), + PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS, + sparx5, PTP_PTP_DOM_CFG); + + for (i = 0; i < SPARX5_PHC_COUNT; ++i) { + spx5_wr((u32)tod_adj & 0xFFFFFFFF, sparx5, + PTP_CLK_PER_CFG(i, 0)); + spx5_wr((u32)(tod_adj >> 32), sparx5, + PTP_CLK_PER_CFG(i, 1)); + } + + spx5_rmw(PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS_SET(0), + PTP_PTP_DOM_CFG_PTP_CLKCFG_DIS, + sparx5, PTP_PTP_DOM_CFG); + + /* Enable master counters */ + spx5_wr(PTP_PTP_DOM_CFG_PTP_ENA_SET(0x7), sparx5, PTP_PTP_DOM_CFG); + + for (i = 0; i < sparx5->port_count; i++) { + port = sparx5->ports[i]; + if (!port) + continue; + + skb_queue_head_init(&port->tx_skbs); + } + + return 0; +} + +void sparx5_ptp_deinit(struct sparx5 *sparx5) +{ + struct sparx5_port *port; + int i; + + for (i = 0; i < sparx5->port_count; i++) { + port = sparx5->ports[i]; + if (!port) + continue; + + skb_queue_purge(&port->tx_skbs); + } + + for (i = 0; i < SPARX5_PHC_COUNT; ++i) + ptp_clock_unregister(sparx5->phc[i].clock); +} + +void sparx5_ptp_rxtstamp(struct sparx5 *sparx5, struct sk_buff *skb, + u64 timestamp) +{ + struct skb_shared_hwtstamps *shhwtstamps; + struct sparx5_phc *phc; + struct timespec64 ts; + u64 full_ts_in_ns; + + if (!sparx5->ptp) + return; + + phc = &sparx5->phc[SPARX5_PHC_PORT]; + sparx5_ptp_gettime64(&phc->info, &ts); + + if (ts.tv_nsec < timestamp) + ts.tv_sec--; + ts.tv_nsec = timestamp; + full_ts_in_ns = ktime_set(ts.tv_sec, ts.tv_nsec); + + shhwtstamps = skb_hwtstamps(skb); + shhwtstamps->hwtstamp = full_ts_in_ns; +} diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c index 649ca609884a..dacb87f49552 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c @@ -19,11 +19,27 @@ struct sparx5_switchdev_event_work { unsigned long event; }; +static int sparx5_port_attr_pre_bridge_flags(struct sparx5_port *port, + struct switchdev_brport_flags flags) +{ + if (flags.mask & ~(BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD)) + return -EINVAL; + + return 0; +} + static void sparx5_port_attr_bridge_flags(struct sparx5_port *port, struct switchdev_brport_flags flags) { + int pgid; + if (flags.mask & BR_MCAST_FLOOD) - sparx5_pgid_update_mask(port, PGID_MC_FLOOD, true); + for (pgid = PGID_MC_FLOOD; pgid <= PGID_IPV6_MC_CTRL; pgid++) + sparx5_pgid_update_mask(port, pgid, !!(flags.val & BR_MCAST_FLOOD)); + if (flags.mask & BR_FLOOD) + sparx5_pgid_update_mask(port, PGID_UC_FLOOD, !!(flags.val & BR_FLOOD)); + if (flags.mask & BR_BCAST_FLOOD) + sparx5_pgid_update_mask(port, PGID_BCAST, !!(flags.val & BR_BCAST_FLOOD)); } static void sparx5_attr_stp_state_set(struct sparx5_port *port, @@ -72,6 +88,9 @@ static int sparx5_port_attr_set(struct net_device *dev, const void *ctx, struct sparx5_port *port = netdev_priv(dev); switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + return sparx5_port_attr_pre_bridge_flags(port, + attr->u.brport_flags); case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: sparx5_port_attr_bridge_flags(port, attr->u.brport_flags); break; @@ -369,13 +388,11 @@ static int sparx5_handle_port_vlan_add(struct net_device *dev, struct sparx5_port *port = netdev_priv(dev); if (netif_is_bridge_master(dev)) { - if (v->flags & BRIDGE_VLAN_INFO_BRENTRY) { - struct sparx5 *sparx5 = - container_of(nb, struct sparx5, - switchdev_blocking_nb); + struct sparx5 *sparx5 = + container_of(nb, struct sparx5, + switchdev_blocking_nb); - sparx5_sync_bridge_dev_addr(dev, sparx5, v->vid, true); - } + sparx5_sync_bridge_dev_addr(dev, sparx5, v->vid, true); return 0; } diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 636dfef24a6c..49b85ca578b0 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -663,7 +663,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, struct gdma_context *gc = gd->gdma_context; struct hw_channel_context *hwc; u32 length = gmi->length; - u32 req_msg_size; + size_t req_msg_size; int err; int i; @@ -674,7 +674,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, return -EINVAL; hwc = gc->hwc.driver_data; - req_msg_size = sizeof(*req) + num_page * sizeof(u64); + req_msg_size = struct_size(req, page_addr_list, num_page); if (req_msg_size > hwc->max_req_msg_size) return -EINVAL; diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h index 9a12607fb511..d36405af9432 100644 --- a/drivers/net/ethernet/microsoft/mana/mana.h +++ b/drivers/net/ethernet/microsoft/mana/mana.h @@ -48,7 +48,15 @@ enum TRI_STATE { #define MAX_PORTS_IN_MANA_DEV 256 -struct mana_stats { +struct mana_stats_rx { + u64 packets; + u64 bytes; + u64 xdp_drop; + u64 xdp_tx; + struct u64_stats_sync syncp; +}; + +struct mana_stats_tx { u64 packets; u64 bytes; struct u64_stats_sync syncp; @@ -76,7 +84,7 @@ struct mana_txq { atomic_t pending_sends; - struct mana_stats stats; + struct mana_stats_tx stats; }; /* skb data and frags dma mappings */ @@ -298,10 +306,11 @@ struct mana_rxq { u32 buf_index; - struct mana_stats stats; + struct mana_stats_rx stats; struct bpf_prog __rcu *bpf_prog; struct xdp_rxq_info xdp_rxq; + struct page *xdp_save_page; /* MUST BE THE LAST MEMBER: * Each receive buffer has an associated mana_recv_buf_oob. diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 498d0f999275..b7d3ba1b4d17 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -136,7 +136,7 @@ int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) bool ipv4 = false, ipv6 = false; struct mana_tx_package pkg = {}; struct netdev_queue *net_txq; - struct mana_stats *tx_stats; + struct mana_stats_tx *tx_stats; struct gdma_queue *gdma_sq; unsigned int csum_type; struct mana_txq *txq; @@ -299,7 +299,8 @@ static void mana_get_stats64(struct net_device *ndev, { struct mana_port_context *apc = netdev_priv(ndev); unsigned int num_queues = apc->num_queues; - struct mana_stats *stats; + struct mana_stats_rx *rx_stats; + struct mana_stats_tx *tx_stats; unsigned int start; u64 packets, bytes; int q; @@ -310,26 +311,26 @@ static void mana_get_stats64(struct net_device *ndev, netdev_stats_to_stats64(st, &ndev->stats); for (q = 0; q < num_queues; q++) { - stats = &apc->rxqs[q]->stats; + rx_stats = &apc->rxqs[q]->stats; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->packets; - bytes = stats->bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; + } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); st->rx_packets += packets; st->rx_bytes += bytes; } for (q = 0; q < num_queues; q++) { - stats = &apc->tx_qp[q].txq.stats; + tx_stats = &apc->tx_qp[q].txq.stats; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->packets; - bytes = stats->bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; + } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); st->tx_packets += packets; st->tx_bytes += bytes; @@ -986,7 +987,7 @@ static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len, static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq) { - struct mana_stats *rx_stats = &rxq->stats; + struct mana_stats_rx *rx_stats = &rxq->stats; struct net_device *ndev = rxq->ndev; uint pkt_len = cqe->ppi[0].pkt_len; u16 rxq_idx = rxq->rxq_idx; @@ -1007,7 +1008,7 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len); if (act != XDP_PASS && act != XDP_TX) - goto drop; + goto drop_xdp; skb = mana_build_skb(buf_va, pkt_len, &xdp); @@ -1034,6 +1035,14 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3); } + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->packets++; + rx_stats->bytes += pkt_len; + + if (act == XDP_TX) + rx_stats->xdp_tx++; + u64_stats_update_end(&rx_stats->syncp); + if (act == XDP_TX) { skb_set_queue_mapping(skb, rxq_idx); mana_xdp_tx(skb, ndev); @@ -1042,15 +1051,19 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, napi_gro_receive(napi, skb); + return; + +drop_xdp: u64_stats_update_begin(&rx_stats->syncp); - rx_stats->packets++; - rx_stats->bytes += pkt_len; + rx_stats->xdp_drop++; u64_stats_update_end(&rx_stats->syncp); - return; drop: - free_page((unsigned long)buf_va); + WARN_ON_ONCE(rxq->xdp_save_page); + rxq->xdp_save_page = virt_to_page(buf_va); + ++ndev->stats.rx_dropped; + return; } @@ -1072,8 +1085,10 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, break; case CQE_RX_TRUNCATED: - netdev_err(ndev, "Dropped a truncated packet\n"); - return; + ++ndev->stats.rx_dropped; + rxbuf_oob = &rxq->rx_oobs[rxq->buf_index]; + netdev_warn_once(ndev, "Dropped a truncated packet\n"); + goto drop; case CQE_RX_COALESCED_4: netdev_err(ndev, "RX coalescing is unsupported\n"); @@ -1089,9 +1104,6 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, return; } - if (oob->cqe_hdr.cqe_type != CQE_RX_OKAY) - return; - pktlen = oob->ppi[0].pkt_len; if (pktlen == 0) { @@ -1105,7 +1117,13 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, rxbuf_oob = &rxq->rx_oobs[curr]; WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1); - new_page = alloc_page(GFP_ATOMIC); + /* Reuse XDP dropped page if available */ + if (rxq->xdp_save_page) { + new_page = rxq->xdp_save_page; + rxq->xdp_save_page = NULL; + } else { + new_page = alloc_page(GFP_ATOMIC); + } if (new_page) { da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize, @@ -1135,6 +1153,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, mana_rx_skb(old_buf, oob, rxq); +drop: mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu); mana_post_pkt_rxq(rxq); @@ -1392,6 +1411,9 @@ static void mana_destroy_rxq(struct mana_port_context *apc, mana_deinit_cq(apc, &rxq->rx_cq); + if (rxq->xdp_save_page) + __free_page(rxq->xdp_save_page); + for (i = 0; i < rxq->num_rx_buf; i++) { rx_oob = &rxq->rx_oobs[i]; diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index c3c81ae3fafd..e13f2453eabb 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -23,7 +23,7 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset) if (stringset != ETH_SS_STATS) return -EINVAL; - return ARRAY_SIZE(mana_eth_stats) + num_queues * 4; + return ARRAY_SIZE(mana_eth_stats) + num_queues * 6; } static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) @@ -46,6 +46,10 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; sprintf(p, "rx_%d_bytes", i); p += ETH_GSTRING_LEN; + sprintf(p, "rx_%d_xdp_drop", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_%d_xdp_tx", i); + p += ETH_GSTRING_LEN; } for (i = 0; i < num_queues; i++) { @@ -62,9 +66,12 @@ static void mana_get_ethtool_stats(struct net_device *ndev, struct mana_port_context *apc = netdev_priv(ndev); unsigned int num_queues = apc->num_queues; void *eth_stats = &apc->eth_stats; - struct mana_stats *stats; + struct mana_stats_rx *rx_stats; + struct mana_stats_tx *tx_stats; unsigned int start; u64 packets, bytes; + u64 xdp_drop; + u64 xdp_tx; int q, i = 0; if (!apc->port_is_up) @@ -74,26 +81,30 @@ static void mana_get_ethtool_stats(struct net_device *ndev, data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset); for (q = 0; q < num_queues; q++) { - stats = &apc->rxqs[q]->stats; + rx_stats = &apc->rxqs[q]->stats; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->packets; - bytes = stats->bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; + xdp_drop = rx_stats->xdp_drop; + xdp_tx = rx_stats->xdp_tx; + } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); data[i++] = packets; data[i++] = bytes; + data[i++] = xdp_drop; + data[i++] = xdp_tx; } for (q = 0; q < num_queues; q++) { - stats = &apc->tx_qp[q].txq.stats; + tx_stats = &apc->tx_qp[q].txq.stats; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->packets; - bytes = stats->bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; + } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); data[i++] = packets; data[i++] = bytes; diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 15179b9529e1..afb7dcadb8d2 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -510,14 +510,14 @@ static int moxart_mac_probe(struct platform_device *pdev) } priv->tx_buf_base = kmalloc_array(priv->tx_buf_size, TX_DESC_NUM, - GFP_ATOMIC); + GFP_KERNEL); if (!priv->tx_buf_base) { ret = -ENOMEM; goto init_fail; } priv->rx_buf_base = kmalloc_array(priv->rx_buf_size, RX_DESC_NUM, - GFP_ATOMIC); + GFP_KERNEL); if (!priv->rx_buf_base) { ret = -ENOMEM; goto init_fail; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index fd3ceb74620d..21134125a6e4 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -13,6 +13,7 @@ #define TABLE_UPDATE_SLEEP_US 10 #define TABLE_UPDATE_TIMEOUT_US 100000 +#define OCELOT_RSV_VLAN_RANGE_START 4000 struct ocelot_mact_entry { u8 mac[ETH_ALEN]; @@ -221,6 +222,35 @@ static void ocelot_vcap_enable(struct ocelot *ocelot, int port) REW_PORT_CFG, port); } +static int ocelot_single_vlan_aware_bridge(struct ocelot *ocelot, + struct netlink_ext_ack *extack) +{ + struct net_device *bridge = NULL; + int port; + + for (port = 0; port < ocelot->num_phys_ports; port++) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + + if (!ocelot_port || !ocelot_port->bridge || + !br_vlan_enabled(ocelot_port->bridge)) + continue; + + if (!bridge) { + bridge = ocelot_port->bridge; + continue; + } + + if (bridge == ocelot_port->bridge) + continue; + + NL_SET_ERR_MSG_MOD(extack, + "Only one VLAN-aware bridge is supported"); + return -EBUSY; + } + + return 0; +} + static inline u32 ocelot_vlant_read_vlanaccess(struct ocelot *ocelot) { return ocelot_read(ocelot, ANA_TABLES_VLANACCESS); @@ -347,12 +377,45 @@ static void ocelot_port_manage_port_tag(struct ocelot *ocelot, int port) } } +int ocelot_bridge_num_find(struct ocelot *ocelot, + const struct net_device *bridge) +{ + int port; + + for (port = 0; port < ocelot->num_phys_ports; port++) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + + if (ocelot_port && ocelot_port->bridge == bridge) + return ocelot_port->bridge_num; + } + + return -1; +} +EXPORT_SYMBOL_GPL(ocelot_bridge_num_find); + +static u16 ocelot_vlan_unaware_pvid(struct ocelot *ocelot, + const struct net_device *bridge) +{ + int bridge_num; + + /* Standalone ports use VID 0 */ + if (!bridge) + return 0; + + bridge_num = ocelot_bridge_num_find(ocelot, bridge); + if (WARN_ON(bridge_num < 0)) + return 0; + + /* VLAN-unaware bridges use a reserved VID going from 4095 downwards */ + return VLAN_N_VID - bridge_num - 1; +} + /* Default vlan to clasify for untagged frames (may be zero) */ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, const struct ocelot_bridge_vlan *pvid_vlan) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - u16 pvid = OCELOT_VLAN_UNAWARE_PVID; + u16 pvid = ocelot_vlan_unaware_pvid(ocelot, ocelot_port->bridge); u32 val = 0; ocelot_port->pvid_vlan = pvid_vlan; @@ -466,12 +529,29 @@ static int ocelot_vlan_member_del(struct ocelot *ocelot, int port, u16 vid) return 0; } +static int ocelot_add_vlan_unaware_pvid(struct ocelot *ocelot, int port, + const struct net_device *bridge) +{ + u16 vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + + return ocelot_vlan_member_add(ocelot, port, vid, true); +} + +static int ocelot_del_vlan_unaware_pvid(struct ocelot *ocelot, int port, + const struct net_device *bridge) +{ + u16 vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + + return ocelot_vlan_member_del(ocelot, port, vid); +} + int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool vlan_aware, struct netlink_ext_ack *extack) { struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1]; struct ocelot_port *ocelot_port = ocelot->ports[port]; struct ocelot_vcap_filter *filter; + int err; u32 val; list_for_each_entry(filter, &block->rules, list) { @@ -483,6 +563,19 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, } } + err = ocelot_single_vlan_aware_bridge(ocelot, extack); + if (err) + return err; + + if (vlan_aware) + err = ocelot_del_vlan_unaware_pvid(ocelot, port, + ocelot_port->bridge); + else + err = ocelot_add_vlan_unaware_pvid(ocelot, port, + ocelot_port->bridge); + if (err) + return err; + ocelot_port->vlan_aware = vlan_aware; if (vlan_aware) @@ -521,6 +614,12 @@ int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid, } } + if (vid > OCELOT_RSV_VLAN_RANGE_START) { + NL_SET_ERR_MSG_MOD(extack, + "VLAN range 4000-4095 reserved for VLAN-unaware bridging"); + return -EBUSY; + } + return 0; } EXPORT_SYMBOL(ocelot_vlan_prepare); @@ -584,11 +683,11 @@ static void ocelot_vlan_init(struct ocelot *ocelot) for (vid = 1; vid < VLAN_N_VID; vid++) ocelot_vlant_set_mask(ocelot, vid, 0); - /* Because VLAN filtering is enabled, we need VID 0 to get untagged - * traffic. It is added automatically if 8021q module is loaded, but - * we can't rely on it since module may be not loaded. + /* We need VID 0 to get traffic on standalone ports. + * It is added automatically if the 8021q module is loaded, but we + * can't rely on that since it might not be. */ - ocelot_vlant_set_mask(ocelot, OCELOT_VLAN_UNAWARE_PVID, all_ports); + ocelot_vlant_set_mask(ocelot, OCELOT_STANDALONE_PVID, all_ports); /* Set vlan ingress filter mask to all ports but the CPU port by * default. @@ -1237,21 +1336,27 @@ void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp) } EXPORT_SYMBOL(ocelot_drain_cpu_queue); -int ocelot_fdb_add(struct ocelot *ocelot, int port, - const unsigned char *addr, u16 vid) +int ocelot_fdb_add(struct ocelot *ocelot, int port, const unsigned char *addr, + u16 vid, const struct net_device *bridge) { int pgid = port; if (port == ocelot->npi) pgid = PGID_CPU; + if (!vid) + vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + return ocelot_mact_learn(ocelot, pgid, addr, vid, ENTRYTYPE_LOCKED); } EXPORT_SYMBOL(ocelot_fdb_add); -int ocelot_fdb_del(struct ocelot *ocelot, int port, - const unsigned char *addr, u16 vid) +int ocelot_fdb_del(struct ocelot *ocelot, int port, const unsigned char *addr, + u16 vid, const struct net_device *bridge) { + if (!vid) + vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + return ocelot_mact_forget(ocelot, addr, vid); } EXPORT_SYMBOL(ocelot_fdb_del); @@ -1413,6 +1518,12 @@ int ocelot_fdb_dump(struct ocelot *ocelot, int port, is_static = (entry.type == ENTRYTYPE_LOCKED); + /* Hide the reserved VLANs used for + * VLAN-unaware bridging. + */ + if (entry.vid > OCELOT_RSV_VLAN_RANGE_START) + entry.vid = 0; + err = cb(entry.mac, entry.vid, is_static, data); if (err) break; @@ -1472,9 +1583,9 @@ ocelot_populate_ipv6_ptp_general_trap_key(struct ocelot_vcap_filter *trap) trap->key.ipv6.dport.mask = 0xffff; } -static int ocelot_trap_add(struct ocelot *ocelot, int port, - unsigned long cookie, - void (*populate)(struct ocelot_vcap_filter *f)) +int ocelot_trap_add(struct ocelot *ocelot, int port, + unsigned long cookie, bool take_ts, + void (*populate)(struct ocelot_vcap_filter *f)) { struct ocelot_vcap_block *block_vcap_is2; struct ocelot_vcap_filter *trap; @@ -1500,6 +1611,8 @@ static int ocelot_trap_add(struct ocelot *ocelot, int port, trap->action.cpu_copy_ena = true; trap->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY; trap->action.port_mask = 0; + trap->take_ts = take_ts; + list_add_tail(&trap->trap_list, &ocelot->traps); new = true; } @@ -1511,16 +1624,17 @@ static int ocelot_trap_add(struct ocelot *ocelot, int port, err = ocelot_vcap_filter_replace(ocelot, trap); if (err) { trap->ingress_port_mask &= ~BIT(port); - if (!trap->ingress_port_mask) + if (!trap->ingress_port_mask) { + list_del(&trap->trap_list); kfree(trap); + } return err; } return 0; } -static int ocelot_trap_del(struct ocelot *ocelot, int port, - unsigned long cookie) +int ocelot_trap_del(struct ocelot *ocelot, int port, unsigned long cookie) { struct ocelot_vcap_block *block_vcap_is2; struct ocelot_vcap_filter *trap; @@ -1533,39 +1647,42 @@ static int ocelot_trap_del(struct ocelot *ocelot, int port, return 0; trap->ingress_port_mask &= ~BIT(port); - if (!trap->ingress_port_mask) + if (!trap->ingress_port_mask) { + list_del(&trap->trap_list); + return ocelot_vcap_filter_del(ocelot, trap); + } return ocelot_vcap_filter_replace(ocelot, trap); } static int ocelot_l2_ptp_trap_add(struct ocelot *ocelot, int port) { - unsigned long l2_cookie = ocelot->num_phys_ports + 1; + unsigned long l2_cookie = OCELOT_VCAP_IS2_L2_PTP_TRAP(ocelot); - return ocelot_trap_add(ocelot, port, l2_cookie, + return ocelot_trap_add(ocelot, port, l2_cookie, true, ocelot_populate_l2_ptp_trap_key); } static int ocelot_l2_ptp_trap_del(struct ocelot *ocelot, int port) { - unsigned long l2_cookie = ocelot->num_phys_ports + 1; + unsigned long l2_cookie = OCELOT_VCAP_IS2_L2_PTP_TRAP(ocelot); return ocelot_trap_del(ocelot, port, l2_cookie); } static int ocelot_ipv4_ptp_trap_add(struct ocelot *ocelot, int port) { - unsigned long ipv4_gen_cookie = ocelot->num_phys_ports + 2; - unsigned long ipv4_ev_cookie = ocelot->num_phys_ports + 3; + unsigned long ipv4_gen_cookie = OCELOT_VCAP_IS2_IPV4_GEN_PTP_TRAP(ocelot); + unsigned long ipv4_ev_cookie = OCELOT_VCAP_IS2_IPV4_EV_PTP_TRAP(ocelot); int err; - err = ocelot_trap_add(ocelot, port, ipv4_ev_cookie, + err = ocelot_trap_add(ocelot, port, ipv4_ev_cookie, true, ocelot_populate_ipv4_ptp_event_trap_key); if (err) return err; - err = ocelot_trap_add(ocelot, port, ipv4_gen_cookie, + err = ocelot_trap_add(ocelot, port, ipv4_gen_cookie, false, ocelot_populate_ipv4_ptp_general_trap_key); if (err) ocelot_trap_del(ocelot, port, ipv4_ev_cookie); @@ -1575,8 +1692,8 @@ static int ocelot_ipv4_ptp_trap_add(struct ocelot *ocelot, int port) static int ocelot_ipv4_ptp_trap_del(struct ocelot *ocelot, int port) { - unsigned long ipv4_gen_cookie = ocelot->num_phys_ports + 2; - unsigned long ipv4_ev_cookie = ocelot->num_phys_ports + 3; + unsigned long ipv4_gen_cookie = OCELOT_VCAP_IS2_IPV4_GEN_PTP_TRAP(ocelot); + unsigned long ipv4_ev_cookie = OCELOT_VCAP_IS2_IPV4_EV_PTP_TRAP(ocelot); int err; err = ocelot_trap_del(ocelot, port, ipv4_ev_cookie); @@ -1586,16 +1703,16 @@ static int ocelot_ipv4_ptp_trap_del(struct ocelot *ocelot, int port) static int ocelot_ipv6_ptp_trap_add(struct ocelot *ocelot, int port) { - unsigned long ipv6_gen_cookie = ocelot->num_phys_ports + 4; - unsigned long ipv6_ev_cookie = ocelot->num_phys_ports + 5; + unsigned long ipv6_gen_cookie = OCELOT_VCAP_IS2_IPV6_GEN_PTP_TRAP(ocelot); + unsigned long ipv6_ev_cookie = OCELOT_VCAP_IS2_IPV6_EV_PTP_TRAP(ocelot); int err; - err = ocelot_trap_add(ocelot, port, ipv6_ev_cookie, + err = ocelot_trap_add(ocelot, port, ipv6_ev_cookie, true, ocelot_populate_ipv6_ptp_event_trap_key); if (err) return err; - err = ocelot_trap_add(ocelot, port, ipv6_gen_cookie, + err = ocelot_trap_add(ocelot, port, ipv6_gen_cookie, false, ocelot_populate_ipv6_ptp_general_trap_key); if (err) ocelot_trap_del(ocelot, port, ipv6_ev_cookie); @@ -1605,8 +1722,8 @@ static int ocelot_ipv6_ptp_trap_add(struct ocelot *ocelot, int port) static int ocelot_ipv6_ptp_trap_del(struct ocelot *ocelot, int port) { - unsigned long ipv6_gen_cookie = ocelot->num_phys_ports + 4; - unsigned long ipv6_ev_cookie = ocelot->num_phys_ports + 5; + unsigned long ipv6_gen_cookie = OCELOT_VCAP_IS2_IPV6_GEN_PTP_TRAP(ocelot); + unsigned long ipv6_ev_cookie = OCELOT_VCAP_IS2_IPV6_EV_PTP_TRAP(ocelot); int err; err = ocelot_trap_del(ocelot, port, ipv6_ev_cookie); @@ -1750,28 +1867,36 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data) EXPORT_SYMBOL(ocelot_get_strings); /* Caller must hold &ocelot->stats_lock */ -static void ocelot_update_stats(struct ocelot *ocelot) +static int ocelot_port_update_stats(struct ocelot *ocelot, int port) { - int i, j; + unsigned int idx = port * ocelot->num_stats; + struct ocelot_stats_region *region; + int err, j; - for (i = 0; i < ocelot->num_phys_ports; i++) { - /* Configure the port to read the stats from */ - ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG); + /* Configure the port to read the stats from */ + ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(port), SYS_STAT_CFG); - for (j = 0; j < ocelot->num_stats; j++) { - u32 val; - unsigned int idx = i * ocelot->num_stats + j; + list_for_each_entry(region, &ocelot->stats_regions, node) { + err = ocelot_bulk_read_rix(ocelot, SYS_COUNT_RX_OCTETS, + region->offset, region->buf, + region->count); + if (err) + return err; - val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS, - ocelot->stats_layout[j].offset); + for (j = 0; j < region->count; j++) { + u64 *stat = &ocelot->stats[idx + j]; + u64 val = region->buf[j]; - if (val < (ocelot->stats[idx] & U32_MAX)) - ocelot->stats[idx] += (u64)1 << 32; + if (val < (*stat & U32_MAX)) + *stat += (u64)1 << 32; - ocelot->stats[idx] = (ocelot->stats[idx] & - ~(u64)U32_MAX) + val; + *stat = (*stat & ~(u64)U32_MAX) + val; } + + idx += region->count; } + + return err; } static void ocelot_check_stats_work(struct work_struct *work) @@ -1779,29 +1904,40 @@ static void ocelot_check_stats_work(struct work_struct *work) struct delayed_work *del_work = to_delayed_work(work); struct ocelot *ocelot = container_of(del_work, struct ocelot, stats_work); + int i, err; mutex_lock(&ocelot->stats_lock); - ocelot_update_stats(ocelot); + for (i = 0; i < ocelot->num_phys_ports; i++) { + err = ocelot_port_update_stats(ocelot, i); + if (err) + break; + } mutex_unlock(&ocelot->stats_lock); + if (err) + dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err); + queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, OCELOT_STATS_CHECK_DELAY); } void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data) { - int i; + int i, err; mutex_lock(&ocelot->stats_lock); /* check and update now */ - ocelot_update_stats(ocelot); + err = ocelot_port_update_stats(ocelot, port); /* Copy all counters */ for (i = 0; i < ocelot->num_stats; i++) *data++ = ocelot->stats[port * ocelot->num_stats + i]; mutex_unlock(&ocelot->stats_lock); + + if (err) + dev_err(ocelot->dev, "Error %d updating ethtool stats\n", err); } EXPORT_SYMBOL(ocelot_get_ethtool_stats); @@ -1814,6 +1950,41 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset) } EXPORT_SYMBOL(ocelot_get_sset_count); +static int ocelot_prepare_stats_regions(struct ocelot *ocelot) +{ + struct ocelot_stats_region *region = NULL; + unsigned int last; + int i; + + INIT_LIST_HEAD(&ocelot->stats_regions); + + for (i = 0; i < ocelot->num_stats; i++) { + if (region && ocelot->stats_layout[i].offset == last + 1) { + region->count++; + } else { + region = devm_kzalloc(ocelot->dev, sizeof(*region), + GFP_KERNEL); + if (!region) + return -ENOMEM; + + region->offset = ocelot->stats_layout[i].offset; + region->count = 1; + list_add_tail(®ion->node, &ocelot->stats_regions); + } + + last = ocelot->stats_layout[i].offset; + } + + list_for_each_entry(region, &ocelot->stats_regions, node) { + region->buf = devm_kcalloc(ocelot->dev, region->count, + sizeof(*region->buf), GFP_KERNEL); + if (!region->buf) + return -ENOMEM; + } + + return 0; +} + int ocelot_get_ts_info(struct ocelot *ocelot, int port, struct ethtool_ts_info *info) { @@ -1847,6 +2018,8 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond) u32 mask = 0; int port; + lockdep_assert_held(&ocelot->fwd_domain_lock); + for (port = 0; port < ocelot->num_phys_ports; port++) { struct ocelot_port *ocelot_port = ocelot->ports[port]; @@ -1860,6 +2033,19 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond) return mask; } +/* The logical port number of a LAG is equal to the lowest numbered physical + * port ID present in that LAG. It may change if that port ever leaves the LAG. + */ +static int ocelot_bond_get_id(struct ocelot *ocelot, struct net_device *bond) +{ + int bond_mask = ocelot_get_bond_mask(ocelot, bond); + + if (!bond_mask) + return -ENOENT; + + return __ffs(bond_mask); +} + u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot, int src_port) { struct ocelot_port *ocelot_port = ocelot->ports[src_port]; @@ -1979,6 +2165,28 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot, bool joining) } EXPORT_SYMBOL(ocelot_apply_bridge_fwd_mask); +void ocelot_port_set_dsa_8021q_cpu(struct ocelot *ocelot, int port) +{ + u16 vid; + + ocelot->ports[port]->is_dsa_8021q_cpu = true; + + for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++) + ocelot_vlan_member_add(ocelot, port, vid, true); +} +EXPORT_SYMBOL_GPL(ocelot_port_set_dsa_8021q_cpu); + +void ocelot_port_unset_dsa_8021q_cpu(struct ocelot *ocelot, int port) +{ + u16 vid; + + ocelot->ports[port]->is_dsa_8021q_cpu = false; + + for (vid = OCELOT_RSV_VLAN_RANGE_START; vid < VLAN_N_VID; vid++) + ocelot_vlan_member_del(ocelot, port, vid); +} +EXPORT_SYMBOL_GPL(ocelot_port_unset_dsa_8021q_cpu); + void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state) { struct ocelot_port *ocelot_port = ocelot->ports[port]; @@ -2123,7 +2331,8 @@ static void ocelot_encode_ports_to_mdb(unsigned char *addr, } int ocelot_port_mdb_add(struct ocelot *ocelot, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + const struct net_device *bridge) { unsigned char addr[ETH_ALEN]; struct ocelot_multicast *mc; @@ -2133,6 +2342,9 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port, if (port == ocelot->npi) port = ocelot->num_phys_ports; + if (!vid) + vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + mc = ocelot_multicast_get(ocelot, mdb->addr, vid); if (!mc) { /* New entry */ @@ -2179,7 +2391,8 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port, EXPORT_SYMBOL(ocelot_port_mdb_add); int ocelot_port_mdb_del(struct ocelot *ocelot, int port, - const struct switchdev_obj_port_mdb *mdb) + const struct switchdev_obj_port_mdb *mdb, + const struct net_device *bridge) { unsigned char addr[ETH_ALEN]; struct ocelot_multicast *mc; @@ -2189,6 +2402,9 @@ int ocelot_port_mdb_del(struct ocelot *ocelot, int port, if (port == ocelot->npi) port = ocelot->num_phys_ports; + if (!vid) + vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + mc = ocelot_multicast_get(ocelot, mdb->addr, vid); if (!mc) return -ENOENT; @@ -2222,18 +2438,30 @@ int ocelot_port_mdb_del(struct ocelot *ocelot, int port, } EXPORT_SYMBOL(ocelot_port_mdb_del); -void ocelot_port_bridge_join(struct ocelot *ocelot, int port, - struct net_device *bridge) +int ocelot_port_bridge_join(struct ocelot *ocelot, int port, + struct net_device *bridge, int bridge_num, + struct netlink_ext_ack *extack) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + int err; + + err = ocelot_single_vlan_aware_bridge(ocelot, extack); + if (err) + return err; mutex_lock(&ocelot->fwd_domain_lock); ocelot_port->bridge = bridge; + ocelot_port->bridge_num = bridge_num; ocelot_apply_bridge_fwd_mask(ocelot, true); mutex_unlock(&ocelot->fwd_domain_lock); + + if (br_vlan_enabled(bridge)) + return 0; + + return ocelot_add_vlan_unaware_pvid(ocelot, port, bridge); } EXPORT_SYMBOL(ocelot_port_bridge_join); @@ -2244,7 +2472,11 @@ void ocelot_port_bridge_leave(struct ocelot *ocelot, int port, mutex_lock(&ocelot->fwd_domain_lock); + if (!br_vlan_enabled(bridge)) + ocelot_del_vlan_unaware_pvid(ocelot, port, bridge); + ocelot_port->bridge = NULL; + ocelot_port->bridge_num = -1; ocelot_port_set_pvid(ocelot, port, NULL); ocelot_port_manage_port_tag(ocelot, port); @@ -2353,7 +2585,7 @@ static void ocelot_setup_logical_port_ids(struct ocelot *ocelot) bond = ocelot_port->bond; if (bond) { - int lag = __ffs(ocelot_get_bond_mask(ocelot, bond)); + int lag = ocelot_bond_get_id(ocelot, bond); ocelot_rmw_gix(ocelot, ANA_PORT_PORT_CFG_PORTID_VAL(lag), @@ -2368,6 +2600,46 @@ static void ocelot_setup_logical_port_ids(struct ocelot *ocelot) } } +/* Documentation for PORTID_VAL says: + * Logical port number for front port. If port is not a member of a LLAG, + * then PORTID must be set to the physical port number. + * If port is a member of a LLAG, then PORTID must be set to the common + * PORTID_VAL used for all member ports of the LLAG. + * The value must not exceed the number of physical ports on the device. + * + * This means we have little choice but to migrate FDB entries pointing towards + * a logical port when that changes. + */ +static void ocelot_migrate_lag_fdbs(struct ocelot *ocelot, + struct net_device *bond, + int lag) +{ + struct ocelot_lag_fdb *fdb; + int err; + + lockdep_assert_held(&ocelot->fwd_domain_lock); + + list_for_each_entry(fdb, &ocelot->lag_fdbs, list) { + if (fdb->bond != bond) + continue; + + err = ocelot_mact_forget(ocelot, fdb->addr, fdb->vid); + if (err) { + dev_err(ocelot->dev, + "failed to delete LAG %s FDB %pM vid %d: %pe\n", + bond->name, fdb->addr, fdb->vid, ERR_PTR(err)); + } + + err = ocelot_mact_learn(ocelot, lag, fdb->addr, fdb->vid, + ENTRYTYPE_LOCKED); + if (err) { + dev_err(ocelot->dev, + "failed to migrate LAG %s FDB %pM vid %d: %pe\n", + bond->name, fdb->addr, fdb->vid, ERR_PTR(err)); + } + } +} + int ocelot_port_lag_join(struct ocelot *ocelot, int port, struct net_device *bond, struct netdev_lag_upper_info *info) @@ -2392,14 +2664,23 @@ EXPORT_SYMBOL(ocelot_port_lag_join); void ocelot_port_lag_leave(struct ocelot *ocelot, int port, struct net_device *bond) { + int old_lag_id, new_lag_id; + mutex_lock(&ocelot->fwd_domain_lock); + old_lag_id = ocelot_bond_get_id(ocelot, bond); + ocelot->ports[port]->bond = NULL; ocelot_setup_logical_port_ids(ocelot); ocelot_apply_bridge_fwd_mask(ocelot, false); ocelot_set_aggr_pgids(ocelot); + new_lag_id = ocelot_bond_get_id(ocelot, bond); + + if (new_lag_id >= 0 && old_lag_id != new_lag_id) + ocelot_migrate_lag_fdbs(ocelot, bond, new_lag_id); + mutex_unlock(&ocelot->fwd_domain_lock); } EXPORT_SYMBOL(ocelot_port_lag_leave); @@ -2408,13 +2689,83 @@ void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + mutex_lock(&ocelot->fwd_domain_lock); + ocelot_port->lag_tx_active = lag_tx_active; /* Rebalance the LAGs */ ocelot_set_aggr_pgids(ocelot); + + mutex_unlock(&ocelot->fwd_domain_lock); } EXPORT_SYMBOL(ocelot_port_lag_change); +int ocelot_lag_fdb_add(struct ocelot *ocelot, struct net_device *bond, + const unsigned char *addr, u16 vid, + const struct net_device *bridge) +{ + struct ocelot_lag_fdb *fdb; + int lag, err; + + fdb = kzalloc(sizeof(*fdb), GFP_KERNEL); + if (!fdb) + return -ENOMEM; + + mutex_lock(&ocelot->fwd_domain_lock); + + if (!vid) + vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + + ether_addr_copy(fdb->addr, addr); + fdb->vid = vid; + fdb->bond = bond; + + lag = ocelot_bond_get_id(ocelot, bond); + + err = ocelot_mact_learn(ocelot, lag, addr, vid, ENTRYTYPE_LOCKED); + if (err) { + mutex_unlock(&ocelot->fwd_domain_lock); + kfree(fdb); + return err; + } + + list_add_tail(&fdb->list, &ocelot->lag_fdbs); + mutex_unlock(&ocelot->fwd_domain_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(ocelot_lag_fdb_add); + +int ocelot_lag_fdb_del(struct ocelot *ocelot, struct net_device *bond, + const unsigned char *addr, u16 vid, + const struct net_device *bridge) +{ + struct ocelot_lag_fdb *fdb, *tmp; + + mutex_lock(&ocelot->fwd_domain_lock); + + if (!vid) + vid = ocelot_vlan_unaware_pvid(ocelot, bridge); + + list_for_each_entry_safe(fdb, tmp, &ocelot->lag_fdbs, list) { + if (!ether_addr_equal(fdb->addr, addr) || fdb->vid != vid || + fdb->bond != bond) + continue; + + ocelot_mact_forget(ocelot, addr, vid); + list_del(&fdb->list); + mutex_unlock(&ocelot->fwd_domain_lock); + kfree(fdb); + + return 0; + } + + mutex_unlock(&ocelot->fwd_domain_lock); + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(ocelot_lag_fdb_del); + /* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu. * The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG. * In the special case that it's the NPI port that we're configuring, the @@ -2535,6 +2886,9 @@ EXPORT_SYMBOL(ocelot_port_pre_bridge_flags); void ocelot_port_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags flags) { + if (port == ocelot->npi) + port = ocelot->num_phys_ports; + if (flags.mask & BR_LEARNING) ocelot_port_set_learning(ocelot, port, !!(flags.val & BR_LEARNING)); @@ -2647,7 +3001,7 @@ static void ocelot_cpu_port_init(struct ocelot *ocelot) /* Configure the CPU port to be VLAN aware */ ocelot_write_gix(ocelot, - ANA_PORT_VLAN_CFG_VLAN_VID(OCELOT_VLAN_UNAWARE_PVID) | + ANA_PORT_VLAN_CFG_VLAN_VID(OCELOT_STANDALONE_PVID) | ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA | ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1), ANA_PORT_VLAN_CFG, cpu); @@ -2709,6 +3063,7 @@ int ocelot_init(struct ocelot *ocelot) INIT_LIST_HEAD(&ocelot->multicast); INIT_LIST_HEAD(&ocelot->pgids); INIT_LIST_HEAD(&ocelot->vlans); + INIT_LIST_HEAD(&ocelot->lag_fdbs); ocelot_detect_features(ocelot); ocelot_mact_init(ocelot); ocelot_vlan_init(ocelot); @@ -2814,6 +3169,13 @@ int ocelot_init(struct ocelot *ocelot) ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6), ANA_CPUQ_8021_CFG, i); + ret = ocelot_prepare_stats_regions(ocelot); + if (ret) { + destroy_workqueue(ocelot->stats_queue); + destroy_workqueue(ocelot->owq); + return ret; + } + INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work); queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, OCELOT_STATS_CHECK_DELAY); diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index bf4eff6d7086..f8dc0d75eb5d 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -21,11 +21,12 @@ #include <soc/mscc/ocelot_dev.h> #include <soc/mscc/ocelot_ana.h> #include <soc/mscc/ocelot_ptp.h> +#include <soc/mscc/ocelot_vcap.h> #include <soc/mscc/ocelot.h> #include "ocelot_rew.h" #include "ocelot_qs.h" -#define OCELOT_VLAN_UNAWARE_PVID 0 +#define OCELOT_STANDALONE_PVID 0 #define OCELOT_BUFFER_CELL_SZ 60 #define OCELOT_STATS_CHECK_DELAY (2 * HZ) @@ -80,6 +81,9 @@ struct ocelot_multicast { struct ocelot_pgid *pgid; }; +int ocelot_bridge_num_find(struct ocelot *ocelot, + const struct net_device *bridge); + int ocelot_port_fdb_do_dump(const unsigned char *addr, u16 vid, bool is_static, void *data); int ocelot_mact_learn(struct ocelot *ocelot, int port, @@ -102,6 +106,11 @@ int ocelot_port_devlink_init(struct ocelot *ocelot, int port, enum devlink_port_flavour flavour); void ocelot_port_devlink_teardown(struct ocelot *ocelot, int port); +int ocelot_trap_add(struct ocelot *ocelot, int port, + unsigned long cookie, bool take_ts, + void (*populate)(struct ocelot_vcap_filter *f)); +int ocelot_trap_del(struct ocelot *ocelot, int port, unsigned long cookie); + extern struct notifier_block ocelot_netdevice_nb; extern struct notifier_block ocelot_switchdev_nb; extern struct notifier_block ocelot_switchdev_blocking_nb; diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 949858891973..b3f5418dc622 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -6,6 +6,7 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_gact.h> #include <soc/mscc/ocelot_vcap.h> +#include "ocelot_police.h" #include "ocelot_vcap.h" /* Arbitrarily chosen constants for encoding the VCAP block and lookup number @@ -217,6 +218,7 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, bool ingress, struct flow_cls_offload *f, struct ocelot_vcap_filter *filter) { + const struct flow_action *action = &f->rule->action; struct netlink_ext_ack *extack = f->common.extack; bool allow_missing_goto_target = false; const struct flow_action_entry *a; @@ -244,7 +246,7 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, filter->goto_target = -1; filter->type = OCELOT_VCAP_FILTER_DUMMY; - flow_action_for_each(i, a, &f->rule->action) { + flow_action_for_each(i, a, action) { switch (a->id) { case FLOW_ACTION_DROP: if (filter->block_id != VCAP_IS2) { @@ -279,6 +281,7 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, filter->action.cpu_copy_ena = true; filter->action.cpu_qu_num = 0; filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + list_add_tail(&filter->trap_list, &ocelot->traps); break; case FLOW_ACTION_POLICE: if (filter->block_id == PSFP_BLOCK_ID) { @@ -296,11 +299,11 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, "Last action must be GOTO"); return -EOPNOTSUPP; } - if (a->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, - "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } + + err = ocelot_policer_validate(action, a, extack); + if (err) + return err; + filter->action.police_ena = true; pol_ix = a->hw_index + ocelot->vcap_pol.base; @@ -840,6 +843,8 @@ int ocelot_cls_flower_replace(struct ocelot *ocelot, int port, ret = ocelot_flower_parse(ocelot, port, ingress, f, filter); if (ret) { + if (!list_empty(&filter->trap_list)) + list_del(&filter->trap_list); kfree(filter); return ret; } diff --git a/drivers/net/ethernet/mscc/ocelot_io.c b/drivers/net/ethernet/mscc/ocelot_io.c index 7390fa3980ec..2067382d0ee1 100644 --- a/drivers/net/ethernet/mscc/ocelot_io.c +++ b/drivers/net/ethernet/mscc/ocelot_io.c @@ -10,6 +10,19 @@ #include "ocelot.h" +int __ocelot_bulk_read_ix(struct ocelot *ocelot, u32 reg, u32 offset, void *buf, + int count) +{ + u16 target = reg >> TARGET_OFFSET; + + WARN_ON(!target); + + return regmap_bulk_read(ocelot->targets[target], + ocelot->map[target][reg & REG_MASK] + offset, + buf, count); +} +EXPORT_SYMBOL_GPL(__ocelot_bulk_read_ix); + u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset) { u16 target = reg >> TARGET_OFFSET; diff --git a/drivers/net/ethernet/mscc/ocelot_mrp.c b/drivers/net/ethernet/mscc/ocelot_mrp.c index 1fa58546abdc..3ccec488a304 100644 --- a/drivers/net/ethernet/mscc/ocelot_mrp.c +++ b/drivers/net/ethernet/mscc/ocelot_mrp.c @@ -60,7 +60,7 @@ static int ocelot_mrp_redirect_add_vcap(struct ocelot *ocelot, int src_port, filter->key_type = OCELOT_VCAP_KEY_ETYPE; filter->prio = 1; - filter->id.cookie = src_port; + filter->id.cookie = OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, src_port); filter->id.tc_offload = false; filter->block_id = VCAP_IS2; filter->type = OCELOT_VCAP_FILTER_OFFLOAD; @@ -77,55 +77,46 @@ static int ocelot_mrp_redirect_add_vcap(struct ocelot *ocelot, int src_port, return err; } -static int ocelot_mrp_copy_add_vcap(struct ocelot *ocelot, int port, - int prio, unsigned long cookie) +static void ocelot_populate_mrp_trap_key(struct ocelot_vcap_filter *filter) { const u8 mrp_mask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 }; - struct ocelot_vcap_filter *filter; - int err; - - filter = kzalloc(sizeof(*filter), GFP_KERNEL); - if (!filter) - return -ENOMEM; - filter->key_type = OCELOT_VCAP_KEY_ETYPE; - filter->prio = prio; - filter->id.cookie = cookie; - filter->id.tc_offload = false; - filter->block_id = VCAP_IS2; - filter->type = OCELOT_VCAP_FILTER_OFFLOAD; - filter->ingress_port_mask = BIT(port); /* Here is possible to use control or test dmac because the mask * doesn't cover the LSB */ ether_addr_copy(filter->key.etype.dmac.value, mrp_test_dmac); ether_addr_copy(filter->key.etype.dmac.mask, mrp_mask); - filter->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY; - filter->action.port_mask = 0x0; - filter->action.cpu_copy_ena = true; - filter->action.cpu_qu_num = OCELOT_MRP_CPUQ; +} - err = ocelot_vcap_filter_add(ocelot, filter, NULL); - if (err) - kfree(filter); +static int ocelot_mrp_trap_add(struct ocelot *ocelot, int port) +{ + unsigned long cookie = OCELOT_VCAP_IS2_MRP_TRAP(ocelot); - return err; + return ocelot_trap_add(ocelot, port, cookie, false, + ocelot_populate_mrp_trap_key); +} + +static int ocelot_mrp_trap_del(struct ocelot *ocelot, int port) +{ + unsigned long cookie = OCELOT_VCAP_IS2_MRP_TRAP(ocelot); + + return ocelot_trap_del(ocelot, port, cookie); } static void ocelot_mrp_save_mac(struct ocelot *ocelot, struct ocelot_port *port) { ocelot_mact_learn(ocelot, PGID_BLACKHOLE, mrp_test_dmac, - OCELOT_VLAN_UNAWARE_PVID, ENTRYTYPE_LOCKED); + OCELOT_STANDALONE_PVID, ENTRYTYPE_LOCKED); ocelot_mact_learn(ocelot, PGID_BLACKHOLE, mrp_control_dmac, - OCELOT_VLAN_UNAWARE_PVID, ENTRYTYPE_LOCKED); + OCELOT_STANDALONE_PVID, ENTRYTYPE_LOCKED); } static void ocelot_mrp_del_mac(struct ocelot *ocelot, struct ocelot_port *port) { - ocelot_mact_forget(ocelot, mrp_test_dmac, OCELOT_VLAN_UNAWARE_PVID); - ocelot_mact_forget(ocelot, mrp_control_dmac, OCELOT_VLAN_UNAWARE_PVID); + ocelot_mact_forget(ocelot, mrp_test_dmac, OCELOT_STANDALONE_PVID); + ocelot_mact_forget(ocelot, mrp_control_dmac, OCELOT_STANDALONE_PVID); } int ocelot_mrp_add(struct ocelot *ocelot, int port, @@ -186,7 +177,7 @@ int ocelot_mrp_add_ring_role(struct ocelot *ocelot, int port, ocelot_mrp_save_mac(ocelot, ocelot_port); if (mrp->ring_role != BR_MRP_RING_ROLE_MRC) - return ocelot_mrp_copy_add_vcap(ocelot, port, 1, port); + return ocelot_mrp_trap_add(ocelot, port); dst_port = ocelot_mrp_find_partner_port(ocelot, ocelot_port); if (dst_port == -1) @@ -196,10 +187,10 @@ int ocelot_mrp_add_ring_role(struct ocelot *ocelot, int port, if (err) return err; - err = ocelot_mrp_copy_add_vcap(ocelot, port, 2, - port + ocelot->num_phys_ports); + err = ocelot_mrp_trap_add(ocelot, port); if (err) { - ocelot_mrp_del_vcap(ocelot, port); + ocelot_mrp_del_vcap(ocelot, + OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port)); return err; } @@ -211,7 +202,7 @@ int ocelot_mrp_del_ring_role(struct ocelot *ocelot, int port, const struct switchdev_obj_ring_role_mrp *mrp) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - int i; + int err, i; if (!ocelot_port) return -EOPNOTSUPP; @@ -222,8 +213,11 @@ int ocelot_mrp_del_ring_role(struct ocelot *ocelot, int port, if (ocelot_port->mrp_ring_id != mrp->ring_id) return 0; - ocelot_mrp_del_vcap(ocelot, port); - ocelot_mrp_del_vcap(ocelot, port + ocelot->num_phys_ports); + err = ocelot_mrp_trap_del(ocelot, port); + if (err) + return err; + + ocelot_mrp_del_vcap(ocelot, OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port)); for (i = 0; i < ocelot->num_phys_ports; ++i) { ocelot_port = ocelot->ports[i]; diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index e271b6225b72..5767e38c0c5a 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -14,6 +14,7 @@ #include <linux/phy/phy.h> #include <net/pkt_cls.h> #include "ocelot.h" +#include "ocelot_police.h" #include "ocelot_vcap.h" #include "ocelot_fdma.h" @@ -258,11 +259,10 @@ static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv, return -EEXIST; } - if (action->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, - "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } + err = ocelot_policer_validate(&f->rule->action, action, + extack); + if (err) + return err; pol.rate = (u32)div_u64(action->police.rate_bytes_ps, 1000) * 8; pol.burst = action->police.burst; @@ -419,7 +419,7 @@ static int ocelot_vlan_vid_del(struct net_device *dev, u16 vid) * with VLAN filtering feature. We need to keep it to receive * untagged traffic. */ - if (vid == OCELOT_VLAN_UNAWARE_PVID) + if (vid == OCELOT_STANDALONE_PVID) return 0; ret = ocelot_vlan_del(ocelot, port, vid); @@ -559,7 +559,7 @@ static int ocelot_mc_unsync(struct net_device *dev, const unsigned char *addr) struct ocelot_mact_work_ctx w; ether_addr_copy(w.forget.addr, addr); - w.forget.vid = OCELOT_VLAN_UNAWARE_PVID; + w.forget.vid = OCELOT_STANDALONE_PVID; w.type = OCELOT_MACT_FORGET; return ocelot_enqueue_mact_action(ocelot, &w); @@ -573,7 +573,7 @@ static int ocelot_mc_sync(struct net_device *dev, const unsigned char *addr) struct ocelot_mact_work_ctx w; ether_addr_copy(w.learn.addr, addr); - w.learn.vid = OCELOT_VLAN_UNAWARE_PVID; + w.learn.vid = OCELOT_STANDALONE_PVID; w.learn.pgid = PGID_CPU; w.learn.entry_type = ENTRYTYPE_LOCKED; w.type = OCELOT_MACT_LEARN; @@ -608,9 +608,9 @@ static int ocelot_port_set_mac_address(struct net_device *dev, void *p) /* Learn the new net device MAC address in the mac table. */ ocelot_mact_learn(ocelot, PGID_CPU, addr->sa_data, - OCELOT_VLAN_UNAWARE_PVID, ENTRYTYPE_LOCKED); + OCELOT_STANDALONE_PVID, ENTRYTYPE_LOCKED); /* Then forget the previous one. */ - ocelot_mact_forget(ocelot, dev->dev_addr, OCELOT_VLAN_UNAWARE_PVID); + ocelot_mact_forget(ocelot, dev->dev_addr, OCELOT_STANDALONE_PVID); eth_hw_addr_set(dev, addr->sa_data); return 0; @@ -662,10 +662,11 @@ static int ocelot_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct ocelot_port_private *priv = netdev_priv(dev); - struct ocelot *ocelot = priv->port.ocelot; + struct ocelot_port *ocelot_port = &priv->port; + struct ocelot *ocelot = ocelot_port->ocelot; int port = priv->chip_port; - return ocelot_fdb_add(ocelot, port, addr, vid); + return ocelot_fdb_add(ocelot, port, addr, vid, ocelot_port->bridge); } static int ocelot_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], @@ -673,10 +674,11 @@ static int ocelot_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], const unsigned char *addr, u16 vid) { struct ocelot_port_private *priv = netdev_priv(dev); - struct ocelot *ocelot = priv->port.ocelot; + struct ocelot_port *ocelot_port = &priv->port; + struct ocelot *ocelot = ocelot_port->ocelot; int port = priv->chip_port; - return ocelot_fdb_del(ocelot, port, addr, vid); + return ocelot_fdb_del(ocelot, port, addr, vid, ocelot_port->bridge); } static int ocelot_port_fdb_dump(struct sk_buff *skb, @@ -988,7 +990,7 @@ static int ocelot_port_obj_add_mdb(struct net_device *dev, struct ocelot *ocelot = ocelot_port->ocelot; int port = priv->chip_port; - return ocelot_port_mdb_add(ocelot, port, mdb); + return ocelot_port_mdb_add(ocelot, port, mdb, ocelot_port->bridge); } static int ocelot_port_obj_del_mdb(struct net_device *dev, @@ -999,7 +1001,7 @@ static int ocelot_port_obj_del_mdb(struct net_device *dev, struct ocelot *ocelot = ocelot_port->ocelot; int port = priv->chip_port; - return ocelot_port_mdb_del(ocelot, port, mdb); + return ocelot_port_mdb_del(ocelot, port, mdb, ocelot_port->bridge); } static int ocelot_port_obj_mrp_add(struct net_device *dev, @@ -1173,6 +1175,33 @@ static int ocelot_switchdev_unsync(struct ocelot *ocelot, int port) return 0; } +static int ocelot_bridge_num_get(struct ocelot *ocelot, + const struct net_device *bridge_dev) +{ + int bridge_num = ocelot_bridge_num_find(ocelot, bridge_dev); + + if (bridge_num < 0) { + /* First port that offloads this bridge */ + bridge_num = find_first_zero_bit(&ocelot->bridges, + ocelot->num_phys_ports); + + set_bit(bridge_num, &ocelot->bridges); + } + + return bridge_num; +} + +static void ocelot_bridge_num_put(struct ocelot *ocelot, + const struct net_device *bridge_dev, + int bridge_num) +{ + /* Check if the bridge is still in use, otherwise it is time + * to clean it up so we can reuse this bridge_num later. + */ + if (!ocelot_bridge_num_find(ocelot, bridge_dev)) + clear_bit(bridge_num, &ocelot->bridges); +} + static int ocelot_netdevice_bridge_join(struct net_device *dev, struct net_device *brport_dev, struct net_device *bridge, @@ -1182,9 +1211,14 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev, struct ocelot_port *ocelot_port = &priv->port; struct ocelot *ocelot = ocelot_port->ocelot; int port = priv->chip_port; - int err; + int bridge_num, err; + + bridge_num = ocelot_bridge_num_get(ocelot, bridge); - ocelot_port_bridge_join(ocelot, port, bridge); + err = ocelot_port_bridge_join(ocelot, port, bridge, bridge_num, + extack); + if (err) + goto err_join; err = switchdev_bridge_port_offload(brport_dev, dev, priv, &ocelot_switchdev_nb, @@ -1205,6 +1239,8 @@ err_switchdev_sync: &ocelot_switchdev_blocking_nb); err_switchdev_offload: ocelot_port_bridge_leave(ocelot, port, bridge); +err_join: + ocelot_bridge_num_put(ocelot, bridge, bridge_num); return err; } @@ -1225,6 +1261,7 @@ static int ocelot_netdevice_bridge_leave(struct net_device *dev, struct ocelot_port_private *priv = netdev_priv(dev); struct ocelot_port *ocelot_port = &priv->port; struct ocelot *ocelot = ocelot_port->ocelot; + int bridge_num = ocelot_port->bridge_num; int port = priv->chip_port; int err; @@ -1233,6 +1270,7 @@ static int ocelot_netdevice_bridge_leave(struct net_device *dev, return err; ocelot_port_bridge_leave(ocelot, port, bridge); + ocelot_bridge_num_put(ocelot, bridge, bridge_num); return 0; } @@ -1700,7 +1738,7 @@ int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target, eth_hw_addr_gen(dev, ocelot->base_mac, port); ocelot_mact_learn(ocelot, PGID_CPU, dev->dev_addr, - OCELOT_VLAN_UNAWARE_PVID, ENTRYTYPE_LOCKED); + OCELOT_STANDALONE_PVID, ENTRYTYPE_LOCKED); ocelot_init_port(ocelot, port); diff --git a/drivers/net/ethernet/mscc/ocelot_police.c b/drivers/net/ethernet/mscc/ocelot_police.c index 6f5068c1041a..a65606bb84a0 100644 --- a/drivers/net/ethernet/mscc/ocelot_police.c +++ b/drivers/net/ethernet/mscc/ocelot_police.c @@ -154,6 +154,47 @@ int qos_policer_conf_set(struct ocelot *ocelot, int port, u32 pol_ix, return 0; } +int ocelot_policer_validate(const struct flow_action *action, + const struct flow_action_entry *a, + struct netlink_ext_ack *extack) +{ + if (a->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (a->police.notexceed.act_id != FLOW_ACTION_PIPE && + a->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (a->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, a)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but police action is not last"); + return -EOPNOTSUPP; + } + + if (a->police.peakrate_bytes_ps || + a->police.avrate || a->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (a->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "Offload does not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} +EXPORT_SYMBOL(ocelot_policer_validate); + int ocelot_port_policer_add(struct ocelot *ocelot, int port, struct ocelot_policer *pol) { diff --git a/drivers/net/ethernet/mscc/ocelot_police.h b/drivers/net/ethernet/mscc/ocelot_police.h index 7adb05f71999..7552995f8b17 100644 --- a/drivers/net/ethernet/mscc/ocelot_police.h +++ b/drivers/net/ethernet/mscc/ocelot_police.h @@ -8,6 +8,7 @@ #define _MSCC_OCELOT_POLICE_H_ #include "ocelot.h" +#include <net/flow_offload.h> enum mscc_qos_rate_mode { MSCC_QOS_RATE_MODE_DISABLED, /* Policer/shaper disabled */ @@ -33,4 +34,8 @@ struct qos_policer_conf { int qos_policer_conf_set(struct ocelot *ocelot, int port, u32 pol_ix, struct qos_policer_conf *conf); +int ocelot_policer_validate(const struct flow_action *action, + const struct flow_action_entry *a, + struct netlink_ext_ack *extack); + #endif /* _MSCC_OCELOT_POLICE_H_ */ diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index d3544413a8a4..b976d480aeb3 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -564,9 +564,9 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, val = proto.value[0]; msk = proto.mask[0]; type = IS2_TYPE_IP_UDP_TCP; - if (msk == 0xff && (val == 6 || val == 17)) { + if (msk == 0xff && (val == IPPROTO_TCP || val == IPPROTO_UDP)) { /* UDP/TCP protocol match */ - tcp = (val == 6 ? + tcp = (val == IPPROTO_TCP ? OCELOT_VCAP_BIT_1 : OCELOT_VCAP_BIT_0); vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_TCP, tcp); vcap_key_l4_port_set(vcap, &data, @@ -1195,18 +1195,16 @@ static void ocelot_vcap_block_remove_filter(struct ocelot *ocelot, struct ocelot_vcap_block *block, struct ocelot_vcap_filter *filter) { - struct ocelot_vcap_filter *tmp; - struct list_head *pos, *q; + struct ocelot_vcap_filter *tmp, *n; - list_for_each_safe(pos, q, &block->rules) { - tmp = list_entry(pos, struct ocelot_vcap_filter, list); + list_for_each_entry_safe(tmp, n, &block->rules, list) { if (ocelot_vcap_filter_equal(filter, tmp)) { if (tmp->block_id == VCAP_IS2 && tmp->action.police_ena) ocelot_vcap_policer_del(ocelot, tmp->action.pol_ix); - list_del(pos); + list_del(&tmp->list); kfree(tmp); } } @@ -1401,6 +1399,7 @@ int ocelot_vcap_init(struct ocelot *ocelot) } INIT_LIST_HEAD(&ocelot->dummy_rules); + INIT_LIST_HEAD(&ocelot->traps); INIT_LIST_HEAD(&ocelot->vcap_pol.pol_list); return 0; diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 9cff3d48acbc..9c72b43c1581 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -31,6 +31,7 @@ nfp-objs := \ nfp_net_main.o \ nfp_net_repr.o \ nfp_net_sriov.o \ + nfp_net_xsk.o \ nfp_netvf_main.o \ nfp_port.o \ nfp_shared_buf.o \ diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index a3242b36e216..2c40a3959f94 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -922,6 +922,51 @@ nfp_fl_pedit(const struct flow_action_entry *act, } } +static struct nfp_fl_meter *nfp_fl_meter(char *act_data) +{ + size_t act_size = sizeof(struct nfp_fl_meter); + struct nfp_fl_meter *meter_act; + + meter_act = (struct nfp_fl_meter *)act_data; + + memset(meter_act, 0, act_size); + + meter_act->head.jump_id = NFP_FL_ACTION_OPCODE_METER; + meter_act->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + return meter_act; +} + +static int +nfp_flower_meter_action(struct nfp_app *app, + const struct flow_action_entry *action, + struct nfp_fl_payload *nfp_fl, int *a_len, + struct net_device *netdev, + struct netlink_ext_ack *extack) +{ + struct nfp_fl_meter *fl_meter; + u32 meter_id; + + if (*a_len + sizeof(struct nfp_fl_meter) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload:meter action size beyond the allowed maximum"); + return -EOPNOTSUPP; + } + + meter_id = action->hw_index; + if (!nfp_flower_search_meter_entry(app, meter_id)) { + NL_SET_ERR_MSG_MOD(extack, + "can not offload flow table with unsupported police action.\n"); + return -EOPNOTSUPP; + } + + fl_meter = nfp_fl_meter(&nfp_fl->action_data[*a_len]); + *a_len += sizeof(struct nfp_fl_meter); + fl_meter->meter_id = cpu_to_be32(meter_id); + + return 0; +} + static int nfp_flower_output_action(struct nfp_app *app, const struct flow_action_entry *act, @@ -985,6 +1030,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, struct nfp_flower_pedit_acts *set_act, bool *pkt_host, struct netlink_ext_ack *extack, int act_idx) { + struct nfp_flower_priv *fl_priv = app->priv; struct nfp_fl_pre_tunnel *pre_tun; struct nfp_fl_set_tun *set_tun; struct nfp_fl_push_vlan *psh_v; @@ -1149,6 +1195,18 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, *pkt_host = true; break; + case FLOW_ACTION_POLICE: + if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_METER)) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: unsupported police action in action list"); + return -EOPNOTSUPP; + } + + err = nfp_flower_meter_action(app, act, nfp_fl, a_len, netdev, + extack); + if (err) + return err; + break; default: /* Currently we do not handle any other actions. */ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported action in action list"); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 1543e47456d5..68e8a2fb1a29 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -85,6 +85,7 @@ #define NFP_FL_ACTION_OPCODE_SET_TCP 15 #define NFP_FL_ACTION_OPCODE_PRE_LAG 16 #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 +#define NFP_FL_ACTION_OPCODE_METER 24 #define NFP_FL_ACTION_OPCODE_PUSH_GENEVE 26 #define NFP_FL_ACTION_OPCODE_NUM 32 @@ -260,6 +261,12 @@ struct nfp_fl_set_mpls { __be32 lse; }; +struct nfp_fl_meter { + struct nfp_fl_act_head head; + __be16 reserved; + __be32 meter_id; +}; + /* Metadata with L2 (1W/4B) * ---------------------------------------------------------------- * 3 2 1 diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 917c450a7aad..fa902ce2dd82 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -12,7 +12,9 @@ #include <linux/rhashtable.h> #include <linux/time64.h> #include <linux/types.h> +#include <net/flow_offload.h> #include <net/pkt_cls.h> +#include <net/pkt_sched.h> #include <net/tcp.h> #include <linux/workqueue.h> #include <linux/idr.h> @@ -48,6 +50,7 @@ struct nfp_app; #define NFP_FL_FEATS_IPV6_TUN BIT(7) #define NFP_FL_FEATS_VLAN_QINQ BIT(8) #define NFP_FL_FEATS_QOS_PPS BIT(9) +#define NFP_FL_FEATS_QOS_METER BIT(10) #define NFP_FL_FEATS_HOST_ACK BIT(31) #define NFP_FL_ENABLE_FLOW_MERGE BIT(0) @@ -63,7 +66,8 @@ struct nfp_app; NFP_FL_FEATS_PRE_TUN_RULES | \ NFP_FL_FEATS_IPV6_TUN | \ NFP_FL_FEATS_VLAN_QINQ | \ - NFP_FL_FEATS_QOS_PPS) + NFP_FL_FEATS_QOS_PPS | \ + NFP_FL_FEATS_QOS_METER) struct nfp_fl_mask_id { struct circ_buf mask_id_free_list; @@ -191,6 +195,8 @@ struct nfp_fl_internal_ports { * @qos_stats_work: Workqueue for qos stats processing * @qos_rate_limiters: Current active qos rate limiters * @qos_stats_lock: Lock on qos stats updates + * @meter_stats_lock: Lock on meter stats updates + * @meter_table: Hash table used to store the meter table * @pre_tun_rule_cnt: Number of pre-tunnel rules offloaded * @merge_table: Hash table to store merged flows * @ct_zone_table: Hash table used to store the different zones @@ -228,6 +234,8 @@ struct nfp_flower_priv { struct delayed_work qos_stats_work; unsigned int qos_rate_limiters; spinlock_t qos_stats_lock; /* Protect the qos stats */ + struct mutex meter_stats_lock; /* Protect the meter stats */ + struct rhashtable meter_table; int pre_tun_rule_cnt; struct rhashtable merge_table; struct rhashtable ct_zone_table; @@ -374,6 +382,31 @@ struct nfp_fl_stats_frame { __be64 stats_cookie; }; +struct nfp_meter_stats_entry { + u64 pkts; + u64 bytes; + u64 drops; +}; + +struct nfp_meter_entry { + struct rhash_head ht_node; + u32 meter_id; + bool bps; + u32 rate; + u32 burst; + u64 used; + struct nfp_meter_stats { + u64 update; + struct nfp_meter_stats_entry curr; + struct nfp_meter_stats_entry prev; + } stats; +}; + +enum nfp_meter_op { + NFP_METER_ADD, + NFP_METER_DEL, +}; + static inline bool nfp_flower_internal_port_can_offload(struct nfp_app *app, struct net_device *netdev) @@ -569,4 +602,18 @@ nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, void nfp_flower_update_merge_stats(struct nfp_app *app, struct nfp_fl_payload *sub_flow); + +int nfp_setup_tc_act_offload(struct nfp_app *app, + struct flow_offload_action *fl_act); +int nfp_init_meter_table(struct nfp_app *app); +void nfp_flower_stats_meter_request_all(struct nfp_flower_priv *fl_priv); +void nfp_act_stats_reply(struct nfp_app *app, void *pmsg); +int nfp_flower_offload_one_police(struct nfp_app *app, bool ingress, + bool pps, u32 id, u32 rate, u32 burst); +int nfp_flower_setup_meter_entry(struct nfp_app *app, + const struct flow_action_entry *action, + enum nfp_meter_op op, + u32 meter_id); +struct nfp_meter_entry * +nfp_flower_search_meter_entry(struct nfp_app *app, u32 meter_id); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index f97eff5afd12..92e8ade4854e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1861,6 +1861,20 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct Qdisc *sch, str return 0; } +static int +nfp_setup_tc_no_dev(struct nfp_app *app, enum tc_setup_type type, void *data) +{ + if (!data) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_ACT: + return nfp_setup_tc_act_offload(app, data); + default: + return -EOPNOTSUPP; + } +} + int nfp_flower_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv, enum tc_setup_type type, void *type_data, @@ -1868,7 +1882,7 @@ nfp_flower_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, void * void (*cleanup)(struct flow_block_cb *block_cb)) { if (!netdev) - return -EOPNOTSUPP; + return nfp_setup_tc_no_dev(cb_priv, type, data); if (!nfp_fl_is_netdev_to_offload(netdev)) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c index 784c6dbf8bc4..3206ba83b1aa 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c @@ -1,7 +1,11 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2019 Netronome Systems, Inc. */ +#include <linux/hash.h> +#include <linux/hashtable.h> +#include <linux/jhash.h> #include <linux/math64.h> +#include <linux/vmalloc.h> #include <net/pkt_cls.h> #include <net/pkt_sched.h> @@ -11,10 +15,14 @@ #define NFP_FL_QOS_UPDATE msecs_to_jiffies(1000) #define NFP_FL_QOS_PPS BIT(15) +#define NFP_FL_QOS_METER BIT(10) struct nfp_police_cfg_head { __be32 flags_opts; - __be32 port; + union { + __be32 meter_id; + __be32 port; + }; }; enum NFP_FL_QOS_TYPES { @@ -46,7 +54,15 @@ enum NFP_FL_QOS_TYPES { * | Committed Information Rate | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * Word[0](FLag options): - * [15] p(pps) 1 for pps ,0 for bps + * [15] p(pps) 1 for pps, 0 for bps + * + * Meter control message + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-------------------------------+-+---+-----+-+---------+-+---+-+ + * | Reserved |p| Y |TYPE |E|TSHFV |P| PC|R| + * +-------------------------------+-+---+-----+-+---------+-+---+-+ + * | meter ID | + * +-------------------------------+-------------------------------+ * */ struct nfp_police_config { @@ -67,6 +83,74 @@ struct nfp_police_stats_reply { __be64 drop_pkts; }; +int nfp_flower_offload_one_police(struct nfp_app *app, bool ingress, + bool pps, u32 id, u32 rate, u32 burst) +{ + struct nfp_police_config *config; + struct sk_buff *skb; + + skb = nfp_flower_cmsg_alloc(app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_MOD, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + if (pps) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_PPS); + if (!ingress) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_METER); + + if (ingress) + config->head.port = cpu_to_be32(id); + else + config->head.meter_id = cpu_to_be32(id); + + config->bkt_tkn_p = cpu_to_be32(burst); + config->bkt_tkn_c = cpu_to_be32(burst); + config->pbs = cpu_to_be32(burst); + config->cbs = cpu_to_be32(burst); + config->pir = cpu_to_be32(rate); + config->cir = cpu_to_be32(rate); + nfp_ctrl_tx(app->ctrl, skb); + + return 0; +} + +static int nfp_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + return 0; +} + static int nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, struct tc_cls_matchall_offload *flow, @@ -77,15 +161,15 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, struct nfp_flower_priv *fl_priv = app->priv; struct flow_action_entry *action = NULL; struct nfp_flower_repr_priv *repr_priv; - struct nfp_police_config *config; u32 netdev_port_id, i; struct nfp_repr *repr; - struct sk_buff *skb; bool pps_support; u32 bps_num = 0; u32 pps_num = 0; u32 burst; + bool pps; u64 rate; + int err; if (!nfp_netdev_is_nfp_repr(netdev)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port"); @@ -132,6 +216,11 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, "unsupported offload: qos rate limit offload requires police action"); return -EOPNOTSUPP; } + + err = nfp_policer_validate(&flow->rule->action, action, extack); + if (err) + return err; + if (action->police.rate_bytes_ps > 0) { if (bps_num++) { NL_SET_ERR_MSG_MOD(extack, @@ -169,23 +258,12 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, } if (rate != 0) { - skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config), - NFP_FLOWER_CMSG_TYPE_QOS_MOD, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - config = nfp_flower_cmsg_get_data(skb); - memset(config, 0, sizeof(struct nfp_police_config)); + pps = false; if (action->police.rate_pkt_ps > 0) - config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_PPS); - config->head.port = cpu_to_be32(netdev_port_id); - config->bkt_tkn_p = cpu_to_be32(burst); - config->bkt_tkn_c = cpu_to_be32(burst); - config->pbs = cpu_to_be32(burst); - config->cbs = cpu_to_be32(burst); - config->pir = cpu_to_be32(rate); - config->cir = cpu_to_be32(rate); - nfp_ctrl_tx(repr->app->ctrl, skb); + pps = true; + nfp_flower_offload_one_police(repr->app, true, + pps, netdev_port_id, + rate, burst); } } repr_priv->qos_table.netdev_port_id = netdev_port_id; @@ -266,6 +344,9 @@ void nfp_flower_stats_rlim_reply(struct nfp_app *app, struct sk_buff *skb) u32 netdev_port_id; msg = nfp_flower_cmsg_get_data(skb); + if (be32_to_cpu(msg->head.flags_opts) & NFP_FL_QOS_METER) + return nfp_act_stats_reply(app, msg); + netdev_port_id = be32_to_cpu(msg->head.port); rcu_read_lock(); netdev = nfp_app_dev_get(app, netdev_port_id, NULL); @@ -297,7 +378,7 @@ exit_unlock_rcu: static void nfp_flower_stats_rlim_request(struct nfp_flower_priv *fl_priv, - u32 netdev_port_id) + u32 id, bool ingress) { struct nfp_police_cfg_head *head; struct sk_buff *skb; @@ -308,10 +389,15 @@ nfp_flower_stats_rlim_request(struct nfp_flower_priv *fl_priv, GFP_ATOMIC); if (!skb) return; - head = nfp_flower_cmsg_get_data(skb); + memset(head, 0, sizeof(struct nfp_police_cfg_head)); - head->port = cpu_to_be32(netdev_port_id); + if (ingress) { + head->port = cpu_to_be32(id); + } else { + head->flags_opts = cpu_to_be32(NFP_FL_QOS_METER); + head->meter_id = cpu_to_be32(id); + } nfp_ctrl_tx(fl_priv->app->ctrl, skb); } @@ -341,7 +427,8 @@ nfp_flower_stats_rlim_request_all(struct nfp_flower_priv *fl_priv) if (!netdev_port_id) continue; - nfp_flower_stats_rlim_request(fl_priv, netdev_port_id); + nfp_flower_stats_rlim_request(fl_priv, + netdev_port_id, true); } } @@ -359,6 +446,8 @@ static void update_stats_cache(struct work_struct *work) qos_stats_work); nfp_flower_stats_rlim_request_all(fl_priv); + nfp_flower_stats_meter_request_all(fl_priv); + schedule_delayed_work(&fl_priv->qos_stats_work, NFP_FL_QOS_UPDATE); } @@ -406,6 +495,9 @@ void nfp_flower_qos_init(struct nfp_app *app) struct nfp_flower_priv *fl_priv = app->priv; spin_lock_init(&fl_priv->qos_stats_lock); + mutex_init(&fl_priv->meter_stats_lock); + nfp_init_meter_table(app); + INIT_DELAYED_WORK(&fl_priv->qos_stats_work, &update_stats_cache); } @@ -441,3 +533,333 @@ int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev, return -EOPNOTSUPP; } } + +/* offload tc action, currently only for tc police */ + +static const struct rhashtable_params stats_meter_table_params = { + .key_offset = offsetof(struct nfp_meter_entry, meter_id), + .head_offset = offsetof(struct nfp_meter_entry, ht_node), + .key_len = sizeof(u32), +}; + +struct nfp_meter_entry * +nfp_flower_search_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_flower_priv *priv = app->priv; + + return rhashtable_lookup_fast(&priv->meter_table, &meter_id, + stats_meter_table_params); +} + +static struct nfp_meter_entry * +nfp_flower_add_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_flower_priv *priv = app->priv; + + meter_entry = rhashtable_lookup_fast(&priv->meter_table, + &meter_id, + stats_meter_table_params); + if (meter_entry) + return meter_entry; + + meter_entry = kzalloc(sizeof(*meter_entry), GFP_KERNEL); + if (!meter_entry) + return NULL; + + meter_entry->meter_id = meter_id; + meter_entry->used = jiffies; + if (rhashtable_insert_fast(&priv->meter_table, &meter_entry->ht_node, + stats_meter_table_params)) { + kfree(meter_entry); + return NULL; + } + + priv->qos_rate_limiters++; + if (priv->qos_rate_limiters == 1) + schedule_delayed_work(&priv->qos_stats_work, + NFP_FL_QOS_UPDATE); + + return meter_entry; +} + +static void nfp_flower_del_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_flower_priv *priv = app->priv; + + meter_entry = rhashtable_lookup_fast(&priv->meter_table, &meter_id, + stats_meter_table_params); + if (!meter_entry) + return; + + rhashtable_remove_fast(&priv->meter_table, + &meter_entry->ht_node, + stats_meter_table_params); + kfree(meter_entry); + priv->qos_rate_limiters--; + if (!priv->qos_rate_limiters) + cancel_delayed_work_sync(&priv->qos_stats_work); +} + +int nfp_flower_setup_meter_entry(struct nfp_app *app, + const struct flow_action_entry *action, + enum nfp_meter_op op, + u32 meter_id) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + int err = 0; + + mutex_lock(&fl_priv->meter_stats_lock); + + switch (op) { + case NFP_METER_DEL: + nfp_flower_del_meter_entry(app, meter_id); + goto exit_unlock; + case NFP_METER_ADD: + meter_entry = nfp_flower_add_meter_entry(app, meter_id); + break; + default: + err = -EOPNOTSUPP; + goto exit_unlock; + } + + if (!meter_entry) { + err = -ENOMEM; + goto exit_unlock; + } + + if (action->police.rate_bytes_ps > 0) { + meter_entry->bps = true; + meter_entry->rate = action->police.rate_bytes_ps; + meter_entry->burst = action->police.burst; + } else { + meter_entry->bps = false; + meter_entry->rate = action->police.rate_pkt_ps; + meter_entry->burst = action->police.burst_pkt; + } + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); + return err; +} + +int nfp_init_meter_table(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + + return rhashtable_init(&priv->meter_table, &stats_meter_table_params); +} + +void +nfp_flower_stats_meter_request_all(struct nfp_flower_priv *fl_priv) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct rhashtable_iter iter; + + mutex_lock(&fl_priv->meter_stats_lock); + rhashtable_walk_enter(&fl_priv->meter_table, &iter); + rhashtable_walk_start(&iter); + + while ((meter_entry = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(meter_entry)) + continue; + nfp_flower_stats_rlim_request(fl_priv, + meter_entry->meter_id, false); + } + + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + mutex_unlock(&fl_priv->meter_stats_lock); +} + +static int +nfp_act_install_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct flow_action_entry *paction = &fl_act->action.entries[0]; + u32 action_num = fl_act->action.num_entries; + struct nfp_flower_priv *fl_priv = app->priv; + struct flow_action_entry *action = NULL; + u32 burst, i, meter_id; + bool pps_support, pps; + bool add = false; + u64 rate; + + pps_support = !!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_PPS); + + for (i = 0 ; i < action_num; i++) { + /*set qos associate data for this interface */ + action = paction + i; + if (action->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + continue; + } + if (action->police.rate_bytes_ps > 0) { + rate = action->police.rate_bytes_ps; + burst = action->police.burst; + } else if (action->police.rate_pkt_ps > 0 && pps_support) { + rate = action->police.rate_pkt_ps; + burst = action->police.burst_pkt; + } else { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: unsupported qos rate limit"); + continue; + } + + if (rate != 0) { + meter_id = action->hw_index; + if (nfp_flower_setup_meter_entry(app, action, NFP_METER_ADD, meter_id)) + continue; + + pps = false; + if (action->police.rate_pkt_ps > 0) + pps = true; + nfp_flower_offload_one_police(app, false, pps, meter_id, + rate, burst); + add = true; + } + } + + return add ? 0 : -EOPNOTSUPP; +} + +static int +nfp_act_remove_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_police_config *config; + struct sk_buff *skb; + u32 meter_id; + bool pps; + + /*delete qos associate data for this interface */ + if (fl_act->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + return -EOPNOTSUPP; + } + + meter_id = fl_act->index; + meter_entry = nfp_flower_search_meter_entry(app, meter_id); + if (!meter_entry) { + NL_SET_ERR_MSG_MOD(extack, + "no meter entry when delete the action index."); + return -ENOENT; + } + pps = !meter_entry->bps; + + skb = nfp_flower_cmsg_alloc(app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_DEL, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_METER); + config->head.meter_id = cpu_to_be32(meter_id); + if (pps) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_PPS); + + nfp_ctrl_tx(app->ctrl, skb); + nfp_flower_setup_meter_entry(app, NULL, NFP_METER_DEL, meter_id); + + return 0; +} + +void +nfp_act_stats_reply(struct nfp_app *app, void *pmsg) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_police_stats_reply *msg = pmsg; + u32 meter_id; + + meter_id = be32_to_cpu(msg->head.meter_id); + mutex_lock(&fl_priv->meter_stats_lock); + + meter_entry = nfp_flower_search_meter_entry(app, meter_id); + if (!meter_entry) + goto exit_unlock; + + meter_entry->stats.curr.pkts = be64_to_cpu(msg->pass_pkts) + + be64_to_cpu(msg->drop_pkts); + meter_entry->stats.curr.bytes = be64_to_cpu(msg->pass_bytes) + + be64_to_cpu(msg->drop_bytes); + meter_entry->stats.curr.drops = be64_to_cpu(msg->drop_pkts); + if (!meter_entry->stats.update) { + meter_entry->stats.prev.pkts = meter_entry->stats.curr.pkts; + meter_entry->stats.prev.bytes = meter_entry->stats.curr.bytes; + meter_entry->stats.prev.drops = meter_entry->stats.curr.drops; + } + + meter_entry->stats.update = jiffies; + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); +} + +static int +nfp_act_stats_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + u64 diff_bytes, diff_pkts, diff_drops; + int err = 0; + + if (fl_act->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + return -EOPNOTSUPP; + } + + mutex_lock(&fl_priv->meter_stats_lock); + meter_entry = nfp_flower_search_meter_entry(app, fl_act->index); + if (!meter_entry) { + err = -ENOENT; + goto exit_unlock; + } + diff_pkts = meter_entry->stats.curr.pkts > meter_entry->stats.prev.pkts ? + meter_entry->stats.curr.pkts - meter_entry->stats.prev.pkts : 0; + diff_bytes = meter_entry->stats.curr.bytes > meter_entry->stats.prev.bytes ? + meter_entry->stats.curr.bytes - meter_entry->stats.prev.bytes : 0; + diff_drops = meter_entry->stats.curr.drops > meter_entry->stats.prev.drops ? + meter_entry->stats.curr.drops - meter_entry->stats.prev.drops : 0; + + flow_stats_update(&fl_act->stats, diff_bytes, diff_pkts, diff_drops, + meter_entry->stats.update, + FLOW_ACTION_HW_STATS_DELAYED); + + meter_entry->stats.prev.pkts = meter_entry->stats.curr.pkts; + meter_entry->stats.prev.bytes = meter_entry->stats.curr.bytes; + meter_entry->stats.prev.drops = meter_entry->stats.curr.drops; + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); + return err; +} + +int nfp_setup_tc_act_offload(struct nfp_app *app, + struct flow_offload_action *fl_act) +{ + struct netlink_ext_ack *extack = fl_act->extack; + struct nfp_flower_priv *fl_priv = app->priv; + + if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_METER)) + return -EOPNOTSUPP; + + switch (fl_act->command) { + case FLOW_ACT_REPLACE: + return nfp_act_install_actions(app, fl_act, extack); + case FLOW_ACT_DESTROY: + return nfp_act_remove_actions(app, fl_act, extack); + case FLOW_ACT_STATS: + return nfp_act_stats_actions(app, fl_act, extack); + default: + return -EOPNOTSUPP; + } +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index cb43651ea9ba..c71bd555f482 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -356,7 +356,7 @@ __nfp_tun_add_route_to_cache(struct list_head *route_list, return 0; } - entry = kmalloc(sizeof(*entry) + add_len, GFP_ATOMIC); + entry = kmalloc(struct_size(entry, ip_add, add_len), GFP_ATOMIC); if (!entry) { spin_unlock_bh(list_lock); return -ENOMEM; @@ -942,8 +942,8 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, if (!nfp_mac_idx) { /* Assign a global index if non-repr or MAC is now shared. */ if (entry || !port) { - ida_idx = ida_simple_get(&priv->tun.mac_off_ids, 0, - NFP_MAX_MAC_INDEX, GFP_KERNEL); + ida_idx = ida_alloc_max(&priv->tun.mac_off_ids, + NFP_MAX_MAC_INDEX, GFP_KERNEL); if (ida_idx < 0) return ida_idx; @@ -998,7 +998,7 @@ err_free_entry: kfree(entry); err_free_ida: if (ida_idx != -1) - ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); + ida_free(&priv->tun.mac_off_ids, ida_idx); return err; } @@ -1061,7 +1061,7 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, } ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); - ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); + ida_free(&priv->tun.mac_off_ids, ida_idx); entry->index = nfp_mac_idx; return 0; } @@ -1081,7 +1081,7 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, /* If MAC has global ID then extract and free the ida entry. */ if (nfp_tunnel_is_mac_idx_global(nfp_mac_idx)) { ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); - ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); + ida_free(&priv->tun.mac_off_ids, ida_idx); } kfree(entry); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 0b1865e9f0b5..437a19722fcf 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -109,6 +109,7 @@ struct nfp_eth_table_port; struct nfp_net; struct nfp_net_r_vector; struct nfp_port; +struct xsk_buff_pool; /* Convenience macro for wrapping descriptor index on ring size */ #define D_IDX(ring, idx) ((idx) & ((ring)->cnt - 1)) @@ -170,11 +171,14 @@ struct nfp_net_tx_desc { * struct nfp_net_tx_buf - software TX buffer descriptor * @skb: normal ring, sk_buff associated with this buffer * @frag: XDP ring, page frag associated with this buffer + * @xdp: XSK buffer pool handle (for AF_XDP) * @dma_addr: DMA mapping address of the buffer * @fidx: Fragment index (-1 for the head and [0..nr_frags-1] for frags) * @pkt_cnt: Number of packets to be produced out of the skb associated * with this buffer (valid only on the head's buffer). * Will be 1 for all non-TSO packets. + * @is_xsk_tx: Flag if buffer is a RX buffer after a XDP_TX action and not a + * buffer from the TX queue (for AF_XDP). * @real_len: Number of bytes which to be produced out of the skb (valid only * on the head's buffer). Equal to skb->len for non-TSO packets. */ @@ -182,10 +186,18 @@ struct nfp_net_tx_buf { union { struct sk_buff *skb; void *frag; + struct xdp_buff *xdp; }; dma_addr_t dma_addr; - short int fidx; - u16 pkt_cnt; + union { + struct { + short int fidx; + u16 pkt_cnt; + }; + struct { + bool is_xsk_tx; + }; + }; u32 real_len; }; @@ -315,6 +327,16 @@ struct nfp_net_rx_buf { }; /** + * struct nfp_net_xsk_rx_buf - software RX XSK buffer descriptor + * @dma_addr: DMA mapping address of the buffer + * @xdp: XSK buffer pool handle (for AF_XDP) + */ +struct nfp_net_xsk_rx_buf { + dma_addr_t dma_addr; + struct xdp_buff *xdp; +}; + +/** * struct nfp_net_rx_ring - RX ring structure * @r_vec: Back pointer to ring vector structure * @cnt: Size of the queue in number of descriptors @@ -324,6 +346,7 @@ struct nfp_net_rx_buf { * @fl_qcidx: Queue Controller Peripheral (QCP) queue index for the freelist * @qcp_fl: Pointer to base of the QCP freelist queue * @rxbufs: Array of transmitted FL/RX buffers + * @xsk_rxbufs: Array of transmitted FL/RX buffers (for AF_XDP) * @rxds: Virtual address of FL/RX ring in host memory * @xdp_rxq: RX-ring info avail for XDP * @dma: DMA address of the FL/RX ring @@ -342,6 +365,7 @@ struct nfp_net_rx_ring { u8 __iomem *qcp_fl; struct nfp_net_rx_buf *rxbufs; + struct nfp_net_xsk_rx_buf *xsk_rxbufs; struct nfp_net_rx_desc *rxds; struct xdp_rxq_info xdp_rxq; @@ -360,6 +384,7 @@ struct nfp_net_rx_ring { * @tx_ring: Pointer to TX ring * @rx_ring: Pointer to RX ring * @xdp_ring: Pointer to an extra TX ring for XDP + * @xsk_pool: XSK buffer pool active on vector queue pair (for AF_XDP) * @irq_entry: MSI-X table entry (use for talking to the device) * @event_ctr: Number of interrupt * @rx_dim: Dynamic interrupt moderation structure for RX @@ -431,6 +456,7 @@ struct nfp_net_r_vector { u64 rx_replace_buf_alloc_fail; struct nfp_net_tx_ring *xdp_ring; + struct xsk_buff_pool *xsk_pool; struct u64_stats_sync tx_sync; u64 tx_pkts; @@ -501,6 +527,7 @@ struct nfp_stat_pair { * @num_stack_tx_rings: Number of TX rings used by the stack (not XDP) * @num_rx_rings: Currently configured number of RX rings * @mtu: Device MTU + * @xsk_pools: XSK buffer pools, @max_r_vecs in size (for AF_XDP). */ struct nfp_net_dp { struct device *dev; @@ -537,6 +564,8 @@ struct nfp_net_dp { unsigned int num_rx_rings; unsigned int mtu; + + struct xsk_buff_pool **xsk_pools; }; /** @@ -965,6 +994,7 @@ int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd); void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 update); int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn); +void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr); unsigned int nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries, unsigned int min_irqs, unsigned int want_irqs); @@ -973,6 +1003,19 @@ void nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries, unsigned int n); +void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring); +void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget); + +bool +nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, + void *data, void *pkt, unsigned int pkt_len, int meta_len); + +void nfp_net_rx_csum(const struct nfp_net_dp *dp, + struct nfp_net_r_vector *r_vec, + const struct nfp_net_rx_desc *rxd, + const struct nfp_meta_parsed *meta, + struct sk_buff *skb); + struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn); int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 79257ec41987..d5ff80a62882 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -38,6 +38,7 @@ #include <net/tls.h> #include <net/vxlan.h> +#include <net/xdp_sock_drv.h> #include "nfpcore/nfp_nsp.h" #include "ccm.h" @@ -45,6 +46,7 @@ #include "nfp_net_ctrl.h" #include "nfp_net.h" #include "nfp_net_sriov.h" +#include "nfp_net_xsk.h" #include "nfp_port.h" #include "crypto/crypto.h" #include "crypto/fw.h" @@ -381,7 +383,7 @@ int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd) * * Clear the ICR for the IRQ entry. */ -static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr) +void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr) { nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED); nn_pci_flush(nn); @@ -923,7 +925,7 @@ static void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle) #endif } -static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) +void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) { wmb(); nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); @@ -1142,7 +1144,7 @@ err_flush: * @tx_ring: TX ring structure * @budget: NAPI budget (only used as bool to determine if in NAPI context) */ -static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) +void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) { struct nfp_net_r_vector *r_vec = tx_ring->r_vec; struct nfp_net_dp *dp = &r_vec->nfp_net->dp; @@ -1315,6 +1317,9 @@ nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) tx_ring->rd_p++; } + if (tx_ring->is_xdp) + nfp_net_xsk_tx_bufs_free(tx_ring); + memset(tx_ring->txds, 0, tx_ring->size); tx_ring->wr_p = 0; tx_ring->rd_p = 0; @@ -1338,24 +1343,43 @@ static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) /* Receive processing */ static unsigned int -nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp) +nfp_net_calc_fl_bufsz_data(struct nfp_net_dp *dp) { - unsigned int fl_bufsz; + unsigned int fl_bufsz = 0; - fl_bufsz = NFP_NET_RX_BUF_HEADROOM; - fl_bufsz += dp->rx_dma_off; if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) fl_bufsz += NFP_NET_MAX_PREPEND; else fl_bufsz += dp->rx_offset; fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + dp->mtu; + return fl_bufsz; +} + +static unsigned int nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp) +{ + unsigned int fl_bufsz; + + fl_bufsz = NFP_NET_RX_BUF_HEADROOM; + fl_bufsz += dp->rx_dma_off; + fl_bufsz += nfp_net_calc_fl_bufsz_data(dp); + fl_bufsz = SKB_DATA_ALIGN(fl_bufsz); fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); return fl_bufsz; } +static unsigned int nfp_net_calc_fl_bufsz_xsk(struct nfp_net_dp *dp) +{ + unsigned int fl_bufsz; + + fl_bufsz = XDP_PACKET_HEADROOM; + fl_bufsz += nfp_net_calc_fl_bufsz_data(dp); + + return fl_bufsz; +} + static void nfp_net_free_frag(void *frag, bool xdp) { @@ -1484,10 +1508,14 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring) /* Move the empty entry to the end of the list */ wr_idx = D_IDX(rx_ring, rx_ring->wr_p); last_idx = rx_ring->cnt - 1; - rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr; - rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag; - rx_ring->rxbufs[last_idx].dma_addr = 0; - rx_ring->rxbufs[last_idx].frag = NULL; + if (rx_ring->r_vec->xsk_pool) { + rx_ring->xsk_rxbufs[wr_idx] = rx_ring->xsk_rxbufs[last_idx]; + memset(&rx_ring->xsk_rxbufs[last_idx], 0, + sizeof(*rx_ring->xsk_rxbufs)); + } else { + rx_ring->rxbufs[wr_idx] = rx_ring->rxbufs[last_idx]; + memset(&rx_ring->rxbufs[last_idx], 0, sizeof(*rx_ring->rxbufs)); + } memset(rx_ring->rxds, 0, rx_ring->size); rx_ring->wr_p = 0; @@ -1509,6 +1537,9 @@ nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp, { unsigned int i; + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + return; + for (i = 0; i < rx_ring->cnt - 1; i++) { /* NULL skb can only happen when initial filling of the ring * fails to allocate enough buffers and calls here to free @@ -1536,6 +1567,9 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_buf *rxbufs; unsigned int i; + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + return 0; + rxbufs = rx_ring->rxbufs; for (i = 0; i < rx_ring->cnt - 1; i++) { @@ -1560,6 +1594,9 @@ nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp, { unsigned int i; + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + return nfp_net_xsk_rx_ring_fill_freelist(rx_ring); + for (i = 0; i < rx_ring->cnt - 1; i++) nfp_net_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag, rx_ring->rxbufs[i].dma_addr); @@ -1587,10 +1624,10 @@ static int nfp_net_rx_csum_has_errors(u16 flags) * @meta: Parsed metadata prepend * @skb: Pointer to SKB */ -static void nfp_net_rx_csum(struct nfp_net_dp *dp, - struct nfp_net_r_vector *r_vec, - struct nfp_net_rx_desc *rxd, - struct nfp_meta_parsed *meta, struct sk_buff *skb) +void nfp_net_rx_csum(const struct nfp_net_dp *dp, + struct nfp_net_r_vector *r_vec, + const struct nfp_net_rx_desc *rxd, + const struct nfp_meta_parsed *meta, struct sk_buff *skb) { skb_checksum_none_assert(skb); @@ -1668,7 +1705,7 @@ nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta, &rx_hash->hash); } -static bool +bool nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, void *data, void *pkt, unsigned int pkt_len, int meta_len) { @@ -2540,7 +2577,11 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) if (dp->netdev) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - kvfree(rx_ring->rxbufs); + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + kvfree(rx_ring->xsk_rxbufs); + else + kvfree(rx_ring->rxbufs); if (rx_ring->rxds) dma_free_coherent(dp->dev, rx_ring->size, @@ -2548,6 +2589,7 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) rx_ring->cnt = 0; rx_ring->rxbufs = NULL; + rx_ring->xsk_rxbufs = NULL; rx_ring->rxds = NULL; rx_ring->dma = 0; rx_ring->size = 0; @@ -2563,8 +2605,18 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) static int nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) { + enum xdp_mem_type mem_type; + size_t rxbuf_sw_desc_sz; int err; + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) { + mem_type = MEM_TYPE_XSK_BUFF_POOL; + rxbuf_sw_desc_sz = sizeof(*rx_ring->xsk_rxbufs); + } else { + mem_type = MEM_TYPE_PAGE_ORDER0; + rxbuf_sw_desc_sz = sizeof(*rx_ring->rxbufs); + } + if (dp->netdev) { err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, rx_ring->idx, rx_ring->r_vec->napi.napi_id); @@ -2572,6 +2624,10 @@ nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) return err; } + err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, mem_type, NULL); + if (err) + goto err_alloc; + rx_ring->cnt = dp->rxd_cnt; rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds)); rx_ring->rxds = dma_alloc_coherent(dp->dev, rx_ring->size, @@ -2583,10 +2639,17 @@ nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) goto err_alloc; } - rx_ring->rxbufs = kvcalloc(rx_ring->cnt, sizeof(*rx_ring->rxbufs), - GFP_KERNEL); - if (!rx_ring->rxbufs) - goto err_alloc; + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) { + rx_ring->xsk_rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz, + GFP_KERNEL); + if (!rx_ring->xsk_rxbufs) + goto err_alloc; + } else { + rx_ring->rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz, + GFP_KERNEL); + if (!rx_ring->rxbufs) + goto err_alloc; + } return 0; @@ -2639,6 +2702,27 @@ static void nfp_net_rx_rings_free(struct nfp_net_dp *dp) } static void +nfp_net_napi_add(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, int idx) +{ + if (dp->netdev) + netif_napi_add(dp->netdev, &r_vec->napi, + nfp_net_has_xsk_pool_slow(dp, idx) ? + nfp_net_xsk_poll : nfp_net_poll, + NAPI_POLL_WEIGHT); + else + tasklet_enable(&r_vec->tasklet); +} + +static void +nfp_net_napi_del(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec) +{ + if (dp->netdev) + netif_napi_del(&r_vec->napi); + else + tasklet_disable(&r_vec->tasklet); +} + +static void nfp_net_vector_assign_rings(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, int idx) { @@ -2648,6 +2732,17 @@ nfp_net_vector_assign_rings(struct nfp_net_dp *dp, r_vec->xdp_ring = idx < dp->num_tx_rings - dp->num_stack_tx_rings ? &dp->tx_rings[dp->num_stack_tx_rings + idx] : NULL; + + if (nfp_net_has_xsk_pool_slow(dp, idx) || r_vec->xsk_pool) { + r_vec->xsk_pool = dp->xdp_prog ? dp->xsk_pools[idx] : NULL; + + if (r_vec->xsk_pool) + xsk_pool_set_rxq_info(r_vec->xsk_pool, + &r_vec->rx_ring->xdp_rxq); + + nfp_net_napi_del(dp, r_vec); + nfp_net_napi_add(dp, r_vec, idx); + } } static int @@ -2656,23 +2751,14 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, { int err; - /* Setup NAPI */ - if (nn->dp.netdev) - netif_napi_add(nn->dp.netdev, &r_vec->napi, - nfp_net_poll, NAPI_POLL_WEIGHT); - else - tasklet_enable(&r_vec->tasklet); + nfp_net_napi_add(&nn->dp, r_vec, idx); snprintf(r_vec->name, sizeof(r_vec->name), "%s-rxtx-%d", nfp_net_name(nn), idx); err = request_irq(r_vec->irq_vector, r_vec->handler, 0, r_vec->name, r_vec); if (err) { - if (nn->dp.netdev) - netif_napi_del(&r_vec->napi); - else - tasklet_disable(&r_vec->tasklet); - + nfp_net_napi_del(&nn->dp, r_vec); nn_err(nn, "Error requesting IRQ %d\n", r_vec->irq_vector); return err; } @@ -2690,11 +2776,7 @@ static void nfp_net_cleanup_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec) { irq_set_affinity_hint(r_vec->irq_vector, NULL); - if (nn->dp.netdev) - netif_napi_del(&r_vec->napi); - else - tasklet_disable(&r_vec->tasklet); - + nfp_net_napi_del(&nn->dp, r_vec); free_irq(r_vec->irq_vector, r_vec); } @@ -2808,8 +2890,11 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn) if (err) nn_err(nn, "Could not disable device: %d\n", err); - for (r = 0; r < nn->dp.num_rx_rings; r++) + for (r = 0; r < nn->dp.num_rx_rings; r++) { nfp_net_rx_ring_reset(&nn->dp.rx_rings[r]); + if (nfp_net_has_xsk_pool_slow(&nn->dp, nn->dp.rx_rings[r].idx)) + nfp_net_xsk_rx_bufs_free(&nn->dp.rx_rings[r]); + } for (r = 0; r < nn->dp.num_tx_rings; r++) nfp_net_tx_ring_reset(&nn->dp, &nn->dp.tx_rings[r]); for (r = 0; r < nn->dp.num_r_vecs; r++) @@ -3296,6 +3381,15 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn) *new = nn->dp; + new->xsk_pools = kmemdup(new->xsk_pools, + array_size(nn->max_r_vecs, + sizeof(new->xsk_pools)), + GFP_KERNEL); + if (!new->xsk_pools) { + kfree(new); + return NULL; + } + /* Clear things which need to be recomputed */ new->fl_bufsz = 0; new->tx_rings = NULL; @@ -3306,10 +3400,18 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn) return new; } +static void nfp_net_free_dp(struct nfp_net_dp *dp) +{ + kfree(dp->xsk_pools); + kfree(dp); +} + static int nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp, struct netlink_ext_ack *extack) { + unsigned int r, xsk_min_fl_bufsz; + /* XDP-enabled tests */ if (!dp->xdp_prog) return 0; @@ -3322,6 +3424,18 @@ nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp, return -EINVAL; } + xsk_min_fl_bufsz = nfp_net_calc_fl_bufsz_xsk(dp); + for (r = 0; r < nn->max_r_vecs; r++) { + if (!dp->xsk_pools[r]) + continue; + + if (xsk_pool_get_rx_frame_size(dp->xsk_pools[r]) < xsk_min_fl_bufsz) { + NL_SET_ERR_MSG_MOD(extack, + "XSK buffer pool chunk size too small"); + return -EINVAL; + } + } + return 0; } @@ -3389,7 +3503,7 @@ int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp, nfp_net_open_stack(nn); exit_free_dp: - kfree(dp); + nfp_net_free_dp(dp); return err; @@ -3398,7 +3512,7 @@ err_free_rx: err_cleanup_vecs: for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--) nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); - kfree(dp); + nfp_net_free_dp(dp); return err; } @@ -3716,6 +3830,9 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) return nfp_net_xdp_setup_drv(nn, xdp); case XDP_SETUP_PROG_HW: return nfp_net_xdp_setup_hw(nn, xdp); + case XDP_SETUP_XSK_POOL: + return nfp_net_xsk_setup_pool(netdev, xdp->xsk.pool, + xdp->xsk.queue_id); default: return nfp_app_bpf(nn->app, nn, xdp); } @@ -3766,6 +3883,7 @@ const struct net_device_ops nfp_net_netdev_ops = { .ndo_features_check = nfp_net_features_check, .ndo_get_phys_port_name = nfp_net_get_phys_port_name, .ndo_bpf = nfp_net_xdp, + .ndo_xsk_wakeup = nfp_net_xsk_wakeup, .ndo_get_devlink_port = nfp_devlink_get_devlink_port, }; @@ -3893,6 +4011,14 @@ nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev, nn->dp.num_r_vecs = max(nn->dp.num_tx_rings, nn->dp.num_rx_rings); nn->dp.num_r_vecs = min_t(unsigned int, nn->dp.num_r_vecs, num_online_cpus()); + nn->max_r_vecs = nn->dp.num_r_vecs; + + nn->dp.xsk_pools = kcalloc(nn->max_r_vecs, sizeof(nn->dp.xsk_pools), + GFP_KERNEL); + if (!nn->dp.xsk_pools) { + err = -ENOMEM; + goto err_free_nn; + } nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT; nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; @@ -3932,6 +4058,7 @@ void nfp_net_free(struct nfp_net *nn) WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted); nfp_ccm_mbox_free(nn); + kfree(nn->dp.xsk_pools); if (nn->dp.netdev) free_netdev(nn->dp.netdev); else diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 3d61a8cb60b0..50007cc5b580 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -1,8 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* Copyright (C) 2015-2018 Netronome Systems, Inc. */ -/* - * nfp_net_ctrl.h +/* nfp_net_ctrl.h * Netronome network device driver: Control BAR layout * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> * Jason McMullan <jason.mcmullan@netronome.com> @@ -15,30 +14,24 @@ #include <linux/types.h> -/** - * Configuration BAR size. +/* Configuration BAR size. * * The configuration BAR is 8K in size, but due to * THB-350, 32k needs to be reserved. */ #define NFP_NET_CFG_BAR_SZ (32 * 1024) -/** - * Offset in Freelist buffer where packet starts on RX - */ +/* Offset in Freelist buffer where packet starts on RX */ #define NFP_NET_RX_OFFSET 32 -/** - * LSO parameters +/* LSO parameters * %NFP_NET_LSO_MAX_HDR_SZ: Maximum header size supported for LSO frames * %NFP_NET_LSO_MAX_SEGS: Maximum number of segments LSO frame can produce */ #define NFP_NET_LSO_MAX_HDR_SZ 255 #define NFP_NET_LSO_MAX_SEGS 64 -/** - * Prepend field types - */ +/* Prepend field types */ #define NFP_NET_META_FIELD_SIZE 4 #define NFP_NET_META_HASH 1 /* next field carries hash type */ #define NFP_NET_META_MARK 2 @@ -49,9 +42,7 @@ #define NFP_META_PORT_ID_CTRL ~0U -/** - * Hash type pre-pended when a RSS hash was computed - */ +/* Hash type pre-pended when a RSS hash was computed */ #define NFP_NET_RSS_NONE 0 #define NFP_NET_RSS_IPV4 1 #define NFP_NET_RSS_IPV6 2 @@ -63,16 +54,14 @@ #define NFP_NET_RSS_IPV6_UDP 8 #define NFP_NET_RSS_IPV6_EX_UDP 9 -/** - * Ring counts +/* Ring counts * %NFP_NET_TXR_MAX: Maximum number of TX rings * %NFP_NET_RXR_MAX: Maximum number of RX rings */ #define NFP_NET_TXR_MAX 64 #define NFP_NET_RXR_MAX 64 -/** - * Read/Write config words (0x0000 - 0x002c) +/* Read/Write config words (0x0000 - 0x002c) * %NFP_NET_CFG_CTRL: Global control * %NFP_NET_CFG_UPDATE: Indicate which fields are updated * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings @@ -147,8 +136,7 @@ #define NFP_NET_CFG_LSC 0x0020 #define NFP_NET_CFG_MACADDR 0x0024 -/** - * Read-only words (0x0030 - 0x0050): +/* Read-only words (0x0030 - 0x0050): * %NFP_NET_CFG_VERSION: Firmware version number * %NFP_NET_CFG_STS: Status * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL) @@ -193,36 +181,31 @@ #define NFP_NET_CFG_START_TXQ 0x0048 #define NFP_NET_CFG_START_RXQ 0x004c -/** - * Prepend configuration +/* Prepend configuration */ #define NFP_NET_CFG_RX_OFFSET 0x0050 #define NFP_NET_CFG_RX_OFFSET_DYNAMIC 0 /* Prepend mode */ -/** - * RSS capabilities +/* RSS capabilities * %NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as * %NFP_NET_CFG_RSS_HFUNC) */ #define NFP_NET_CFG_RSS_CAP 0x0054 #define NFP_NET_CFG_RSS_CAP_HFUNC 0xff000000 -/** - * TLV area start +/* TLV area start * %NFP_NET_CFG_TLV_BASE: start anchor of the TLV area */ #define NFP_NET_CFG_TLV_BASE 0x0058 -/** - * VXLAN/UDP encap configuration +/* VXLAN/UDP encap configuration * %NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports * %NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes */ #define NFP_NET_CFG_VXLAN_PORT 0x0060 #define NFP_NET_CFG_VXLAN_SZ 0x0008 -/** - * BPF section +/* BPF section * %NFP_NET_CFG_BPF_ABI: BPF ABI version * %NFP_NET_CFG_BPF_CAP: BPF capabilities * %NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes @@ -247,14 +230,12 @@ #define NFP_NET_CFG_BPF_CFG_MASK 7ULL #define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK) -/** - * 40B reserved for future use (0x0098 - 0x00c0) +/* 40B reserved for future use (0x0098 - 0x00c0) */ #define NFP_NET_CFG_RESERVED 0x0098 #define NFP_NET_CFG_RESERVED_SZ 0x0028 -/** - * RSS configuration (0x0100 - 0x01ac): +/* RSS configuration (0x0100 - 0x01ac): * Used only when NFP_NET_CFG_CTRL_RSS is enabled * %NFP_NET_CFG_RSS_CFG: RSS configuration word * %NFP_NET_CFG_RSS_KEY: RSS "secret" key @@ -281,8 +262,7 @@ NFP_NET_CFG_RSS_KEY_SZ) #define NFP_NET_CFG_RSS_ITBL_SZ 0x80 -/** - * TX ring configuration (0x200 - 0x800) +/* TX ring configuration (0x200 - 0x800) * %NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration * %NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries) * %NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries) @@ -301,8 +281,7 @@ #define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \ ((_x) * 0x4)) -/** - * RX ring configuration (0x0800 - 0x0c00) +/* RX ring configuration (0x0800 - 0x0c00) * %NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration * %NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries) * %NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries) @@ -318,8 +297,7 @@ #define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \ ((_x) * 0x4)) -/** - * Interrupt Control/Cause registers (0x0c00 - 0x0d00) +/* Interrupt Control/Cause registers (0x0c00 - 0x0d00) * These registers are only used when MSI-X auto-masking is not * enabled (%NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index * by MSI-X entry and are 1B in size. If an entry is zero, the @@ -334,8 +312,7 @@ #define NFP_NET_CFG_ICR_RXTX 0x1 #define NFP_NET_CFG_ICR_LSC 0x2 -/** - * General device stats (0x0d00 - 0x0d90) +/* General device stats (0x0d00 - 0x0d90) * all counters are 64bit. */ #define NFP_NET_CFG_STATS_BASE 0x0d00 @@ -368,8 +345,7 @@ #define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0) #define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8) -/** - * Per ring stats (0x1000 - 0x1800) +/* Per ring stats (0x1000 - 0x1800) * options, 64bit per entry * %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) * %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) @@ -381,8 +357,7 @@ #define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \ ((_x) * 0x10)) -/** - * General use mailbox area (0x1800 - 0x19ff) +/* General use mailbox area (0x1800 - 0x19ff) * 4B used for update command and 4B return code * followed by a max of 504B of variable length value */ @@ -399,8 +374,7 @@ #define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET 5 #define NFP_NET_CFG_MBOX_CMD_TLV_CMSG 6 -/** - * VLAN filtering using general use mailbox +/* VLAN filtering using general use mailbox * %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox * %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter * %NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter @@ -411,8 +385,7 @@ #define NFP_NET_CFG_VLAN_FILTER_PROTO (NFP_NET_CFG_VLAN_FILTER + 2) #define NFP_NET_CFG_VLAN_FILTER_SZ 0x0004 -/** - * TLV capabilities +/* TLV capabilities * %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV * %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV @@ -438,8 +411,7 @@ #define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000 #define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff -/** - * Capability TLV types +/* Capability TLV types * * %NFP_NET_CFG_TLV_TYPE_UNKNOWN: * Special TLV type to catch bugs, should never be encountered. Drivers should @@ -512,8 +484,7 @@ struct device; -/** - * struct nfp_net_tlv_caps - parsed control BAR TLV capabilities +/* struct nfp_net_tlv_caps - parsed control BAR TLV capabilities * @me_freq_mhz: ME clock_freq (MHz) * @mbox_off: vNIC mailbox area offset * @mbox_len: vNIC mailbox area length diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c index 553c708694e8..2c74b3c5aef9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c @@ -42,13 +42,19 @@ static int nfp_rx_q_show(struct seq_file *file, void *data) seq_printf(file, "%04d: 0x%08x 0x%08x", i, rxd->vals[0], rxd->vals[1]); - frag = READ_ONCE(rx_ring->rxbufs[i].frag); - if (frag) - seq_printf(file, " frag=%p", frag); - - if (rx_ring->rxbufs[i].dma_addr) - seq_printf(file, " dma_addr=%pad", - &rx_ring->rxbufs[i].dma_addr); + if (!r_vec->xsk_pool) { + frag = READ_ONCE(rx_ring->rxbufs[i].frag); + if (frag) + seq_printf(file, " frag=%p", frag); + + if (rx_ring->rxbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &rx_ring->rxbufs[i].dma_addr); + } else { + if (rx_ring->xsk_rxbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &rx_ring->xsk_rxbufs[i].dma_addr); + } if (i == rx_ring->rd_p % rxd_cnt) seq_puts(file, " H_RD "); @@ -103,20 +109,23 @@ static int nfp_tx_q_show(struct seq_file *file, void *data) tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p); for (i = 0; i < txd_cnt; i++) { + struct xdp_buff *xdp; + struct sk_buff *skb; + txd = &tx_ring->txds[i]; seq_printf(file, "%04d: 0x%08x 0x%08x 0x%08x 0x%08x", i, txd->vals[0], txd->vals[1], txd->vals[2], txd->vals[3]); - if (tx_ring == r_vec->tx_ring) { - struct sk_buff *skb = READ_ONCE(tx_ring->txbufs[i].skb); - + if (!tx_ring->is_xdp) { + skb = READ_ONCE(tx_ring->txbufs[i].skb); if (skb) seq_printf(file, " skb->head=%p skb->data=%p", skb->head, skb->data); } else { - seq_printf(file, " frag=%p", - READ_ONCE(tx_ring->txbufs[i].frag)); + xdp = READ_ONCE(tx_ring->txbufs[i].xdp); + if (xdp) + seq_printf(file, " xdp->data=%p", xdp->data); } if (tx_ring->txbufs[i].dma_addr) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h index a3db0cbf6425..786be58a907e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h @@ -4,8 +4,7 @@ #ifndef _NFP_NET_SRIOV_H_ #define _NFP_NET_SRIOV_H_ -/** - * SRIOV VF configuration. +/* SRIOV VF configuration. * The configuration memory begins with a mailbox region for communication with * the firmware followed by individual VF entries. */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c new file mode 100644 index 000000000000..ab7243277efa --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c @@ -0,0 +1,592 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc */ +/* Copyright (C) 2021 Corigine, Inc */ + +#include <linux/dma-direction.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <net/xdp_sock_drv.h> +#include <trace/events/xdp.h> + +#include "nfp_app.h" +#include "nfp_net.h" +#include "nfp_net_xsk.h" + +static int nfp_net_tx_space(struct nfp_net_tx_ring *tx_ring) +{ + return tx_ring->cnt - tx_ring->wr_p + tx_ring->rd_p - 1; +} + +static void nfp_net_xsk_tx_free(struct nfp_net_tx_buf *txbuf) +{ + xsk_buff_free(txbuf->xdp); + + txbuf->dma_addr = 0; + txbuf->xdp = NULL; +} + +void nfp_net_xsk_tx_bufs_free(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_tx_buf *txbuf; + unsigned int idx; + + while (tx_ring->rd_p != tx_ring->wr_p) { + idx = D_IDX(tx_ring, tx_ring->rd_p); + txbuf = &tx_ring->txbufs[idx]; + + txbuf->real_len = 0; + + tx_ring->qcp_rd_p++; + tx_ring->rd_p++; + + if (tx_ring->r_vec->xsk_pool) { + if (txbuf->is_xsk_tx) + nfp_net_xsk_tx_free(txbuf); + + xsk_tx_completed(tx_ring->r_vec->xsk_pool, 1); + } + } +} + +static bool nfp_net_xsk_complete(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + u32 done_pkts = 0, done_bytes = 0, reused = 0; + bool done_all; + int idx, todo; + u32 qcp_rd_p; + + if (tx_ring->wr_p == tx_ring->rd_p) + return true; + + /* Work out how many descriptors have been transmitted. */ + qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return true; + + todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); + + done_all = todo <= NFP_NET_XDP_MAX_COMPLETE; + todo = min(todo, NFP_NET_XDP_MAX_COMPLETE); + + tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo); + + done_pkts = todo; + while (todo--) { + struct nfp_net_tx_buf *txbuf; + + idx = D_IDX(tx_ring, tx_ring->rd_p); + tx_ring->rd_p++; + + txbuf = &tx_ring->txbufs[idx]; + if (unlikely(!txbuf->real_len)) + continue; + + done_bytes += txbuf->real_len; + txbuf->real_len = 0; + + if (txbuf->is_xsk_tx) { + nfp_net_xsk_tx_free(txbuf); + reused++; + } + } + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + xsk_tx_completed(r_vec->xsk_pool, done_pkts - reused); + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); + + return done_all; +} + +static void nfp_net_xsk_tx(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct xdp_desc desc[NFP_NET_XSK_TX_BATCH]; + struct xsk_buff_pool *xsk_pool; + struct nfp_net_tx_desc *txd; + u32 pkts = 0, wr_idx; + u32 i, got; + + xsk_pool = r_vec->xsk_pool; + + while (nfp_net_tx_space(tx_ring) >= NFP_NET_XSK_TX_BATCH) { + for (i = 0; i < NFP_NET_XSK_TX_BATCH; i++) + if (!xsk_tx_peek_desc(xsk_pool, &desc[i])) + break; + got = i; + if (!got) + break; + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i); + prefetchw(&tx_ring->txds[wr_idx]); + + for (i = 0; i < got; i++) + xsk_buff_raw_dma_sync_for_device(xsk_pool, desc[i].addr, + desc[i].len); + + for (i = 0; i < got; i++) { + wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i); + + tx_ring->txbufs[wr_idx].real_len = desc[i].len; + tx_ring->txbufs[wr_idx].is_xsk_tx = false; + + /* Build TX descriptor. */ + txd = &tx_ring->txds[wr_idx]; + nfp_desc_set_dma_addr(txd, + xsk_buff_raw_get_dma(xsk_pool, + desc[i].addr + )); + txd->offset_eop = PCIE_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(desc[i].len); + txd->data_len = cpu_to_le16(desc[i].len); + } + + tx_ring->wr_p += got; + pkts += got; + } + + if (!pkts) + return; + + xsk_tx_release(xsk_pool); + /* Ensure all records are visible before incrementing write counter. */ + wmb(); + nfp_qcp_wr_ptr_add(tx_ring->qcp_q, pkts); +} + +static bool +nfp_net_xsk_tx_xdp(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct nfp_net_rx_ring *rx_ring, + struct nfp_net_tx_ring *tx_ring, + struct nfp_net_xsk_rx_buf *xrxbuf, unsigned int pkt_len, + int pkt_off) +{ + struct xsk_buff_pool *pool = r_vec->xsk_pool; + struct nfp_net_tx_buf *txbuf; + struct nfp_net_tx_desc *txd; + unsigned int wr_idx; + + if (nfp_net_tx_space(tx_ring) < 1) + return false; + + xsk_buff_raw_dma_sync_for_device(pool, xrxbuf->dma_addr + pkt_off, pkt_len); + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + + txbuf = &tx_ring->txbufs[wr_idx]; + txbuf->xdp = xrxbuf->xdp; + txbuf->real_len = pkt_len; + txbuf->is_xsk_tx = true; + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = PCIE_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(pkt_len); + nfp_desc_set_dma_addr(txd, xrxbuf->dma_addr + pkt_off); + txd->data_len = cpu_to_le16(pkt_len); + + txd->flags = 0; + txd->mss = 0; + txd->lso_hdrlen = 0; + + tx_ring->wr_ptr_add++; + tx_ring->wr_p++; + + return true; +} + +static int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring) +{ + return rx_ring->cnt - rx_ring->wr_p + rx_ring->rd_p - 1; +} + +static void +nfp_net_xsk_rx_bufs_stash(struct nfp_net_rx_ring *rx_ring, unsigned int idx, + struct xdp_buff *xdp) +{ + unsigned int headroom; + + headroom = xsk_pool_get_headroom(rx_ring->r_vec->xsk_pool); + + rx_ring->rxds[idx].fld.reserved = 0; + rx_ring->rxds[idx].fld.meta_len_dd = 0; + + rx_ring->xsk_rxbufs[idx].xdp = xdp; + rx_ring->xsk_rxbufs[idx].dma_addr = + xsk_buff_xdp_get_frame_dma(xdp) + headroom; +} + +static void nfp_net_xsk_rx_unstash(struct nfp_net_xsk_rx_buf *rxbuf) +{ + rxbuf->dma_addr = 0; + rxbuf->xdp = NULL; +} + +static void nfp_net_xsk_rx_free(struct nfp_net_xsk_rx_buf *rxbuf) +{ + if (rxbuf->xdp) + xsk_buff_free(rxbuf->xdp); + + nfp_net_xsk_rx_unstash(rxbuf); +} + +void nfp_net_xsk_rx_bufs_free(struct nfp_net_rx_ring *rx_ring) +{ + unsigned int i; + + if (!rx_ring->cnt) + return; + + for (i = 0; i < rx_ring->cnt - 1; i++) + nfp_net_xsk_rx_free(&rx_ring->xsk_rxbufs[i]); +} + +void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct xsk_buff_pool *pool = r_vec->xsk_pool; + unsigned int wr_idx, wr_ptr_add = 0; + struct xdp_buff *xdp; + + while (nfp_net_rx_space(rx_ring)) { + wr_idx = D_IDX(rx_ring, rx_ring->wr_p); + + xdp = xsk_buff_alloc(pool); + if (!xdp) + break; + + nfp_net_xsk_rx_bufs_stash(rx_ring, wr_idx, xdp); + + nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, + rx_ring->xsk_rxbufs[wr_idx].dma_addr); + + rx_ring->wr_p++; + wr_ptr_add++; + } + + /* Ensure all records are visible before incrementing write counter. */ + wmb(); + nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, wr_ptr_add); +} + +static void nfp_net_xsk_rx_drop(struct nfp_net_r_vector *r_vec, + struct nfp_net_xsk_rx_buf *xrxbuf) +{ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + + nfp_net_xsk_rx_free(xrxbuf); +} + +static void nfp_net_xsk_rx_skb(struct nfp_net_rx_ring *rx_ring, + const struct nfp_net_rx_desc *rxd, + struct nfp_net_xsk_rx_buf *xrxbuf, + const struct nfp_meta_parsed *meta, + unsigned int pkt_len, + bool meta_xdp, + unsigned int *skbs_polled) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct net_device *netdev; + struct sk_buff *skb; + + if (likely(!meta->portid)) { + netdev = dp->netdev; + } else { + struct nfp_net *nn = netdev_priv(dp->netdev); + + netdev = nfp_app_dev_get(nn->app, meta->portid, NULL); + if (unlikely(!netdev)) { + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + return; + } + nfp_repr_inc_rx_stats(netdev, pkt_len); + } + + skb = napi_alloc_skb(&r_vec->napi, pkt_len); + if (!skb) { + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + return; + } + memcpy(skb_put(skb, pkt_len), xrxbuf->xdp->data, pkt_len); + + skb->mark = meta->mark; + skb_set_hash(skb, meta->hash, meta->hash_type); + + skb_record_rx_queue(skb, rx_ring->idx); + skb->protocol = eth_type_trans(skb, netdev); + + nfp_net_rx_csum(dp, r_vec, rxd, meta, skb); + + if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + le16_to_cpu(rxd->rxd.vlan)); + if (meta_xdp) + skb_metadata_set(skb, + xrxbuf->xdp->data - xrxbuf->xdp->data_meta); + + napi_gro_receive(&rx_ring->r_vec->napi, skb); + + nfp_net_xsk_rx_free(xrxbuf); + + (*skbs_polled)++; +} + +static unsigned int +nfp_net_xsk_rx(struct nfp_net_rx_ring *rx_ring, int budget, + unsigned int *skbs_polled) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct nfp_net_tx_ring *tx_ring; + struct bpf_prog *xdp_prog; + bool xdp_redir = false; + int pkts_polled = 0; + + xdp_prog = READ_ONCE(dp->xdp_prog); + tx_ring = r_vec->xdp_ring; + + while (pkts_polled < budget) { + unsigned int meta_len, data_len, pkt_len, pkt_off; + struct nfp_net_xsk_rx_buf *xrxbuf; + struct nfp_net_rx_desc *rxd; + struct nfp_meta_parsed meta; + int idx, act; + + idx = D_IDX(rx_ring, rx_ring->rd_p); + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) + break; + + rx_ring->rd_p++; + pkts_polled++; + + xrxbuf = &rx_ring->xsk_rxbufs[idx]; + + /* If starved of buffers "drop" it and scream. */ + if (rx_ring->rd_p >= rx_ring->wr_p) { + nn_dp_warn(dp, "Starved of RX buffers\n"); + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + break; + } + + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. + */ + dma_rmb(); + + memset(&meta, 0, sizeof(meta)); + + /* Only supporting AF_XDP with dynamic metadata so buffer layout + * is always: + * + * --------------------------------------------------------- + * | off | metadata | packet | XXXX | + * --------------------------------------------------------- + */ + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; + + if (unlikely(meta_len > NFP_NET_MAX_PREPEND)) { + nn_dp_warn(dp, "Oversized RX packet metadata %u\n", + meta_len); + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + continue; + } + + /* Stats update. */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += pkt_len; + u64_stats_update_end(&r_vec->rx_sync); + + xrxbuf->xdp->data += meta_len; + xrxbuf->xdp->data_end = xrxbuf->xdp->data + pkt_len; + xdp_set_data_meta_invalid(xrxbuf->xdp); + xsk_buff_dma_sync_for_cpu(xrxbuf->xdp, r_vec->xsk_pool); + net_prefetch(xrxbuf->xdp->data); + + if (meta_len) { + if (unlikely(nfp_net_parse_meta(dp->netdev, &meta, + xrxbuf->xdp->data - + meta_len, + xrxbuf->xdp->data, + pkt_len, meta_len))) { + nn_dp_warn(dp, "Invalid RX packet metadata\n"); + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + continue; + } + + if (unlikely(meta.portid)) { + struct nfp_net *nn = netdev_priv(dp->netdev); + + if (meta.portid != NFP_META_PORT_ID_CTRL) { + nfp_net_xsk_rx_skb(rx_ring, rxd, xrxbuf, + &meta, pkt_len, + false, skbs_polled); + continue; + } + + nfp_app_ctrl_rx_raw(nn->app, xrxbuf->xdp->data, + pkt_len); + nfp_net_xsk_rx_free(xrxbuf); + continue; + } + } + + act = bpf_prog_run_xdp(xdp_prog, xrxbuf->xdp); + + pkt_len = xrxbuf->xdp->data_end - xrxbuf->xdp->data; + pkt_off = xrxbuf->xdp->data - xrxbuf->xdp->data_hard_start; + + switch (act) { + case XDP_PASS: + nfp_net_xsk_rx_skb(rx_ring, rxd, xrxbuf, &meta, pkt_len, + true, skbs_polled); + break; + case XDP_TX: + if (!nfp_net_xsk_tx_xdp(dp, r_vec, rx_ring, tx_ring, + xrxbuf, pkt_len, pkt_off)) + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + else + nfp_net_xsk_rx_unstash(xrxbuf); + break; + case XDP_REDIRECT: + if (xdp_do_redirect(dp->netdev, xrxbuf->xdp, xdp_prog)) { + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + } else { + nfp_net_xsk_rx_unstash(xrxbuf); + xdp_redir = true; + } + break; + default: + bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_DROP: + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + break; + } + } + + nfp_net_xsk_rx_ring_fill_freelist(r_vec->rx_ring); + + if (xdp_redir) + xdp_do_flush_map(); + + if (tx_ring->wr_ptr_add) + nfp_net_tx_xmit_more_flush(tx_ring); + + return pkts_polled; +} + +static void nfp_net_xsk_pool_unmap(struct device *dev, + struct xsk_buff_pool *pool) +{ + return xsk_pool_dma_unmap(pool, 0); +} + +static int nfp_net_xsk_pool_map(struct device *dev, struct xsk_buff_pool *pool) +{ + return xsk_pool_dma_map(pool, dev, 0); +} + +int nfp_net_xsk_setup_pool(struct net_device *netdev, + struct xsk_buff_pool *pool, u16 queue_id) +{ + struct nfp_net *nn = netdev_priv(netdev); + + struct xsk_buff_pool *prev_pool; + struct nfp_net_dp *dp; + int err; + + /* Reject on old FWs so we can drop some checks on datapath. */ + if (nn->dp.rx_offset != NFP_NET_CFG_RX_OFFSET_DYNAMIC) + return -EOPNOTSUPP; + if (!nn->dp.chained_metadata_format) + return -EOPNOTSUPP; + + /* Install */ + if (pool) { + err = nfp_net_xsk_pool_map(nn->dp.dev, pool); + if (err) + return err; + } + + /* Reconfig/swap */ + dp = nfp_net_clone_dp(nn); + if (!dp) { + err = -ENOMEM; + goto err_unmap; + } + + prev_pool = dp->xsk_pools[queue_id]; + dp->xsk_pools[queue_id] = pool; + + err = nfp_net_ring_reconfig(nn, dp, NULL); + if (err) + goto err_unmap; + + /* Uninstall */ + if (prev_pool) + nfp_net_xsk_pool_unmap(nn->dp.dev, prev_pool); + + return 0; +err_unmap: + if (pool) + nfp_net_xsk_pool_unmap(nn->dp.dev, pool); + + return err; +} + +int nfp_net_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags) +{ + struct nfp_net *nn = netdev_priv(netdev); + + /* queue_id comes from a zero-copy socket, installed with XDP_SETUP_XSK_POOL, + * so it must be within our vector range. Moreover, our napi structs + * are statically allocated, so we can always kick them without worrying + * if reconfig is in progress or interface down. + */ + napi_schedule(&nn->r_vecs[queue_id].napi); + + return 0; +} + +int nfp_net_xsk_poll(struct napi_struct *napi, int budget) +{ + struct nfp_net_r_vector *r_vec = + container_of(napi, struct nfp_net_r_vector, napi); + unsigned int pkts_polled, skbs = 0; + + pkts_polled = nfp_net_xsk_rx(r_vec->rx_ring, budget, &skbs); + + if (pkts_polled < budget) { + if (r_vec->tx_ring) + nfp_net_tx_complete(r_vec->tx_ring, budget); + + if (!nfp_net_xsk_complete(r_vec->xdp_ring)) + pkts_polled = budget; + + nfp_net_xsk_tx(r_vec->xdp_ring); + + if (pkts_polled < budget && napi_complete_done(napi, skbs)) + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + } + + return pkts_polled; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.h b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.h new file mode 100644 index 000000000000..5c8549cb3543 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2018 Netronome Systems, Inc */ +/* Copyright (C) 2021 Corigine, Inc */ + +#ifndef _NFP_XSK_H_ +#define _NFP_XSK_H_ + +#include <net/xdp_sock_drv.h> + +#define NFP_NET_XSK_TX_BATCH 16 /* XSK TX transmission batch size. */ + +static inline bool nfp_net_has_xsk_pool_slow(struct nfp_net_dp *dp, + unsigned int qid) +{ + return dp->xdp_prog && dp->xsk_pools[qid]; +} + +int nfp_net_xsk_setup_pool(struct net_device *netdev, struct xsk_buff_pool *pool, + u16 queue_id); + +void nfp_net_xsk_tx_bufs_free(struct nfp_net_tx_ring *tx_ring); +void nfp_net_xsk_rx_bufs_free(struct nfp_net_rx_ring *rx_ring); + +void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring); + +int nfp_net_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); +int nfp_net_xsk_poll(struct napi_struct *napi, int budget); + +#endif /* _NFP_XSK_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h index ae4da189d955..df316b9e891d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h @@ -132,8 +132,7 @@ void nfp_devlink_port_unregister(struct nfp_port *port); void nfp_devlink_port_type_eth_set(struct nfp_port *port); void nfp_devlink_port_type_clear(struct nfp_port *port); -/** - * Mac stats (0x0000 - 0x0200) +/* Mac stats (0x0000 - 0x0200) * all counters are 64bit. */ #define NFP_MAC_STATS_BASE 0x0000 diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 10e7d8b21c46..730fea214b8a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -513,7 +513,7 @@ nfp_nsp_command_buf_dma_sg(struct nfp_nsp *nsp, dma_size = BIT_ULL(dma_order); nseg = DIV_ROUND_UP(max_size, chunk_size); - chunks = kzalloc(array_size(sizeof(*chunks), nseg), GFP_KERNEL); + chunks = kcalloc(nseg, sizeof(*chunks), GFP_KERNEL); if (!chunks) return -ENOMEM; diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 07a00dd9cfe0..4b3482ce90a1 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -324,8 +324,9 @@ static int nixge_hw_dma_bd_init(struct net_device *ndev) + sizeof(*priv->rx_bd_v) * ((i + 1) % RX_BD_NUM)); - skb = netdev_alloc_skb_ip_align(ndev, - NIXGE_MAX_JUMBO_FRAME_SIZE); + skb = __netdev_alloc_skb_ip_align(ndev, + NIXGE_MAX_JUMBO_FRAME_SIZE, + GFP_KERNEL); if (!skb) goto out; diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h index 5e25411ff02f..602f4d45d529 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic.h +++ b/drivers/net/ethernet/pensando/ionic/ionic.h @@ -18,7 +18,7 @@ struct ionic_lif; #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF 0x1002 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003 -#define DEVCMD_TIMEOUT 10 +#define DEVCMD_TIMEOUT 5 #define IONIC_ADMINQ_TIME_SLICE msecs_to_jiffies(100) #define IONIC_PHC_UPDATE_NS 10000000000 /* 10s in nanoseconds */ @@ -78,6 +78,9 @@ void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode, u8 status, int err); int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait); +int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_wait); +void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status, + int err); int ionic_set_dma_mask(struct ionic *ionic); int ionic_setup(struct ionic *ionic); @@ -89,4 +92,6 @@ int ionic_port_identify(struct ionic *ionic); int ionic_port_init(struct ionic *ionic); int ionic_port_reset(struct ionic *ionic); +const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr); + #endif /* _IONIC_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 7e296fa71b36..6ffc62c41165 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -109,8 +109,8 @@ void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page) static void ionic_vf_dealloc_locked(struct ionic *ionic) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR }; struct ionic_vf *v; - dma_addr_t dma = 0; int i; if (!ionic->vfs) @@ -120,9 +120,8 @@ static void ionic_vf_dealloc_locked(struct ionic *ionic) v = &ionic->vfs[i]; if (v->stats_pa) { - (void)ionic_set_vf_config(ionic, i, - IONIC_VF_ATTR_STATSADDR, - (u8 *)&dma); + vfc.stats_pa = 0; + (void)ionic_set_vf_config(ionic, i, &vfc); dma_unmap_single(ionic->dev, v->stats_pa, sizeof(v->stats), DMA_FROM_DEVICE); v->stats_pa = 0; @@ -143,6 +142,7 @@ static void ionic_vf_dealloc(struct ionic *ionic) static int ionic_vf_alloc(struct ionic *ionic, int num_vfs) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_STATSADDR }; struct ionic_vf *v; int err = 0; int i; @@ -166,9 +166,10 @@ static int ionic_vf_alloc(struct ionic *ionic, int num_vfs) } ionic->num_vfs++; + /* ignore failures from older FW, we just won't get stats */ - (void)ionic_set_vf_config(ionic, i, IONIC_VF_ATTR_STATSADDR, - (u8 *)&v->stats_pa); + vfc.stats_pa = cpu_to_le64(v->stats_pa); + (void)ionic_set_vf_config(ionic, i, &vfc); } out: @@ -331,6 +332,9 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_deregister_lifs; } + mod_timer(&ionic->watchdog_timer, + round_jiffies(jiffies + ionic->watchdog_period)); + return 0; err_out_deregister_lifs: @@ -348,7 +352,6 @@ err_out_port_reset: err_out_reset: ionic_reset(ionic); err_out_teardown: - del_timer_sync(&ionic->watchdog_timer); pci_clear_master(pdev); /* Don't fail the probe for these errors, keep * the hw interface around for inspection diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index d57e80d44c9d..9d0514cfeb5c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -33,7 +33,8 @@ static void ionic_watchdog_cb(struct timer_list *t) !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) ionic_link_status_check_request(lif, CAN_NOT_SLEEP); - if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state)) { + if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state) && + !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) { work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { netdev_err(lif->netdev, "rxmode change dropped\n"); @@ -46,6 +47,24 @@ static void ionic_watchdog_cb(struct timer_list *t) } } +static void ionic_watchdog_init(struct ionic *ionic) +{ + struct ionic_dev *idev = &ionic->idev; + + timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0); + ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ; + + /* set times to ensure the first check will proceed */ + atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ); + idev->last_hb_time = jiffies - 2 * ionic->watchdog_period; + /* init as ready, so no transition if the first check succeeds */ + idev->last_fw_hb = 0; + idev->fw_hb_ready = true; + idev->fw_status_ready = true; + idev->fw_generation = IONIC_FW_STS_F_GENERATION & + ioread8(&idev->dev_info_regs->fw_status); +} + void ionic_init_devinfo(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; @@ -109,21 +128,7 @@ int ionic_dev_setup(struct ionic *ionic) return -EFAULT; } - timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0); - ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ; - - /* set times to ensure the first check will proceed */ - atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ); - idev->last_hb_time = jiffies - 2 * ionic->watchdog_period; - /* init as ready, so no transition if the first check succeeds */ - idev->last_fw_hb = 0; - idev->fw_hb_ready = true; - idev->fw_status_ready = true; - idev->fw_generation = IONIC_FW_STS_F_GENERATION & - ioread8(&idev->dev_info_regs->fw_status); - - mod_timer(&ionic->watchdog_timer, - round_jiffies(jiffies + ionic->watchdog_period)); + ionic_watchdog_init(ionic); idev->db_pages = bar->vaddr; idev->phy_db_pages = bar->bus_addr; @@ -132,10 +137,21 @@ int ionic_dev_setup(struct ionic *ionic) } /* Devcmd Interface */ +bool ionic_is_fw_running(struct ionic_dev *idev) +{ + u8 fw_status = ioread8(&idev->dev_info_regs->fw_status); + + /* firmware is useful only if the running bit is set and + * fw_status != 0xff (bad PCI read) + */ + return (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING); +} + int ionic_heartbeat_check(struct ionic *ionic) { - struct ionic_dev *idev = &ionic->idev; unsigned long check_time, last_check_time; + struct ionic_dev *idev = &ionic->idev; + struct ionic_lif *lif = ionic->lif; bool fw_status_ready = true; bool fw_hb_ready; u8 fw_generation; @@ -155,13 +171,10 @@ do_check_time: goto do_check_time; } - /* firmware is useful only if the running bit is set and - * fw_status != 0xff (bad PCI read) - * If fw_status is not ready don't bother with the generation. - */ fw_status = ioread8(&idev->dev_info_regs->fw_status); - if (fw_status == 0xff || !(fw_status & IONIC_FW_STS_F_RUNNING)) { + /* If fw_status is not ready don't bother with the generation */ + if (!ionic_is_fw_running(idev)) { fw_status_ready = false; } else { fw_generation = fw_status & IONIC_FW_STS_F_GENERATION; @@ -176,26 +189,40 @@ do_check_time: * the down, the next watchdog will see the fw is up * and the generation value stable, so will trigger * the fw-up activity. + * + * If we had already moved to FW_RESET from a RESET event, + * it is possible that we never saw the fw_status go to 0, + * so we fake the current idev->fw_status_ready here to + * force the transition and get FW up again. */ - fw_status_ready = false; + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + idev->fw_status_ready = false; /* go to running */ + else + fw_status_ready = false; /* go to down */ } } + dev_dbg(ionic->dev, "fw_status 0x%02x ready %d idev->ready %d last_hb 0x%x state 0x%02lx\n", + fw_status, fw_status_ready, idev->fw_status_ready, + idev->last_fw_hb, lif->state[0]); + /* is this a transition? */ - if (fw_status_ready != idev->fw_status_ready) { - struct ionic_lif *lif = ionic->lif; + if (fw_status_ready != idev->fw_status_ready && + !test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) { bool trigger = false; idev->fw_status_ready = fw_status_ready; - if (!fw_status_ready) { - dev_info(ionic->dev, "FW stopped %u\n", fw_status); - if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) - trigger = true; - } else { - dev_info(ionic->dev, "FW running %u\n", fw_status); - if (lif && test_bit(IONIC_LIF_F_FW_RESET, lif->state)) - trigger = true; + if (!fw_status_ready && + !test_bit(IONIC_LIF_F_FW_RESET, lif->state) && + !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) { + dev_info(ionic->dev, "FW stopped 0x%02x\n", fw_status); + trigger = true; + + } else if (fw_status_ready && + test_bit(IONIC_LIF_F_FW_RESET, lif->state)) { + dev_info(ionic->dev, "FW running 0x%02x\n", fw_status); + trigger = true; } if (trigger) { @@ -210,12 +237,14 @@ do_check_time: } } - if (!fw_status_ready) + if (!idev->fw_status_ready) return -ENXIO; - /* wait at least one watchdog period since the last heartbeat */ + /* Because of some variability in the actual FW heartbeat, we + * wait longer than the DEVCMD_TIMEOUT before checking again. + */ last_check_time = idev->last_hb_time; - if (time_before(check_time, last_check_time + ionic->watchdog_period)) + if (time_before(check_time, last_check_time + DEVCMD_TIMEOUT * 2 * HZ)) return 0; fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat); @@ -392,60 +421,63 @@ void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type) } /* VF commands */ -int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data) +int ionic_set_vf_config(struct ionic *ionic, int vf, + struct ionic_vf_setattr_cmd *vfc) { union ionic_dev_cmd cmd = { .vf_setattr.opcode = IONIC_CMD_VF_SETATTR, - .vf_setattr.attr = attr, + .vf_setattr.attr = vfc->attr, .vf_setattr.vf_index = cpu_to_le16(vf), }; int err; + memcpy(cmd.vf_setattr.pad, vfc->pad, sizeof(vfc->pad)); + + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_go(&ionic->idev, &cmd); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + + return err; +} + +int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr, + struct ionic_vf_getattr_comp *comp) +{ + union ionic_dev_cmd cmd = { + .vf_getattr.opcode = IONIC_CMD_VF_GETATTR, + .vf_getattr.attr = attr, + .vf_getattr.vf_index = cpu_to_le16(vf), + }; + int err; + + if (vf >= ionic->num_vfs) + return -EINVAL; + switch (attr) { case IONIC_VF_ATTR_SPOOFCHK: - cmd.vf_setattr.spoofchk = *data; - dev_dbg(ionic->dev, "%s: vf %d spoof %d\n", - __func__, vf, *data); - break; case IONIC_VF_ATTR_TRUST: - cmd.vf_setattr.trust = *data; - dev_dbg(ionic->dev, "%s: vf %d trust %d\n", - __func__, vf, *data); - break; case IONIC_VF_ATTR_LINKSTATE: - cmd.vf_setattr.linkstate = *data; - dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n", - __func__, vf, *data); - break; case IONIC_VF_ATTR_MAC: - ether_addr_copy(cmd.vf_setattr.macaddr, data); - dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n", - __func__, vf, data); - break; case IONIC_VF_ATTR_VLAN: - cmd.vf_setattr.vlanid = cpu_to_le16(*(u16 *)data); - dev_dbg(ionic->dev, "%s: vf %d vlan %d\n", - __func__, vf, *(u16 *)data); - break; case IONIC_VF_ATTR_RATE: - cmd.vf_setattr.maxrate = cpu_to_le32(*(u32 *)data); - dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n", - __func__, vf, *(u32 *)data); break; case IONIC_VF_ATTR_STATSADDR: - cmd.vf_setattr.stats_pa = cpu_to_le64(*(u64 *)data); - dev_dbg(ionic->dev, "%s: vf %d stats_pa 0x%08llx\n", - __func__, vf, *(u64 *)data); - break; default: return -EINVAL; } mutex_lock(&ionic->dev_cmd_lock); ionic_dev_cmd_go(&ionic->idev, &cmd); - err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + err = ionic_dev_cmd_wait_nomsg(ionic, DEVCMD_TIMEOUT); + memcpy_fromio(comp, &ionic->idev.dev_cmd_regs->comp.vf_getattr, + sizeof(*comp)); mutex_unlock(&ionic->dev_cmd_lock); + if (err && comp->status != IONIC_RC_ENOSUPP) + ionic_dev_cmd_dev_err_print(ionic, cmd.vf_getattr.opcode, + comp->status, err); + return err; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index e5acf3bd62b2..563c302eb033 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -318,7 +318,10 @@ void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable); void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type); void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type); -int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data); +int ionic_set_vf_config(struct ionic *ionic, int vf, + struct ionic_vf_setattr_cmd *vfc); +int ionic_dev_cmd_vf_getattr(struct ionic *ionic, int vf, u8 attr, + struct ionic_vf_getattr_comp *comp); void ionic_dev_cmd_queue_identify(struct ionic_dev *idev, u16 lif_type, u8 qtype, u8 qver); void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver); @@ -353,5 +356,6 @@ void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start); void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info, unsigned int stop_index); int ionic_heartbeat_check(struct ionic *ionic); +bool ionic_is_fw_running(struct ionic_dev *idev); #endif /* _IONIC_DEV_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 386a5cf1e224..01c22701482d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -74,10 +74,10 @@ static void ionic_get_drvinfo(struct net_device *netdev, struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; - strlcpy(drvinfo->driver, IONIC_DRV_NAME, sizeof(drvinfo->driver)); - strlcpy(drvinfo->fw_version, ionic->idev.dev_info.fw_version, + strscpy(drvinfo->driver, IONIC_DRV_NAME, sizeof(drvinfo->driver)); + strscpy(drvinfo->fw_version, ionic->idev.dev_info.fw_version, sizeof(drvinfo->fw_version)); - strlcpy(drvinfo->bus_info, ionic_bus_info(ionic), + strscpy(drvinfo->bus_info, ionic_bus_info(ionic), sizeof(drvinfo->bus_info)); } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 278610ed7227..4a90f611c611 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -759,7 +759,7 @@ enum ionic_txq_desc_opcode { * IONIC_TXQ_DESC_OPCODE_CSUM_HW: * Offload 16-bit checksum computation to hardware. * If @csum_l3 is set then the packet's L3 checksum is - * updated. Similarly, if @csum_l4 is set the the L4 + * updated. Similarly, if @csum_l4 is set the L4 * checksum is updated. If @encap is set then encap header * checksums are also updated. * @@ -1368,9 +1368,9 @@ union ionic_port_config { * @status: link status (enum ionic_port_oper_status) * @id: port id * @speed: link speed (in Mbps) - * @link_down_count: number of times link went from from up to down + * @link_down_count: number of times link went from up to down * @fec_type: fec type (enum ionic_port_fec_type) - * @xcvr: tranceiver status + * @xcvr: transceiver status */ struct ionic_port_status { __le32 id; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 2ff7be17e5af..f3568901eb91 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -12,6 +12,7 @@ #include <linux/pci.h> #include <linux/cpumask.h> #include <linux/crash_dump.h> +#include <linux/vmalloc.h> #include "ionic.h" #include "ionic_bus.h" @@ -393,11 +394,11 @@ static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq) ionic_qcq_intr_free(lif, qcq); if (qcq->cq.info) { - devm_kfree(dev, qcq->cq.info); + vfree(qcq->cq.info); qcq->cq.info = NULL; } if (qcq->q.info) { - devm_kfree(dev, qcq->q.info); + vfree(qcq->q.info); qcq->q.info = NULL; } } @@ -528,8 +529,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, new->q.dev = dev; new->flags = flags; - new->q.info = devm_kcalloc(dev, num_descs, sizeof(*new->q.info), - GFP_KERNEL); + new->q.info = vzalloc(num_descs * sizeof(*new->q.info)); if (!new->q.info) { netdev_err(lif->netdev, "Cannot allocate queue info\n"); err = -ENOMEM; @@ -550,8 +550,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, if (err) goto err_out; - new->cq.info = devm_kcalloc(dev, num_descs, sizeof(*new->cq.info), - GFP_KERNEL); + new->cq.info = vzalloc(num_descs * sizeof(*new->cq.info)); if (!new->cq.info) { netdev_err(lif->netdev, "Cannot allocate completion queue info\n"); err = -ENOMEM; @@ -640,14 +639,14 @@ err_out_free_cq: err_out_free_q: dma_free_coherent(dev, new->q_size, new->q_base, new->q_base_pa); err_out_free_cq_info: - devm_kfree(dev, new->cq.info); + vfree(new->cq.info); err_out_free_irq: if (flags & IONIC_QCQ_F_INTR) { devm_free_irq(dev, new->intr.vector, &new->napi); ionic_intr_free(lif->ionic, new->intr.index); } err_out_free_q_info: - devm_kfree(dev, new->q.info); + vfree(new->q.info); err_out_free_qcq: devm_kfree(dev, new); err_out: @@ -1112,12 +1111,17 @@ static bool ionic_notifyq_service(struct ionic_cq *cq, ionic_link_status_check_request(lif, CAN_NOT_SLEEP); break; case IONIC_EVENT_RESET: - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) { - netdev_err(lif->netdev, "Reset event dropped\n"); - } else { - work->type = IONIC_DW_TYPE_LIF_RESET; - ionic_lif_deferred_enqueue(&lif->deferred, work); + if (lif->ionic->idev.fw_status_ready && + !test_bit(IONIC_LIF_F_FW_RESET, lif->state) && + !test_and_set_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + netdev_err(lif->netdev, "Reset event dropped\n"); + clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state); + } else { + work->type = IONIC_DW_TYPE_LIF_RESET; + ionic_lif_deferred_enqueue(&lif->deferred, work); + } } break; default: @@ -1782,7 +1786,7 @@ static void ionic_lif_quiesce(struct ionic_lif *lif) err = ionic_adminq_post_wait(lif, &ctx); if (err) - netdev_err(lif->netdev, "lif quiesce failed %d\n", err); + netdev_dbg(lif->netdev, "lif quiesce failed %d\n", err); } static void ionic_txrx_disable(struct ionic_lif *lif) @@ -2152,6 +2156,76 @@ static int ionic_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd } } +static int ionic_update_cached_vf_config(struct ionic *ionic, int vf) +{ + struct ionic_vf_getattr_comp comp = { 0 }; + int err; + u8 attr; + + attr = IONIC_VF_ATTR_VLAN; + err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp); + if (err && comp.status != IONIC_RC_ENOSUPP) + goto err_out; + if (!err) + ionic->vfs[vf].vlanid = comp.vlanid; + + attr = IONIC_VF_ATTR_SPOOFCHK; + err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp); + if (err && comp.status != IONIC_RC_ENOSUPP) + goto err_out; + if (!err) + ionic->vfs[vf].spoofchk = comp.spoofchk; + + attr = IONIC_VF_ATTR_LINKSTATE; + err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp); + if (err && comp.status != IONIC_RC_ENOSUPP) + goto err_out; + if (!err) { + switch (comp.linkstate) { + case IONIC_VF_LINK_STATUS_UP: + ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_ENABLE; + break; + case IONIC_VF_LINK_STATUS_DOWN: + ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_DISABLE; + break; + case IONIC_VF_LINK_STATUS_AUTO: + ionic->vfs[vf].linkstate = IFLA_VF_LINK_STATE_AUTO; + break; + default: + dev_warn(ionic->dev, "Unexpected link state %u\n", comp.linkstate); + break; + } + } + + attr = IONIC_VF_ATTR_RATE; + err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp); + if (err && comp.status != IONIC_RC_ENOSUPP) + goto err_out; + if (!err) + ionic->vfs[vf].maxrate = comp.maxrate; + + attr = IONIC_VF_ATTR_TRUST; + err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp); + if (err && comp.status != IONIC_RC_ENOSUPP) + goto err_out; + if (!err) + ionic->vfs[vf].trusted = comp.trust; + + attr = IONIC_VF_ATTR_MAC; + err = ionic_dev_cmd_vf_getattr(ionic, vf, attr, &comp); + if (err && comp.status != IONIC_RC_ENOSUPP) + goto err_out; + if (!err) + ether_addr_copy(ionic->vfs[vf].macaddr, comp.macaddr); + +err_out: + if (err) + dev_err(ionic->dev, "Failed to get %s for VF %d\n", + ionic_vf_attr_to_str(attr), vf); + + return err; +} + static int ionic_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivf) { @@ -2167,14 +2241,18 @@ static int ionic_get_vf_config(struct net_device *netdev, if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ivf->vf = vf; - ivf->vlan = le16_to_cpu(ionic->vfs[vf].vlanid); - ivf->qos = 0; - ivf->spoofchk = ionic->vfs[vf].spoofchk; - ivf->linkstate = ionic->vfs[vf].linkstate; - ivf->max_tx_rate = le32_to_cpu(ionic->vfs[vf].maxrate); - ivf->trusted = ionic->vfs[vf].trusted; - ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr); + ivf->vf = vf; + ivf->qos = 0; + + ret = ionic_update_cached_vf_config(ionic, vf); + if (!ret) { + ivf->vlan = le16_to_cpu(ionic->vfs[vf].vlanid); + ivf->spoofchk = ionic->vfs[vf].spoofchk; + ivf->linkstate = ionic->vfs[vf].linkstate; + ivf->max_tx_rate = le32_to_cpu(ionic->vfs[vf].maxrate); + ivf->trusted = ionic->vfs[vf].trusted; + ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr); + } } up_read(&ionic->vf_op_lock); @@ -2220,6 +2298,7 @@ static int ionic_get_vf_stats(struct net_device *netdev, int vf, static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_MAC }; struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; int ret; @@ -2235,7 +2314,11 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ret = ionic_set_vf_config(ionic, vf, IONIC_VF_ATTR_MAC, mac); + ether_addr_copy(vfc.macaddr, mac); + dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n", + __func__, vf, vfc.macaddr); + + ret = ionic_set_vf_config(ionic, vf, &vfc); if (!ret) ether_addr_copy(ionic->vfs[vf].macaddr, mac); } @@ -2247,6 +2330,7 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, __be16 proto) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_VLAN }; struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; int ret; @@ -2269,8 +2353,11 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ret = ionic_set_vf_config(ionic, vf, - IONIC_VF_ATTR_VLAN, (u8 *)&vlan); + vfc.vlanid = cpu_to_le16(vlan); + dev_dbg(ionic->dev, "%s: vf %d vlan %d\n", + __func__, vf, le16_to_cpu(vfc.vlanid)); + + ret = ionic_set_vf_config(ionic, vf, &vfc); if (!ret) ionic->vfs[vf].vlanid = cpu_to_le16(vlan); } @@ -2282,6 +2369,7 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, static int ionic_set_vf_rate(struct net_device *netdev, int vf, int tx_min, int tx_max) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_RATE }; struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; int ret; @@ -2298,8 +2386,11 @@ static int ionic_set_vf_rate(struct net_device *netdev, int vf, if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ret = ionic_set_vf_config(ionic, vf, - IONIC_VF_ATTR_RATE, (u8 *)&tx_max); + vfc.maxrate = cpu_to_le32(tx_max); + dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n", + __func__, vf, le32_to_cpu(vfc.maxrate)); + + ret = ionic_set_vf_config(ionic, vf, &vfc); if (!ret) lif->ionic->vfs[vf].maxrate = cpu_to_le32(tx_max); } @@ -2310,9 +2401,9 @@ static int ionic_set_vf_rate(struct net_device *netdev, int vf, static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_SPOOFCHK }; struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; - u8 data = set; /* convert to u8 for config */ int ret; if (!netif_device_present(netdev)) @@ -2323,10 +2414,13 @@ static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set) if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ret = ionic_set_vf_config(ionic, vf, - IONIC_VF_ATTR_SPOOFCHK, &data); + vfc.spoofchk = set; + dev_dbg(ionic->dev, "%s: vf %d spoof %d\n", + __func__, vf, vfc.spoofchk); + + ret = ionic_set_vf_config(ionic, vf, &vfc); if (!ret) - ionic->vfs[vf].spoofchk = data; + ionic->vfs[vf].spoofchk = set; } up_write(&ionic->vf_op_lock); @@ -2335,9 +2429,9 @@ static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set) static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_TRUST }; struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; - u8 data = set; /* convert to u8 for config */ int ret; if (!netif_device_present(netdev)) @@ -2348,10 +2442,13 @@ static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set) if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ret = ionic_set_vf_config(ionic, vf, - IONIC_VF_ATTR_TRUST, &data); + vfc.trust = set; + dev_dbg(ionic->dev, "%s: vf %d trust %d\n", + __func__, vf, vfc.trust); + + ret = ionic_set_vf_config(ionic, vf, &vfc); if (!ret) - ionic->vfs[vf].trusted = data; + ionic->vfs[vf].trusted = set; } up_write(&ionic->vf_op_lock); @@ -2360,20 +2457,21 @@ static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set) static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set) { + struct ionic_vf_setattr_cmd vfc = { .attr = IONIC_VF_ATTR_LINKSTATE }; struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; - u8 data; + u8 vfls; int ret; switch (set) { case IFLA_VF_LINK_STATE_ENABLE: - data = IONIC_VF_LINK_STATUS_UP; + vfls = IONIC_VF_LINK_STATUS_UP; break; case IFLA_VF_LINK_STATE_DISABLE: - data = IONIC_VF_LINK_STATUS_DOWN; + vfls = IONIC_VF_LINK_STATUS_DOWN; break; case IFLA_VF_LINK_STATE_AUTO: - data = IONIC_VF_LINK_STATUS_AUTO; + vfls = IONIC_VF_LINK_STATUS_AUTO; break; default: return -EINVAL; @@ -2387,8 +2485,11 @@ static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set) if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { ret = -EINVAL; } else { - ret = ionic_set_vf_config(ionic, vf, - IONIC_VF_ATTR_LINKSTATE, &data); + vfc.linkstate = vfls; + dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n", + __func__, vf, vfc.linkstate); + + ret = ionic_set_vf_config(ionic, vf, &vfc); if (!ret) ionic->vfs[vf].linkstate = set; } @@ -2835,6 +2936,7 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif) mutex_unlock(&lif->queue_lock); + clear_bit(IONIC_LIF_F_FW_STOPPING, lif->state); dev_info(ionic->dev, "FW Down: LIFs stopped\n"); } @@ -2934,8 +3036,6 @@ void ionic_lif_free(struct ionic_lif *lif) /* unmap doorbell page */ ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage); lif->kern_dbpage = NULL; - kfree(lif->dbid_inuse); - lif->dbid_inuse = NULL; mutex_destroy(&lif->config_lock); mutex_destroy(&lif->queue_lock); @@ -3135,22 +3235,12 @@ int ionic_lif_init(struct ionic_lif *lif) return -EINVAL; } - lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL); - if (!lif->dbid_inuse) { - dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n"); - return -ENOMEM; - } - - /* first doorbell id reserved for kernel (dbid aka pid == zero) */ - set_bit(0, lif->dbid_inuse); lif->kern_pid = 0; - dbpage_num = ionic_db_page_num(lif, lif->kern_pid); lif->kern_dbpage = ionic_bus_map_dbpage(lif->ionic, dbpage_num); if (!lif->kern_dbpage) { dev_err(dev, "Cannot map dbpage, aborting\n"); - err = -ENOMEM; - goto err_out_free_dbid; + return -ENOMEM; } err = ionic_lif_adminq_init(lif); @@ -3186,15 +3276,13 @@ int ionic_lif_init(struct ionic_lif *lif) return 0; err_out_notifyq_deinit: + napi_disable(&lif->adminqcq->napi); ionic_lif_qcq_deinit(lif, lif->notifyqcq); err_out_adminq_deinit: ionic_lif_qcq_deinit(lif, lif->adminqcq); ionic_lif_reset(lif); ionic_bus_unmap_dbpage(lif->ionic, lif->kern_dbpage); lif->kern_dbpage = NULL; -err_out_free_dbid: - kfree(lif->dbid_inuse); - lif->dbid_inuse = NULL; return err; } @@ -3214,7 +3302,7 @@ static void ionic_lif_set_netdev_info(struct ionic_lif *lif) }, }; - strlcpy(ctx.cmd.lif_setattr.name, lif->netdev->name, + strscpy(ctx.cmd.lif_setattr.name, lif->netdev->name, sizeof(ctx.cmd.lif_setattr.name)); ionic_adminq_post_wait(lif, &ctx); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 9f7ab2f17f93..a53984bf3544 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -135,6 +135,7 @@ enum ionic_lif_state_flags { IONIC_LIF_F_LINK_CHECK_REQUESTED, IONIC_LIF_F_FILTER_SYNC_NEEDED, IONIC_LIF_F_FW_RESET, + IONIC_LIF_F_FW_STOPPING, IONIC_LIF_F_SPLIT_INTR, IONIC_LIF_F_BROKEN, IONIC_LIF_F_TX_DIM_INTR, @@ -213,7 +214,6 @@ struct ionic_lif { u32 rx_coalesce_hw; /* what the hw is using */ u32 tx_coalesce_usecs; /* what the user asked for */ u32 tx_coalesce_hw; /* what the hw is using */ - unsigned long *dbid_inuse; unsigned int dbid_count; struct ionic_phc *phc; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 875f4ec42efe..4029b4e021f8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -188,6 +188,28 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode) } } +const char *ionic_vf_attr_to_str(enum ionic_vf_attr attr) +{ + switch (attr) { + case IONIC_VF_ATTR_SPOOFCHK: + return "IONIC_VF_ATTR_SPOOFCHK"; + case IONIC_VF_ATTR_TRUST: + return "IONIC_VF_ATTR_TRUST"; + case IONIC_VF_ATTR_LINKSTATE: + return "IONIC_VF_ATTR_LINKSTATE"; + case IONIC_VF_ATTR_MAC: + return "IONIC_VF_ATTR_MAC"; + case IONIC_VF_ATTR_VLAN: + return "IONIC_VF_ATTR_VLAN"; + case IONIC_VF_ATTR_RATE: + return "IONIC_VF_ATTR_RATE"; + case IONIC_VF_ATTR_STATSADDR: + return "IONIC_VF_ATTR_STATSADDR"; + default: + return "IONIC_VF_ATTR_UNKNOWN"; + } +} + static void ionic_adminq_flush(struct ionic_lif *lif) { struct ionic_desc_info *desc_info; @@ -215,9 +237,13 @@ static void ionic_adminq_flush(struct ionic_lif *lif) void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode, u8 status, int err) { + const char *stat_str; + + stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" : + ionic_error_to_str(status); + netdev_err(lif->netdev, "%s (%d) failed: %s (%d)\n", - ionic_opcode_to_str(opcode), opcode, - ionic_error_to_str(status), err); + ionic_opcode_to_str(opcode), opcode, stat_str, err); } static int ionic_adminq_check_err(struct ionic_lif *lif, @@ -318,6 +344,7 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, if (do_msg && !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) netdev_err(netdev, "Posting of %s (%d) failed: %d\n", name, ctx->cmd.cmd.opcode, err); + ctx->comp.comp.status = IONIC_RC_ERROR; return err; } @@ -331,11 +358,15 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, if (remaining) break; - /* interrupt the wait if FW stopped */ - if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) { + /* force a check of FW status and break out if FW reset */ + (void)ionic_heartbeat_check(lif->ionic); + if ((test_bit(IONIC_LIF_F_FW_RESET, lif->state) && + !lif->ionic->idev.fw_status_ready) || + test_bit(IONIC_LIF_F_FW_STOPPING, lif->state)) { if (do_msg) - netdev_err(netdev, "%s (%d) interrupted, FW in reset\n", - name, ctx->cmd.cmd.opcode); + netdev_warn(netdev, "%s (%d) interrupted, FW in reset\n", + name, ctx->cmd.cmd.opcode); + ctx->comp.comp.status = IONIC_RC_ERROR; return -ENXIO; } @@ -370,21 +401,34 @@ int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx * static void ionic_dev_cmd_clean(struct ionic *ionic) { - union __iomem ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs; + struct ionic_dev *idev = &ionic->idev; - iowrite32(0, ®s->doorbell); - memset_io(®s->cmd, 0, sizeof(regs->cmd)); + iowrite32(0, &idev->dev_cmd_regs->doorbell); + memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd)); } -int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds) +void ionic_dev_cmd_dev_err_print(struct ionic *ionic, u8 opcode, u8 status, + int err) +{ + const char *stat_str; + + stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" : + ionic_error_to_str(status); + + dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n", + ionic_opcode_to_str(opcode), opcode, stat_str, err); +} + +static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds, + const bool do_msg) { struct ionic_dev *idev = &ionic->idev; unsigned long start_time; unsigned long max_wait; unsigned long duration; + int done = 0; + bool fw_up; int opcode; - int hb = 0; - int done; int err; /* Wait for dev cmd to complete, retrying if we get EAGAIN, @@ -394,31 +438,24 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds) try_again: opcode = readb(&idev->dev_cmd_regs->cmd.cmd.opcode); start_time = jiffies; - do { + for (fw_up = ionic_is_fw_running(idev); + !done && fw_up && time_before(jiffies, max_wait); + fw_up = ionic_is_fw_running(idev)) { done = ionic_dev_cmd_done(idev); if (done) break; usleep_range(100, 200); - - /* Don't check the heartbeat on FW_CONTROL commands as they are - * notorious for interrupting the firmware's heartbeat update. - */ - if (opcode != IONIC_CMD_FW_CONTROL) - hb = ionic_heartbeat_check(ionic); - } while (!done && !hb && time_before(jiffies, max_wait)); + } duration = jiffies - start_time; dev_dbg(ionic->dev, "DEVCMD %s (%d) done=%d took %ld secs (%ld jiffies)\n", ionic_opcode_to_str(opcode), opcode, done, duration / HZ, duration); - if (!done && hb) { - /* It is possible (but unlikely) that FW was busy and missed a - * heartbeat check but is still alive and will process this - * request, so don't clean the dev_cmd in this case. - */ - dev_dbg(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n", - ionic_opcode_to_str(opcode), opcode); + if (!done && !fw_up) { + ionic_dev_cmd_clean(ionic); + dev_warn(ionic->dev, "DEVCMD %s (%d) interrupted - FW is down\n", + ionic_opcode_to_str(opcode), opcode); return -ENXIO; } @@ -444,9 +481,9 @@ try_again: } if (!(opcode == IONIC_CMD_FW_CONTROL && err == IONIC_RC_EAGAIN)) - dev_err(ionic->dev, "DEV_CMD %s (%d) error, %s (%d) failed\n", - ionic_opcode_to_str(opcode), opcode, - ionic_error_to_str(err), err); + if (do_msg) + ionic_dev_cmd_dev_err_print(ionic, opcode, err, + ionic_error_to_errno(err)); return ionic_error_to_errno(err); } @@ -454,6 +491,16 @@ try_again: return 0; } +int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds) +{ + return __ionic_dev_cmd_wait(ionic, max_seconds, true); +} + +int ionic_dev_cmd_wait_nomsg(struct ionic *ionic, unsigned long max_seconds) +{ + return __ionic_dev_cmd_wait(ionic, max_seconds, false); +} + int ionic_setup(struct ionic *ionic) { int err; @@ -540,6 +587,9 @@ int ionic_reset(struct ionic *ionic) struct ionic_dev *idev = &ionic->idev; int err; + if (!ionic_is_fw_running(idev)) + return 0; + mutex_lock(&ionic->dev_cmd_lock); ionic_dev_cmd_reset(idev); err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); @@ -612,15 +662,17 @@ int ionic_port_init(struct ionic *ionic) int ionic_port_reset(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; - int err; + int err = 0; if (!idev->port_info) return 0; - mutex_lock(&ionic->dev_cmd_lock); - ionic_dev_cmd_port_reset(idev); - err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); - mutex_unlock(&ionic->dev_cmd_lock); + if (ionic_is_fw_running(idev)) { + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_port_reset(idev); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + } dma_free_coherent(ionic->dev, idev->port_info_sz, idev->port_info, idev->port_info_pa); @@ -628,9 +680,6 @@ int ionic_port_reset(struct ionic *ionic) idev->port_info = NULL; idev->port_info_pa = 0; - if (err) - dev_err(ionic->dev, "Failed to reset port\n"); - return err; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index f6e785f949f9..b7363376dfc8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -376,10 +376,24 @@ static int ionic_lif_filter_add(struct ionic_lif *lif, spin_unlock_bh(&lif->rx_filters.lock); - if (err == -ENOSPC) { - if (le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN) - lif->max_vlans = lif->nvlans; + /* store the max_vlans limit that we found */ + if (err == -ENOSPC && + le16_to_cpu(ctx.cmd.rx_filter_add.match) == IONIC_RX_FILTER_MATCH_VLAN) + lif->max_vlans = lif->nvlans; + + /* Prevent unnecessary error messages on recoverable + * errors as the filter will get retried on the next + * sync attempt. + */ + switch (err) { + case -ENOSPC: + case -ENXIO: + case -ETIMEDOUT: + case -EAGAIN: + case -EBUSY: return 0; + default: + break; } ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode, @@ -494,9 +508,22 @@ static int ionic_lif_filter_del(struct ionic_lif *lif, spin_unlock_bh(&lif->rx_filters.lock); if (state != IONIC_FILTER_STATE_NEW) { - err = ionic_adminq_post_wait(lif, &ctx); - if (err && err != -EEXIST) + err = ionic_adminq_post_wait_nomsg(lif, &ctx); + + switch (err) { + /* ignore these errors */ + case -EEXIST: + case -ENXIO: + case -ETIMEDOUT: + case -EAGAIN: + case -EBUSY: + case 0: + break; + default: + ionic_adminq_netdev_err_print(lif, ctx.cmd.cmd.opcode, + ctx.comp.comp.status, err); return err; + } } return 0; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c index fd6806b4a1b9..9859a4432985 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c @@ -151,7 +151,6 @@ static const struct ionic_stat_desc ionic_rx_stats_desc[] = { IONIC_RX_STAT_DESC(vlan_stripped), }; - #define IONIC_NUM_LIF_STATS ARRAY_SIZE(ionic_lif_stats_desc) #define IONIC_NUM_PORT_STATS ARRAY_SIZE(ionic_port_stats_desc) #define IONIC_NUM_TX_STATS ARRAY_SIZE(ionic_tx_stats_desc) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 94384f5d2a22..f54035455ad6 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -10,7 +10,6 @@ #include "ionic_lif.h" #include "ionic_txrx.h" - static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell, ionic_desc_cb cb_func, void *cb_arg) { @@ -669,27 +668,37 @@ dma_fail: return -EIO; } +static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q, + struct ionic_desc_info *desc_info) +{ + struct ionic_buf_info *buf_info = desc_info->bufs; + struct device *dev = q->dev; + unsigned int i; + + if (!desc_info->nbufs) + return; + + dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr, + buf_info->len, DMA_TO_DEVICE); + buf_info++; + for (i = 1; i < desc_info->nbufs; i++, buf_info++) + dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr, + buf_info->len, DMA_TO_DEVICE); + + desc_info->nbufs = 0; +} + static void ionic_tx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info, struct ionic_cq_info *cq_info, void *cb_arg) { - struct ionic_buf_info *buf_info = desc_info->bufs; struct ionic_tx_stats *stats = q_to_tx_stats(q); struct ionic_qcq *qcq = q_to_qcq(q); struct sk_buff *skb = cb_arg; - struct device *dev = q->dev; - unsigned int i; u16 qi; - if (desc_info->nbufs) { - dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr, - buf_info->len, DMA_TO_DEVICE); - buf_info++; - for (i = 1; i < desc_info->nbufs; i++, buf_info++) - dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr, - buf_info->len, DMA_TO_DEVICE); - } + ionic_tx_desc_unmap_bufs(q, desc_info); if (!skb) return; @@ -931,8 +940,11 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) err = ionic_tx_tcp_inner_pseudo_csum(skb); else err = ionic_tx_tcp_pseudo_csum(skb); - if (err) + if (err) { + /* clean up mapping from ionic_tx_map_skb */ + ionic_tx_desc_unmap_bufs(q, desc_info); return err; + } if (encap) hdrlen = skb_inner_transport_header(skb) - skb->data + @@ -1003,8 +1015,8 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) return 0; } -static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb, - struct ionic_desc_info *desc_info) +static void ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { struct ionic_txq_desc *desc = desc_info->txq_desc; struct ionic_buf_info *buf_info = desc_info->bufs; @@ -1038,12 +1050,10 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb, stats->crc32_csum++; else stats->csum++; - - return 0; } -static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb, - struct ionic_desc_info *desc_info) +static void ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { struct ionic_txq_desc *desc = desc_info->txq_desc; struct ionic_buf_info *buf_info = desc_info->bufs; @@ -1074,12 +1084,10 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb, desc->csum_offset = 0; stats->csum_none++; - - return 0; } -static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb, - struct ionic_desc_info *desc_info) +static void ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { struct ionic_txq_sg_desc *sg_desc = desc_info->txq_sg_desc; struct ionic_buf_info *buf_info = &desc_info->bufs[1]; @@ -1093,31 +1101,24 @@ static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb, } stats->frags += skb_shinfo(skb)->nr_frags; - - return 0; } static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb) { struct ionic_desc_info *desc_info = &q->info[q->head_idx]; struct ionic_tx_stats *stats = q_to_tx_stats(q); - int err; if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) return -EIO; /* set up the initial descriptor */ if (skb->ip_summed == CHECKSUM_PARTIAL) - err = ionic_tx_calc_csum(q, skb, desc_info); + ionic_tx_calc_csum(q, skb, desc_info); else - err = ionic_tx_calc_no_csum(q, skb, desc_info); - if (err) - return err; + ionic_tx_calc_no_csum(q, skb, desc_info); /* add frags */ - err = ionic_tx_skb_frags(q, skb, desc_info); - if (err) - return err; + ionic_tx_skb_frags(q, skb, desc_info); skb_tx_timestamp(skb); stats->pkts++; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index cc4ec2bb36db..672480c9d195 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -3098,6 +3098,9 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) continue; } + /* Some flows may keep variable set */ + p_hwfn->mcp_info->mcp_handling_status = 0; + rc = qed_calc_hw_mode(p_hwfn); if (rc) return rc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index da1eadabcb41..9fb1fa479d4b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -140,7 +140,7 @@ static struct qed_mcp_cmd_elem *qed_mcp_cmd_get_elem(struct qed_hwfn *p_hwfn, int qed_mcp_free(struct qed_hwfn *p_hwfn) { if (p_hwfn->mcp_info) { - struct qed_mcp_cmd_elem *p_cmd_elem, *p_tmp; + struct qed_mcp_cmd_elem *p_cmd_elem = NULL, *p_tmp; kfree(p_hwfn->mcp_info->mfw_mb_cur); kfree(p_hwfn->mcp_info->mfw_mb_shadow); @@ -249,6 +249,7 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) /* Initialize the MFW spinlock */ spin_lock_init(&p_info->cmd_lock); spin_lock_init(&p_info->link_lock); + spin_lock_init(&p_info->unload_lock); INIT_LIST_HEAD(&p_info->cmd_list); @@ -614,12 +615,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, usecs); } -int qed_mcp_cmd(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 cmd, - u32 param, - u32 *o_mcp_resp, - u32 *o_mcp_param) +static int _qed_mcp_cmd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 cmd, + u32 param, + u32 *o_mcp_resp, + u32 *o_mcp_param, + bool can_sleep) { struct qed_mcp_mb_params mb_params; int rc; @@ -627,6 +629,7 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn, memset(&mb_params, 0, sizeof(mb_params)); mb_params.cmd = cmd; mb_params.param = param; + mb_params.flags = can_sleep ? QED_MB_FLAG_CAN_SLEEP : 0; rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); if (rc) @@ -638,6 +641,28 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn, return 0; } +int qed_mcp_cmd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 cmd, + u32 param, + u32 *o_mcp_resp, + u32 *o_mcp_param) +{ + return (_qed_mcp_cmd(p_hwfn, p_ptt, cmd, param, + o_mcp_resp, o_mcp_param, true)); +} + +int qed_mcp_cmd_nosleep(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 cmd, + u32 param, + u32 *o_mcp_resp, + u32 *o_mcp_param) +{ + return (_qed_mcp_cmd(p_hwfn, p_ptt, cmd, param, + o_mcp_resp, o_mcp_param, false)); +} + static int qed_mcp_nvm_wr_cmd(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, @@ -1071,10 +1096,15 @@ int qed_mcp_load_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return 0; } +#define MFW_COMPLETION_MAX_ITER 5000 +#define MFW_COMPLETION_INTERVAL_MS 1 + int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_mcp_mb_params mb_params; + u32 cnt = MFW_COMPLETION_MAX_ITER; u32 wol_param; + int rc; switch (p_hwfn->cdev->wol_config) { case QED_OV_WOL_DISABLED: @@ -1097,7 +1127,23 @@ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) mb_params.param = wol_param; mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK; - return qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + spin_lock_bh(&p_hwfn->mcp_info->unload_lock); + set_bit(QED_MCP_BYPASS_PROC_BIT, + &p_hwfn->mcp_info->mcp_handling_status); + spin_unlock_bh(&p_hwfn->mcp_info->unload_lock); + + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + + while (test_bit(QED_MCP_IN_PROCESSING_BIT, + &p_hwfn->mcp_info->mcp_handling_status) && --cnt) + msleep(MFW_COMPLETION_INTERVAL_MS); + + if (!cnt) + DP_NOTICE(p_hwfn, + "Failed to wait MFW event completion after %d msec\n", + MFW_COMPLETION_MAX_ITER * MFW_COMPLETION_INTERVAL_MS); + + return rc; } int qed_mcp_unload_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) @@ -1728,8 +1774,8 @@ static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) qed_configure_pf_max_bandwidth(p_hwfn->cdev, p_info->bandwidth_max); /* Acknowledge the MFW */ - qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BW_UPDATE_ACK, 0, &resp, - ¶m); + qed_mcp_cmd_nosleep(p_hwfn, p_ptt, DRV_MSG_CODE_BW_UPDATE_ACK, 0, &resp, + ¶m); } static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) @@ -1766,8 +1812,8 @@ static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) p_hwfn->mcp_info->func_info.ovlan, p_hwfn->hw_info.hw_mode); /* Acknowledge the MFW */ - qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_S_TAG_UPDATE_ACK, 0, - &resp, ¶m); + qed_mcp_cmd_nosleep(p_hwfn, p_ptt, DRV_MSG_CODE_S_TAG_UPDATE_ACK, 0, + &resp, ¶m); } static void qed_mcp_handle_fan_failure(struct qed_hwfn *p_hwfn, @@ -1997,6 +2043,19 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn, "Msg [%d] - old CMD 0x%02x, new CMD 0x%02x\n", i, info->mfw_mb_shadow[i], info->mfw_mb_cur[i]); + spin_lock_bh(&p_hwfn->mcp_info->unload_lock); + if (test_bit(QED_MCP_BYPASS_PROC_BIT, + &p_hwfn->mcp_info->mcp_handling_status)) { + spin_unlock_bh(&p_hwfn->mcp_info->unload_lock); + DP_INFO(p_hwfn, + "Msg [%d] is bypassed on unload flow\n", i); + continue; + } + + set_bit(QED_MCP_IN_PROCESSING_BIT, + &p_hwfn->mcp_info->mcp_handling_status); + spin_unlock_bh(&p_hwfn->mcp_info->unload_lock); + switch (i) { case MFW_DRV_MSG_LINK_CHANGE: qed_mcp_handle_link_change(p_hwfn, p_ptt, false); @@ -2050,6 +2109,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn, DP_INFO(p_hwfn, "Unimplemented MFW message %d\n", i); rc = -EINVAL; } + + clear_bit(QED_MCP_IN_PROCESSING_BIT, + &p_hwfn->mcp_info->mcp_handling_status); } /* ACK everything */ @@ -3675,8 +3737,8 @@ static int qed_mcp_resource_cmd(struct qed_hwfn *p_hwfn, { int rc; - rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_RESOURCE_CMD, param, - p_mcp_resp, p_mcp_param); + rc = qed_mcp_cmd_nosleep(p_hwfn, p_ptt, DRV_MSG_CODE_RESOURCE_CMD, + param, p_mcp_resp, p_mcp_param); if (rc) return rc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 369e1892450a..9bd0565fe8ab 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -393,11 +393,12 @@ int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *p_board_config); /** - * qed_mcp_cmd(): General function for sending commands to the MCP + * qed_mcp_cmd(): Sleepable function for sending commands to the MCP * mailbox. It acquire mutex lock for the entire * operation, from sending the request until the MCP * response. Waiting for MCP response will be checked up - * to 5 seconds every 5ms. + * to 5 seconds every 10ms. Should not be called from atomic + * context. * * @p_hwfn: HW device data. * @p_ptt: PTT required for register access. @@ -417,6 +418,31 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn, u32 *o_mcp_param); /** + * qed_mcp_cmd_nosleep(): Function for sending commands to the MCP + * mailbox. It acquire mutex lock for the entire + * operation, from sending the request until the MCP + * response. Waiting for MCP response will be checked up + * to 5 seconds every 10us. Should be called when sleep + * is not allowed. + * + * @p_hwfn: HW device data. + * @p_ptt: PTT required for register access. + * @cmd: command to be sent to the MCP. + * @param: Optional param + * @o_mcp_resp: The MCP response code (exclude sequence). + * @o_mcp_param: Optional parameter provided by the MCP + * response + * + * Return: Int - 0 - Operation was successul. + */ +int qed_mcp_cmd_nosleep(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 cmd, + u32 param, + u32 *o_mcp_resp, + u32 *o_mcp_param); + +/** * qed_mcp_drain(): drains the nig, allowing completion to pass in * case of pauses. * (Should be called only from sleepable context) @@ -762,6 +788,14 @@ struct qed_mcp_info { /* S/N for debug data mailbox commands */ atomic_t dbg_data_seq; + + /* Spinlock used to sync the flag mcp_handling_status with + * the mfw events handler + */ + spinlock_t unload_lock; + unsigned long mcp_handling_status; +#define QED_MCP_BYPASS_PROC_BIT 0 +#define QED_MCP_IN_PROCESSING_BIT 1 }; struct qed_mcp_mb_params { diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 48cf4355bc47..0848b5529d48 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -2984,12 +2984,16 @@ static int qed_iov_pre_update_vport(struct qed_hwfn *hwfn, u8 mask = QED_ACCEPT_UCAST_UNMATCHED | QED_ACCEPT_MCAST_UNMATCHED; struct qed_filter_accept_flags *flags = ¶ms->accept_flags; struct qed_public_vf_info *vf_info; + u16 tlv_mask; + + tlv_mask = BIT(QED_IOV_VP_UPDATE_ACCEPT_PARAM) | + BIT(QED_IOV_VP_UPDATE_ACCEPT_ANY_VLAN); /* Untrusted VFs can't even be trusted to know that fact. * Simply indicate everything is configured fine, and trace * configuration 'behind their back'. */ - if (!(*tlvs & BIT(QED_IOV_VP_UPDATE_ACCEPT_PARAM))) + if (!(*tlvs & tlv_mask)) return 0; vf_info = qed_iov_get_public_vf_info(hwfn, vfid, true); @@ -3006,6 +3010,13 @@ static int qed_iov_pre_update_vport(struct qed_hwfn *hwfn, flags->tx_accept_filter &= ~mask; } + if (params->update_accept_any_vlan_flg) { + vf_info->accept_any_vlan = params->accept_any_vlan; + + if (vf_info->forced_vlan && !vf_info->is_trusted_configured) + params->accept_any_vlan = false; + } + return 0; } @@ -4719,6 +4730,7 @@ static int qed_get_vf_config(struct qed_dev *cdev, tx_rate = vf_info->tx_rate; ivi->max_tx_rate = tx_rate ? tx_rate : link.speed; ivi->min_tx_rate = qed_iov_get_vf_min_rate(hwfn, vf_id); + ivi->trusted = vf_info->is_trusted_request; return 0; } @@ -5149,6 +5161,12 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) params.update_ctl_frame_check = 1; params.mac_chk_en = !vf_info->is_trusted_configured; + params.update_accept_any_vlan_flg = 0; + + if (vf_info->accept_any_vlan && vf_info->forced_vlan) { + params.update_accept_any_vlan_flg = 1; + params.accept_any_vlan = vf_info->accept_any_vlan; + } if (vf_info->rx_accept_mode & mask) { flags->update_rx_mode_config = 1; @@ -5164,13 +5182,20 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) if (!vf_info->is_trusted_configured) { flags->rx_accept_filter &= ~mask; flags->tx_accept_filter &= ~mask; + params.accept_any_vlan = false; } if (flags->update_rx_mode_config || flags->update_tx_mode_config || - params.update_ctl_frame_check) + params.update_ctl_frame_check || + params.update_accept_any_vlan_flg) { + DP_VERBOSE(hwfn, QED_MSG_IOV, + "vport update config for %s VF[abs 0x%x rel 0x%x]\n", + vf_info->is_trusted_configured ? "trusted" : "untrusted", + vf->abs_vf_id, vf->relative_vf_id); qed_sp_vport_update(hwfn, ¶ms, QED_SPQ_MODE_EBLOCK, NULL); + } } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h index f448e3dd6c8b..6ee2493de164 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h @@ -62,6 +62,7 @@ struct qed_public_vf_info { bool is_trusted_request; u8 rx_accept_mode; u8 tx_accept_mode; + bool accept_any_vlan; }; struct qed_iov_vf_init_params { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 29cdcb2285b1..bcf3746220df 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -10,6 +10,7 @@ #include <linux/ipv6.h> #include <net/checksum.h> #include <linux/printk.h> +#include <linux/jiffies.h> #include "qlcnic.h" @@ -332,7 +333,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { if (ether_addr_equal(tmp_fil->faddr, (u8 *)&src_addr) && tmp_fil->vlan_id == vlan_id) { - if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) + if (time_is_before_jiffies(QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) qlcnic_change_filter(adapter, &src_addr, vlan_id, tx_ring); tmp_fil->ftime = jiffies; diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 955cce644392..c865a4be05ee 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -435,7 +435,7 @@ qcaspi_receive(struct qcaspi *qca) qca->rx_skb->protocol = eth_type_trans( qca->rx_skb, qca->rx_skb->dev); skb_checksum_none_assert(qca->rx_skb); - netif_rx_ni(qca->rx_skb); + netif_rx(qca->rx_skb); qca->rx_skb = netdev_alloc_skb_ip_align(net_dev, net_dev->mtu + VLAN_ETH_HLEN); if (!qca->rx_skb) { @@ -1001,7 +1001,7 @@ qca_spi_probe(struct spi_device *spi) return 0; } -static int +static void qca_spi_remove(struct spi_device *spi) { struct net_device *qcaspi_devs = spi_get_drvdata(spi); @@ -1011,8 +1011,6 @@ qca_spi_remove(struct spi_device *spi) unregister_netdev(qcaspi_devs); free_netdev(qcaspi_devs); - - return 0; } static const struct spi_device_id qca_spi_id[] = { diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c index 27c4f43176aa..26646cb6a20a 100644 --- a/drivers/net/ethernet/qualcomm/qca_uart.c +++ b/drivers/net/ethernet/qualcomm/qca_uart.c @@ -108,7 +108,7 @@ qca_tty_receive(struct serdev_device *serdev, const unsigned char *data, qca->rx_skb->protocol = eth_type_trans( qca->rx_skb, qca->rx_skb->dev); skb_checksum_none_assert(qca->rx_skb); - netif_rx_ni(qca->rx_skb); + netif_rx(qca->rx_skb); qca->rx_skb = netdev_alloc_skb_ip_align(netdev, netdev->mtu + VLAN_ETH_HLEN); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c index 3676976c875b..ba194698cc14 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c @@ -298,7 +298,6 @@ struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb, { struct rmnet_map_header *map_header; u32 padding, map_datalen; - u8 *padbytes; map_datalen = skb->len - hdrlen; map_header = (struct rmnet_map_header *) @@ -323,8 +322,7 @@ struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb, if (skb_tailroom(skb) < padding) return NULL; - padbytes = (u8 *)skb_put(skb, padding); - memset(padbytes, 0, padding); + skb_put_zero(skb, padding); done: map_header->pkt_len = htons(map_datalen + padding); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 19e2621e0645..67014eb76969 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2667,10 +2667,7 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_38: rtl_eri_set_bits(tp, 0xd4, 0x0c00); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53: - rtl_eri_set_bits(tp, 0xd4, 0x1f80); - break; - case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_63: r8168_mac_ocp_modify(tp, 0xc0ac, 0, 0x1f80); break; default: @@ -2678,13 +2675,48 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp) } } +static void rtl_disable_exit_l1(struct rtl8169_private *tp) +{ + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38: + rtl_eri_clear_bits(tp, 0xd4, 0x1f00); + break; + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_63: + r8168_mac_ocp_modify(tp, 0xc0ac, 0x1f80, 0); + break; + default: + break; + } +} + static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) { /* Don't enable ASPM in the chip if OS can't control ASPM */ if (enable && tp->aspm_manageable) { RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en); RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn); + + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_45 ... RTL_GIGA_MAC_VER_48: + case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63: + /* reset ephy tx/rx disable timer */ + r8168_mac_ocp_modify(tp, 0xe094, 0xff00, 0); + /* chip can trigger L1.2 */ + r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, BIT(2)); + break; + default: + break; + } } else { + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_45 ... RTL_GIGA_MAC_VER_48: + case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63: + r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, 0); + break; + default: + break; + } + RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn); RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en); } @@ -4683,7 +4715,7 @@ static void rtl8169_down(struct rtl8169_private *tp) rtl_pci_commit(tp); rtl8169_cleanup(tp, true); - + rtl_disable_exit_l1(tp); rtl_prepare_power_down(tp); } @@ -4843,8 +4875,6 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp) rtl8169_down(tp); } -#ifdef CONFIG_PM - static int rtl8169_runtime_resume(struct device *dev) { struct rtl8169_private *tp = dev_get_drvdata(dev); @@ -4860,7 +4890,7 @@ static int rtl8169_runtime_resume(struct device *dev) return 0; } -static int __maybe_unused rtl8169_suspend(struct device *device) +static int rtl8169_suspend(struct device *device) { struct rtl8169_private *tp = dev_get_drvdata(device); @@ -4873,7 +4903,7 @@ static int __maybe_unused rtl8169_suspend(struct device *device) return 0; } -static int __maybe_unused rtl8169_resume(struct device *device) +static int rtl8169_resume(struct device *device) { struct rtl8169_private *tp = dev_get_drvdata(device); @@ -4915,13 +4945,11 @@ static int rtl8169_runtime_idle(struct device *device) } static const struct dev_pm_ops rtl8169_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume) - SET_RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume, - rtl8169_runtime_idle) + SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume) + RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume, + rtl8169_runtime_idle) }; -#endif /* CONFIG_PM */ - static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp) { /* WoL fails with 8168b when the receiver is disabled. */ @@ -5255,6 +5283,16 @@ done: rtl_rar_set(tp, mac_addr); } +/* register is set if system vendor successfully tested ASPM 1.2 */ +static bool rtl_aspm_is_safe(struct rtl8169_private *tp) +{ + if (tp->mac_version >= RTL_GIGA_MAC_VER_60 && + r8168_mac_ocp_read(tp, 0xc0b2) & 0xf) + return true; + + return false; +} + static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { struct rtl8169_private *tp; @@ -5333,7 +5371,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) * Chips from RTL8168h partially have issues with L1.2, but seem * to work fine with L1 and L1.1. */ - if (tp->mac_version >= RTL_GIGA_MAC_VER_45) + if (rtl_aspm_is_safe(tp)) + rc = 0; + else if (tp->mac_version >= RTL_GIGA_MAC_VER_45) rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); else rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1); @@ -5460,9 +5500,7 @@ static struct pci_driver rtl8169_pci_driver = { .probe = rtl_init_one, .remove = rtl_remove_one, .shutdown = rtl_shutdown, -#ifdef CONFIG_PM - .driver.pm = &rtl8169_pm_ops, -#endif + .driver.pm = pm_ptr(&rtl8169_pm_ops), }; module_pci_driver(rtl8169_pci_driver); diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index f7ad5487879b..15c295f90196 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -429,15 +429,6 @@ static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = { { 0x0d, 0xf880 } }; -static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = { - { 0x1f, 0x0002 }, - { 0x05, 0x669a }, - { 0x1f, 0x0005 }, - { 0x05, 0x8330 }, - { 0x06, 0x669a }, - { 0x1f, 0x0002 } -}; - static void rtl8168d_apply_firmware_cond(struct rtl8169_private *tp, struct phy_device *phydev, u16 val) @@ -455,6 +446,29 @@ static void rtl8168d_apply_firmware_cond(struct rtl8169_private *tp, r8169_apply_firmware(tp); } +static void rtl8168d_1_common(struct phy_device *phydev) +{ + u16 val; + + phy_write_paged(phydev, 0x0002, 0x05, 0x669a); + r8168d_phy_param(phydev, 0x8330, 0xffff, 0x669a); + phy_write(phydev, 0x1f, 0x0002); + + val = phy_read(phydev, 0x0d); + + if ((val & 0x00ff) != 0x006c) { + static const u16 set[] = { + 0x0065, 0x0066, 0x0067, 0x0068, + 0x0069, 0x006a, 0x006b, 0x006c + }; + int i; + + val &= 0xff00; + for (i = 0; i < ARRAY_SIZE(set); i++) + phy_write(phydev, 0x0d, val | set[i]); + } +} + static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev) { @@ -469,25 +483,7 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp, phy_modify(phydev, 0x0c, 0x5d00, 0xa200); if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - int val; - - rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1); - - val = phy_read(phydev, 0x0d); - - if ((val & 0x00ff) != 0x006c) { - static const u32 set[] = { - 0x0065, 0x0066, 0x0067, 0x0068, - 0x0069, 0x006a, 0x006b, 0x006c - }; - int i; - - phy_write(phydev, 0x1f, 0x0002); - - val &= 0xff00; - for (i = 0; i < ARRAY_SIZE(set); i++) - phy_write(phydev, 0x0d, val | set[i]); - } + rtl8168d_1_common(phydev); } else { phy_write_paged(phydev, 0x0002, 0x05, 0x6662); r8168d_phy_param(phydev, 0x8330, 0xffff, 0x6662); @@ -513,24 +509,7 @@ static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp, rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_0); if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - int val; - - rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1); - - val = phy_read(phydev, 0x0d); - if ((val & 0x00ff) != 0x006c) { - static const u32 set[] = { - 0x0065, 0x0066, 0x0067, 0x0068, - 0x0069, 0x006a, 0x006b, 0x006c - }; - int i; - - phy_write(phydev, 0x1f, 0x0002); - - val &= 0xff00; - for (i = 0; i < ARRAY_SIZE(set); i++) - phy_write(phydev, 0x0d, val | set[i]); - } + rtl8168d_1_common(phydev); } else { phy_write_paged(phydev, 0x0002, 0x05, 0x2642); r8168d_phy_param(phydev, 0x8330, 0xffff, 0x2642); diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index b215cde68e10..525d66f71f02 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -475,7 +475,7 @@ static int ravb_ring_init(struct net_device *ndev, int q) goto error; for (i = 0; i < priv->num_rx_ring[q]; i++) { - skb = netdev_alloc_skb(ndev, info->max_rx_len); + skb = __netdev_alloc_skb(ndev, info->max_rx_len, GFP_KERNEL); if (!skb) goto error; ravb_set_buffer_align(skb); @@ -1432,11 +1432,7 @@ static int ravb_phy_init(struct net_device *ndev) * at this time. */ if (soc_device_match(r8a7795es10)) { - err = phy_set_max_speed(phydev, SPEED_100); - if (err) { - netdev_err(ndev, "failed to limit PHY to 100Mbit/s\n"); - goto err_phy_disconnect; - } + phy_set_max_speed(phydev, SPEED_100); netdev_info(ndev, "limited PHY to 100Mbit/s\n"); } @@ -1457,8 +1453,6 @@ static int ravb_phy_init(struct net_device *ndev) return 0; -err_phy_disconnect: - phy_disconnect(phydev); err_deregister_fixed_link: if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); @@ -2854,7 +2848,6 @@ static int ravb_wol_restore(struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); const struct ravb_hw_info *info = priv->info; - int ret; if (info->nc_queues) napi_enable(&priv->napi[RAVB_NC]); @@ -2863,9 +2856,7 @@ static int ravb_wol_restore(struct net_device *ndev) /* Disable MagicPacket */ ravb_modify(ndev, ECMR, ECMR_MPDE, 0); - ret = ravb_close(ndev); - if (ret < 0) - return ret; + ravb_close(ndev); return disable_irq_wake(priv->emac_irq); } diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index d947a628e166..67ade78fb767 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2026,14 +2026,8 @@ static int sh_eth_phy_init(struct net_device *ndev) } /* mask with MAC supported features */ - if (mdp->cd->register_type != SH_ETH_REG_GIGABIT) { - int err = phy_set_max_speed(phydev, SPEED_100); - if (err) { - netdev_err(ndev, "failed to limit PHY to 100 Mbit/s\n"); - phy_disconnect(phydev); - return err; - } - } + if (mdp->cd->register_type != SH_ETH_REG_GIGABIT) + phy_set_max_speed(phydev, SPEED_100); phy_attached_info(phydev); @@ -3450,9 +3444,7 @@ static int sh_eth_wol_restore(struct net_device *ndev) * both be reset and all registers restored. This is what * happens during suspend and resume without WoL enabled. */ - ret = sh_eth_close(ndev); - if (ret < 0) - return ret; + sh_eth_close(ndev); ret = sh_eth_open(ndev); if (ret < 0) return ret; @@ -3464,7 +3456,7 @@ static int sh_eth_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct sh_eth_private *mdp = netdev_priv(ndev); - int ret = 0; + int ret; if (!netif_running(ndev)) return 0; @@ -3483,7 +3475,7 @@ static int sh_eth_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct sh_eth_private *mdp = netdev_priv(ndev); - int ret = 0; + int ret; if (!netif_running(ndev)) return 0; diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 2881f5b2b5f4..407a1f8e3059 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -127,7 +127,7 @@ bool sxgbe_eee_init(struct sxgbe_priv_data * const priv) /* MAC core supports the EEE feature. */ if (priv->hw_cap.eee) { /* Check if the PHY supports EEE */ - if (phy_init_eee(ndev->phydev, 1)) + if (phy_init_eee(ndev->phydev, true)) return false; priv->eee_active = 1; diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index cf366ed2557c..50d535981a35 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3990,6 +3990,30 @@ static unsigned int ef10_check_caps(const struct efx_nic *efx, } } +static unsigned int efx_ef10_recycle_ring_size(const struct efx_nic *efx) +{ + unsigned int ret = EFX_RECYCLE_RING_SIZE_10G; + + /* There is no difference between PFs and VFs. The side is based on + * the maximum link speed of a given NIC. + */ + switch (efx->pci_dev->device & 0xfff) { + case 0x0903: /* Farmingdale can do up to 10G */ + break; + case 0x0923: /* Greenport can do up to 40G */ + case 0x0a03: /* Medford can do up to 40G */ + ret *= 4; + break; + default: /* Medford2 can do up to 100G */ + ret *= 10; + } + + if (IS_ENABLED(CONFIG_PPC64)) + ret *= 4; + + return ret; +} + #define EF10_OFFLOAD_FEATURES \ (NETIF_F_IP_CSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | \ @@ -4106,6 +4130,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = { .check_caps = ef10_check_caps, .print_additional_fwver = efx_ef10_print_additional_fwver, .sensor_event = efx_mcdi_sensor_event, + .rx_recycle_ring_size = efx_ef10_recycle_ring_size, }; const struct efx_nic_type efx_hunt_a0_nic_type = { @@ -4243,4 +4268,5 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .check_caps = ef10_check_caps, .print_additional_fwver = efx_ef10_print_additional_fwver, .sensor_event = efx_mcdi_sensor_event, + .rx_recycle_ring_size = efx_ef10_recycle_ring_size, }; diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index f79b14a119ae..a07cbf45a326 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -23,6 +23,7 @@ #include "ef100_rx.h" #include "ef100_tx.h" #include "ef100_netdev.h" +#include "rx_common.h" #define EF100_MAX_VIS 4096 #define EF100_NUM_MCDI_BUFFERS 1 @@ -696,6 +697,12 @@ static unsigned int ef100_check_caps(const struct efx_nic *efx, } } +static unsigned int efx_ef100_recycle_ring_size(const struct efx_nic *efx) +{ + /* Maximum link speed for Riverhead is 100G */ + return 10 * EFX_RECYCLE_RING_SIZE_10G; +} + /* NIC level access functions */ #define EF100_OFFLOAD_FEATURES (NETIF_F_HW_CSUM | NETIF_F_RXCSUM | \ @@ -770,6 +777,7 @@ const struct efx_nic_type ef100_pf_nic_type = { .rx_push_rss_context_config = efx_mcdi_rx_push_rss_context_config, .rx_pull_rss_context_config = efx_mcdi_rx_pull_rss_context_config, .rx_restore_rss_contexts = efx_mcdi_rx_restore_rss_contexts, + .rx_recycle_ring_size = efx_ef100_recycle_ring_size, .reconfigure_mac = ef100_reconfigure_mac, .reconfigure_port = efx_mcdi_port_reconfigure, @@ -849,6 +857,7 @@ const struct efx_nic_type ef100_vf_nic_type = { .rx_pull_rss_config = efx_mcdi_rx_pull_rss_config, .rx_push_rss_config = efx_mcdi_pf_rx_push_rss_config, .rx_restore_rss_contexts = efx_mcdi_rx_restore_rss_contexts, + .rx_recycle_ring_size = efx_ef100_recycle_ring_size, .reconfigure_mac = ef100_reconfigure_mac, .test_nvram = efx_new_mcdi_nvram_test_all, diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index ead550ae2709..d6fdcdc530ca 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -78,31 +78,48 @@ static const struct efx_channel_type efx_default_channel_type = { * INTERRUPTS *************/ -static unsigned int efx_wanted_parallelism(struct efx_nic *efx) +static unsigned int count_online_cores(struct efx_nic *efx, bool local_node) { - cpumask_var_t thread_mask; + cpumask_var_t filter_mask; unsigned int count; int cpu; + if (unlikely(!zalloc_cpumask_var(&filter_mask, GFP_KERNEL))) { + netif_warn(efx, probe, efx->net_dev, + "RSS disabled due to allocation failure\n"); + return 1; + } + + cpumask_copy(filter_mask, cpu_online_mask); + if (local_node) { + int numa_node = pcibus_to_node(efx->pci_dev->bus); + + cpumask_and(filter_mask, filter_mask, cpumask_of_node(numa_node)); + } + + count = 0; + for_each_cpu(cpu, filter_mask) { + ++count; + cpumask_andnot(filter_mask, filter_mask, topology_sibling_cpumask(cpu)); + } + + free_cpumask_var(filter_mask); + + return count; +} + +static unsigned int efx_wanted_parallelism(struct efx_nic *efx) +{ + unsigned int count; + if (rss_cpus) { count = rss_cpus; } else { - if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) { - netif_warn(efx, probe, efx->net_dev, - "RSS disabled due to allocation failure\n"); - return 1; - } + count = count_online_cores(efx, true); - count = 0; - for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, thread_mask)) { - ++count; - cpumask_or(thread_mask, thread_mask, - topology_sibling_cpumask(cpu)); - } - } - - free_cpumask_var(thread_mask); + /* If no online CPUs in local node, fallback to any online CPUs */ + if (count == 0) + count = count_online_cores(efx, false); } if (count > EFX_MAX_RX_QUEUES) { @@ -369,12 +386,20 @@ int efx_probe_interrupts(struct efx_nic *efx) #if defined(CONFIG_SMP) void efx_set_interrupt_affinity(struct efx_nic *efx) { + int numa_node = pcibus_to_node(efx->pci_dev->bus); + const struct cpumask *numa_mask = cpumask_of_node(numa_node); struct efx_channel *channel; unsigned int cpu; + /* If no online CPUs in local node, fallback to any online CPU */ + if (cpumask_first_and(cpu_online_mask, numa_mask) >= nr_cpu_ids) + numa_mask = cpu_online_mask; + + cpu = -1; efx_for_each_channel(channel, efx) { - cpu = cpumask_local_spread(channel->channel, - pcibus_to_node(efx->pci_dev->bus)); + cpu = cpumask_next_and(cpu, cpu_online_mask, numa_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first_and(cpu_online_mask, numa_mask); irq_set_affinity_hint(channel->irq, cpumask_of(cpu)); } } diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index cc15ee8812d9..c75dc75e2857 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1282,6 +1282,7 @@ struct efx_udp_tunnel { * @udp_tnl_has_port: Check if a port has been added as UDP tunnel * @print_additional_fwver: Dump NIC-specific additional FW version info * @sensor_event: Handle a sensor event from MCDI + * @rx_recycle_ring_size: Size of the RX recycle ring * @revision: Hardware architecture revision * @txd_ptr_tbl_base: TX descriptor ring base address * @rxd_ptr_tbl_base: RX descriptor ring base address @@ -1460,6 +1461,7 @@ struct efx_nic_type { size_t (*print_additional_fwver)(struct efx_nic *efx, char *buf, size_t len); void (*sensor_event)(struct efx_nic *efx, efx_qword_t *ev); + unsigned int (*rx_recycle_ring_size)(const struct efx_nic *efx); int revision; unsigned int txd_ptr_tbl_base; diff --git a/drivers/net/ethernet/sfc/nic_common.h b/drivers/net/ethernet/sfc/nic_common.h index b9cafe9cd568..0cef35c0c559 100644 --- a/drivers/net/ethernet/sfc/nic_common.h +++ b/drivers/net/ethernet/sfc/nic_common.h @@ -195,6 +195,11 @@ static inline void efx_sensor_event(struct efx_nic *efx, efx_qword_t *ev) efx->type->sensor_event(efx, ev); } +static inline unsigned int efx_rx_recycle_ring_size(const struct efx_nic *efx) +{ + return efx->type->rx_recycle_ring_size(efx); +} + /* Some statistics are computed as A - B where A and B each increase * linearly with some hardware counter(s) and the counters are read * asynchronously. If the counters contributing to B are always read diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index 633ca77a26fd..1b22c7be0088 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -23,13 +23,6 @@ module_param(rx_refill_threshold, uint, 0444); MODULE_PARM_DESC(rx_refill_threshold, "RX descriptor ring refill threshold (%)"); -/* Number of RX buffers to recycle pages for. When creating the RX page recycle - * ring, this number is divided by the number of buffers per page to calculate - * the number of pages to store in the RX page recycle ring. - */ -#define EFX_RECYCLE_RING_SIZE_IOMMU 4096 -#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH) - /* RX maximum head room required. * * This must be at least 1 to prevent overflow, plus one packet-worth @@ -141,16 +134,7 @@ static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue) unsigned int bufs_in_recycle_ring, page_ring_size; struct efx_nic *efx = rx_queue->efx; - /* Set the RX recycle ring size */ -#ifdef CONFIG_PPC64 - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; -#else - if (iommu_present(&pci_bus_type)) - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; - else - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU; -#endif /* CONFIG_PPC64 */ - + bufs_in_recycle_ring = efx_rx_recycle_ring_size(efx); page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / efx->rx_bufs_per_page); rx_queue->page_ring = kcalloc(page_ring_size, diff --git a/drivers/net/ethernet/sfc/rx_common.h b/drivers/net/ethernet/sfc/rx_common.h index 207ccd8ba062..fbd2769307f9 100644 --- a/drivers/net/ethernet/sfc/rx_common.h +++ b/drivers/net/ethernet/sfc/rx_common.h @@ -18,6 +18,12 @@ #define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ EFX_RX_USR_BUF_SIZE) +/* Number of RX buffers to recycle pages for. When creating the RX page recycle + * ring, this number is divided by the number of buffers per page to calculate + * the number of pages to store in the RX page recycle ring. + */ +#define EFX_RECYCLE_RING_SIZE_10G 256 + static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) { return page_address(buf->page) + buf->page_offset; diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 16347a6d0c47..ce3060e15b54 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -25,6 +25,7 @@ #include "mcdi_port_common.h" #include "selftest.h" #include "siena_sriov.h" +#include "rx_common.h" /* Hardware control for SFC9000 family including SFL9021 (aka Siena). */ @@ -958,6 +959,12 @@ static unsigned int siena_check_caps(const struct efx_nic *efx, return 0; } +static unsigned int efx_siena_recycle_ring_size(const struct efx_nic *efx) +{ + /* Maximum link speed is 10G */ + return EFX_RECYCLE_RING_SIZE_10G; +} + /************************************************************************** * * Revision-dependent attributes used by efx.c and nic.c @@ -1098,4 +1105,5 @@ const struct efx_nic_type siena_a0_nic_type = { .rx_hash_key_size = 16, .check_caps = siena_check_caps, .sensor_event = efx_mcdi_sensor_event, + .rx_recycle_ring_size = efx_siena_recycle_ring_size, }; diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 556bd353dd42..b0c5a44785fa 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1044,7 +1044,7 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) "rx failed to build skb\n"); break; } - page_pool_release_page(dring->page_pool, page); + skb_mark_for_recycle(skb); skb_reserve(skb, xdp.data - xdp.data_hard_start); skb_put(skb, xdp.data_end - xdp.data); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 8e8778cfbbad..63754a9c4ba7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -383,10 +383,10 @@ static int intel_crosststamp(ktime_t *device, /* Repeat until the timestamps are from the FIFO last segment */ for (i = 0; i < num_snapshot; i++) { - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); stmmac_get_ptptime(priv, ptpaddr, &ptp_time); *device = ns_to_ktime(ptp_time); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); get_arttime(priv->mii, intel_priv->mdio_adhoc_addr, &art_time); *system = convert_art_to_tsc(art_time); } @@ -721,6 +721,7 @@ static int tgl_common_data(struct pci_dev *pdev, plat->rx_queues_to_use = 6; plat->tx_queues_to_use = 4; plat->clk_ptp_rate = 200000000; + plat->speed_mode_2500 = intel_speed_mode_2500; plat->safety_feat_cfg->tsoee = 1; plat->safety_feat_cfg->mrxpee = 0; @@ -740,7 +741,6 @@ static int tgl_sgmii_phy0_data(struct pci_dev *pdev, { plat->bus_id = 1; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; - plat->speed_mode_2500 = intel_speed_mode_2500; plat->serdes_powerup = intel_serdes_powerup; plat->serdes_powerdown = intel_serdes_powerdown; return tgl_common_data(pdev, plat); @@ -755,7 +755,6 @@ static int tgl_sgmii_phy1_data(struct pci_dev *pdev, { plat->bus_id = 2; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; - plat->speed_mode_2500 = intel_speed_mode_2500; plat->serdes_powerup = intel_serdes_powerup; plat->serdes_powerdown = intel_serdes_powerdown; return tgl_common_data(pdev, plat); @@ -1160,6 +1159,7 @@ static SIMPLE_DEV_PM_OPS(intel_eth_pm_ops, intel_eth_pci_suspend, #define PCI_DEVICE_ID_INTEL_TGL_SGMII1G 0xa0ac #define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_0 0x7aac #define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_1 0x7aad +#define PCI_DEVICE_ID_INTEL_ADLN_SGMII1G 0x54ac static const struct pci_device_id intel_eth_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, QUARK, &quark_info) }, @@ -1177,6 +1177,7 @@ static const struct pci_device_id intel_eth_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1, &tgl_sgmii1g_phy1_info) }, { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0, &adls_sgmii1g_phy0_info) }, { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1, &adls_sgmii1g_phy1_info) }, + { PCI_DEVICE_DATA(INTEL, ADLN_SGMII1G, &tgl_sgmii1g_phy0_info) }, {} }; MODULE_DEVICE_TABLE(pci, intel_eth_pci_id_table); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 2ffa0a11eea5..0cc28c79cc61 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -78,6 +78,7 @@ struct ethqos_emac_por { struct ethqos_emac_driver_data { const struct ethqos_emac_por *por; unsigned int num_por; + bool rgmii_config_looback_en; }; struct qcom_ethqos { @@ -90,6 +91,7 @@ struct qcom_ethqos { const struct ethqos_emac_por *por; unsigned int num_por; + bool rgmii_config_looback_en; }; static int rgmii_readl(struct qcom_ethqos *ethqos, unsigned int offset) @@ -181,6 +183,22 @@ static const struct ethqos_emac_por emac_v2_3_0_por[] = { static const struct ethqos_emac_driver_data emac_v2_3_0_data = { .por = emac_v2_3_0_por, .num_por = ARRAY_SIZE(emac_v2_3_0_por), + .rgmii_config_looback_en = true, +}; + +static const struct ethqos_emac_por emac_v2_1_0_por[] = { + { .offset = RGMII_IO_MACRO_CONFIG, .value = 0x40C01343 }, + { .offset = SDCC_HC_REG_DLL_CONFIG, .value = 0x2004642C }, + { .offset = SDCC_HC_REG_DDR_CONFIG, .value = 0x00000000 }, + { .offset = SDCC_HC_REG_DLL_CONFIG2, .value = 0x00200000 }, + { .offset = SDCC_USR_CTL, .value = 0x00010800 }, + { .offset = RGMII_IO_MACRO_CONFIG2, .value = 0x00002060 }, +}; + +static const struct ethqos_emac_driver_data emac_v2_1_0_data = { + .por = emac_v2_1_0_por, + .num_por = ARRAY_SIZE(emac_v2_1_0_por), + .rgmii_config_looback_en = false, }; static int ethqos_dll_configure(struct qcom_ethqos *ethqos) @@ -297,8 +315,12 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_DLY_EN, SDCC_DDR_CONFIG_PRG_DLY_EN, SDCC_HC_REG_DDR_CONFIG); - rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, - RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG); + if (ethqos->rgmii_config_looback_en) + rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, + RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG); + else + rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, + 0, RGMII_IO_MACRO_CONFIG); break; case SPEED_100: @@ -331,8 +353,13 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, SDCC_HC_REG_DDR_CONFIG); - rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, - RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG); + if (ethqos->rgmii_config_looback_en) + rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, + RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG); + else + rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, + 0, RGMII_IO_MACRO_CONFIG); + break; case SPEED_10: @@ -504,6 +531,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) data = of_device_get_match_data(&pdev->dev); ethqos->por = data->por; ethqos->num_por = data->num_por; + ethqos->rgmii_config_looback_en = data->rgmii_config_looback_en; ethqos->rgmii_clk = devm_clk_get(&pdev->dev, "rgmii"); if (IS_ERR(ethqos->rgmii_clk)) { @@ -558,6 +586,7 @@ static int qcom_ethqos_remove(struct platform_device *pdev) static const struct of_device_id qcom_ethqos_match[] = { { .compatible = "qcom,qcs404-ethqos", .data = &emac_v2_3_0_data}, + { .compatible = "qcom,sm8150-ethqos", .data = &emac_v2_1_0_data}, { } }; MODULE_DEVICE_TABLE(of, qcom_ethqos_match); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 09644ab0d87a..f86cc83003f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -16,6 +16,7 @@ #include <linux/of_net.h> #include <linux/phy.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <linux/regulator/consumer.h> #include <linux/regmap.h> #include <linux/stmmac.h> @@ -57,7 +58,6 @@ struct emac_variant { }; /* struct sunxi_priv_data - hold all sunxi private data - * @tx_clk: reference to MAC TX clock * @ephy_clk: reference to the optional EPHY clock for the internal PHY * @regulator: reference to the optional regulator * @rst_ephy: reference to the optional EPHY reset for the internal PHY @@ -68,7 +68,6 @@ struct emac_variant { * @mux_handle: Internal pointer used by mdio-mux lib */ struct sunxi_priv_data { - struct clk *tx_clk; struct clk *ephy_clk; struct regulator *regulator; struct reset_control *rst_ephy; @@ -579,22 +578,14 @@ static int sun8i_dwmac_init(struct platform_device *pdev, void *priv) } } - ret = clk_prepare_enable(gmac->tx_clk); - if (ret) { - dev_err(&pdev->dev, "Could not enable AHB clock\n"); - goto err_disable_regulator; - } - if (gmac->use_internal_phy) { ret = sun8i_dwmac_power_internal_phy(netdev_priv(ndev)); if (ret) - goto err_disable_clk; + goto err_disable_regulator; } return 0; -err_disable_clk: - clk_disable_unprepare(gmac->tx_clk); err_disable_regulator: if (gmac->regulator) regulator_disable(gmac->regulator); @@ -1043,8 +1034,6 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) if (gmac->variant->soc_has_internal_phy) sun8i_dwmac_unpower_internal_phy(gmac); - clk_disable_unprepare(gmac->tx_clk); - if (gmac->regulator) regulator_disable(gmac->regulator); } @@ -1167,12 +1156,6 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) return -EINVAL; } - gmac->tx_clk = devm_clk_get(dev, "stmmaceth"); - if (IS_ERR(gmac->tx_clk)) { - dev_err(dev, "Could not get TX clock\n"); - return PTR_ERR(gmac->tx_clk); - } - /* Optional regulator for PHY */ gmac->regulator = devm_regulator_get_optional(dev, "phy"); if (IS_ERR(gmac->regulator)) { @@ -1254,6 +1237,12 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) ndev = dev_get_drvdata(&pdev->dev); priv = netdev_priv(ndev); + /* the MAC is runtime suspended after stmmac_dvr_probe(), so we + * need to ensure the MAC resume back before other operations such + * as reset. + */ + pm_runtime_get_sync(&pdev->dev); + /* The mux must be registered after parent MDIO * so after stmmac_dvr_probe() */ @@ -1272,12 +1261,15 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) goto dwmac_remove; } + pm_runtime_put(&pdev->dev); + return 0; dwmac_mux: reset_control_put(gmac->rst_ephy); clk_put(gmac->ephy_clk); dwmac_remove: + pm_runtime_put_noidle(&pdev->dev); stmmac_dvr_remove(&pdev->dev); dwmac_exit: sun8i_dwmac_exit(pdev, gmac); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 5b195d5051d6..57970ae2178d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -263,7 +263,7 @@ struct stmmac_priv { u32 adv_ts; int use_riwt; int irq_wake; - spinlock_t ptp_lock; + rwlock_t ptp_lock; /* Protects auxiliary snapshot registers from concurrent access. */ struct mutex aux_ts_lock; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index a7ec9f4d46ce..22fea0f67245 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -196,9 +196,9 @@ static void timestamp_interrupt(struct stmmac_priv *priv) GMAC_TIMESTAMP_ATSNS_SHIFT; for (i = 0; i < num_snapshot; i++) { - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); get_ptptime(priv->ptpaddr, &ptp_time); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); event.type = PTP_CLOCK_EXTTS; event.index = 0; event.timestamp = ptp_time; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 422e3225f476..c1bfd89a5a1f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -938,105 +938,15 @@ static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex) priv->pause, tx_cnt); } -static void stmmac_validate(struct phylink_config *config, - unsigned long *supported, - struct phylink_link_state *state) +static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); - __ETHTOOL_DECLARE_LINK_MODE_MASK(mac_supported) = { 0, }; - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; - int tx_cnt = priv->plat->tx_queues_to_use; - int max_speed = priv->plat->max_speed; - - phylink_set(mac_supported, 10baseT_Half); - phylink_set(mac_supported, 10baseT_Full); - phylink_set(mac_supported, 100baseT_Half); - phylink_set(mac_supported, 100baseT_Full); - phylink_set(mac_supported, 1000baseT_Half); - phylink_set(mac_supported, 1000baseT_Full); - phylink_set(mac_supported, 1000baseKX_Full); - - phylink_set(mac_supported, Autoneg); - phylink_set(mac_supported, Pause); - phylink_set(mac_supported, Asym_Pause); - phylink_set_port_modes(mac_supported); - - /* Cut down 1G if asked to */ - if ((max_speed > 0) && (max_speed < 1000)) { - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseX_Full); - } else if (priv->plat->has_gmac4) { - if (!max_speed || max_speed >= 2500) { - phylink_set(mac_supported, 2500baseT_Full); - phylink_set(mac_supported, 2500baseX_Full); - } - } else if (priv->plat->has_xgmac) { - if (!max_speed || (max_speed >= 2500)) { - phylink_set(mac_supported, 2500baseT_Full); - phylink_set(mac_supported, 2500baseX_Full); - } - if (!max_speed || (max_speed >= 5000)) { - phylink_set(mac_supported, 5000baseT_Full); - } - if (!max_speed || (max_speed >= 10000)) { - phylink_set(mac_supported, 10000baseSR_Full); - phylink_set(mac_supported, 10000baseLR_Full); - phylink_set(mac_supported, 10000baseER_Full); - phylink_set(mac_supported, 10000baseLRM_Full); - phylink_set(mac_supported, 10000baseT_Full); - phylink_set(mac_supported, 10000baseKX4_Full); - phylink_set(mac_supported, 10000baseKR_Full); - } - if (!max_speed || (max_speed >= 25000)) { - phylink_set(mac_supported, 25000baseCR_Full); - phylink_set(mac_supported, 25000baseKR_Full); - phylink_set(mac_supported, 25000baseSR_Full); - } - if (!max_speed || (max_speed >= 40000)) { - phylink_set(mac_supported, 40000baseKR4_Full); - phylink_set(mac_supported, 40000baseCR4_Full); - phylink_set(mac_supported, 40000baseSR4_Full); - phylink_set(mac_supported, 40000baseLR4_Full); - } - if (!max_speed || (max_speed >= 50000)) { - phylink_set(mac_supported, 50000baseCR2_Full); - phylink_set(mac_supported, 50000baseKR2_Full); - phylink_set(mac_supported, 50000baseSR2_Full); - phylink_set(mac_supported, 50000baseKR_Full); - phylink_set(mac_supported, 50000baseSR_Full); - phylink_set(mac_supported, 50000baseCR_Full); - phylink_set(mac_supported, 50000baseLR_ER_FR_Full); - phylink_set(mac_supported, 50000baseDR_Full); - } - if (!max_speed || (max_speed >= 100000)) { - phylink_set(mac_supported, 100000baseKR4_Full); - phylink_set(mac_supported, 100000baseSR4_Full); - phylink_set(mac_supported, 100000baseCR4_Full); - phylink_set(mac_supported, 100000baseLR4_ER4_Full); - phylink_set(mac_supported, 100000baseKR2_Full); - phylink_set(mac_supported, 100000baseSR2_Full); - phylink_set(mac_supported, 100000baseCR2_Full); - phylink_set(mac_supported, 100000baseLR2_ER2_FR2_Full); - phylink_set(mac_supported, 100000baseDR2_Full); - } - } - - /* Half-Duplex can only work with single queue */ - if (tx_cnt > 1) { - phylink_set(mask, 10baseT_Half); - phylink_set(mask, 100baseT_Half); - phylink_set(mask, 1000baseT_Half); - } - - linkmode_and(supported, supported, mac_supported); - linkmode_andnot(supported, supported, mask); - linkmode_and(state->advertising, state->advertising, mac_supported); - linkmode_andnot(state->advertising, state->advertising, mask); + if (!priv->hw->xpcs) + return NULL; - /* If PCS is supported, check which modes it supports. */ - if (priv->hw->xpcs) - xpcs_validate(priv->hw->xpcs, supported, state); + return &priv->hw->xpcs->pcs; } static void stmmac_mac_config(struct phylink_config *config, unsigned int mode, @@ -1175,7 +1085,8 @@ static void stmmac_mac_link_up(struct phylink_config *config, } static const struct phylink_mac_ops stmmac_phylink_mac_ops = { - .validate = stmmac_validate, + .validate = phylink_generic_validate, + .mac_select_pcs = stmmac_mac_select_pcs, .mac_config = stmmac_mac_config, .mac_link_down = stmmac_mac_link_down, .mac_link_up = stmmac_mac_link_up, @@ -1255,12 +1166,12 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) { struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data; struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node); + int max_speed = priv->plat->max_speed; int mode = priv->plat->phy_interface; struct phylink *phylink; priv->phylink_config.dev = &priv->dev->dev; priv->phylink_config.type = PHYLINK_NETDEV; - priv->phylink_config.pcs_poll = true; if (priv->plat->mdio_bus_data) priv->phylink_config.ovr_an_inband = mdio_bus_data->xpcs_an_inband; @@ -1268,14 +1179,50 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) if (!fwnode) fwnode = dev_fwnode(priv->device); + /* Set the platform/firmware specified interface mode */ + __set_bit(mode, priv->phylink_config.supported_interfaces); + + /* If we have an xpcs, it defines which PHY interfaces are supported. */ + if (priv->hw->xpcs) + xpcs_get_interfaces(priv->hw->xpcs, + priv->phylink_config.supported_interfaces); + + priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100; + + if (!max_speed || max_speed >= 1000) + priv->phylink_config.mac_capabilities |= MAC_1000; + + if (priv->plat->has_gmac4) { + if (!max_speed || max_speed >= 2500) + priv->phylink_config.mac_capabilities |= MAC_2500FD; + } else if (priv->plat->has_xgmac) { + if (!max_speed || max_speed >= 2500) + priv->phylink_config.mac_capabilities |= MAC_2500FD; + if (!max_speed || max_speed >= 5000) + priv->phylink_config.mac_capabilities |= MAC_5000FD; + if (!max_speed || max_speed >= 10000) + priv->phylink_config.mac_capabilities |= MAC_10000FD; + if (!max_speed || max_speed >= 25000) + priv->phylink_config.mac_capabilities |= MAC_25000FD; + if (!max_speed || max_speed >= 40000) + priv->phylink_config.mac_capabilities |= MAC_40000FD; + if (!max_speed || max_speed >= 50000) + priv->phylink_config.mac_capabilities |= MAC_50000FD; + if (!max_speed || max_speed >= 100000) + priv->phylink_config.mac_capabilities |= MAC_100000FD; + } + + /* Half-Duplex can only work with single queue */ + if (priv->plat->tx_queues_to_use > 1) + priv->phylink_config.mac_capabilities &= + ~(MAC_10HD | MAC_100HD | MAC_1000HD); + phylink = phylink_create(&priv->phylink_config, fwnode, mode, &stmmac_phylink_mac_ops); if (IS_ERR(phylink)) return PTR_ERR(phylink); - if (priv->hw->xpcs) - phylink_set_pcs(phylink, &priv->hw->xpcs->pcs); - priv->phylink = phylink; return 0; } @@ -3328,7 +3275,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) ret = stmmac_init_ptp(priv); if (ret == -EOPNOTSUPP) - netdev_warn(priv->dev, "PTP not supported by HW\n"); + netdev_info(priv->dev, "PTP not supported by HW\n"); else if (ret) netdev_warn(priv->dev, "PTP init failed\n"); else if (ptp_register) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 1c9f02f9c317..e45fb191d8e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -39,9 +39,9 @@ static int stmmac_adjust_freq(struct ptp_clock_info *ptp, s32 ppb) diff = div_u64(adj, 1000000000ULL); addend = neg_adj ? (addend - diff) : (addend + diff); - spin_lock_irqsave(&priv->ptp_lock, flags); + write_lock_irqsave(&priv->ptp_lock, flags); stmmac_config_addend(priv, priv->ptpaddr, addend); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + write_unlock_irqrestore(&priv->ptp_lock, flags); return 0; } @@ -86,9 +86,9 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) mutex_unlock(&priv->plat->est->lock); } - spin_lock_irqsave(&priv->ptp_lock, flags); + write_lock_irqsave(&priv->ptp_lock, flags); stmmac_adjust_systime(priv, priv->ptpaddr, sec, nsec, neg_adj, xmac); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + write_unlock_irqrestore(&priv->ptp_lock, flags); /* Caculate new basetime and re-configured EST after PTP time adjust. */ if (est_rst) { @@ -137,9 +137,9 @@ static int stmmac_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts) unsigned long flags; u64 ns = 0; - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); stmmac_get_systime(priv, priv->ptpaddr, &ns); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); *ts = ns_to_timespec64(ns); @@ -162,9 +162,9 @@ static int stmmac_set_time(struct ptp_clock_info *ptp, container_of(ptp, struct stmmac_priv, ptp_clock_ops); unsigned long flags; - spin_lock_irqsave(&priv->ptp_lock, flags); + write_lock_irqsave(&priv->ptp_lock, flags); stmmac_init_systime(priv, priv->ptpaddr, ts->tv_sec, ts->tv_nsec); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + write_unlock_irqrestore(&priv->ptp_lock, flags); return 0; } @@ -194,12 +194,12 @@ static int stmmac_enable(struct ptp_clock_info *ptp, cfg->period.tv_sec = rq->perout.period.sec; cfg->period.tv_nsec = rq->perout.period.nsec; - spin_lock_irqsave(&priv->ptp_lock, flags); + write_lock_irqsave(&priv->ptp_lock, flags); ret = stmmac_flex_pps_config(priv, priv->ioaddr, rq->perout.index, cfg, on, priv->sub_second_inc, priv->systime_flags); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + write_unlock_irqrestore(&priv->ptp_lock, flags); break; case PTP_CLK_REQ_EXTTS: priv->plat->ext_snapshot_en = on; @@ -314,7 +314,7 @@ void stmmac_ptp_register(struct stmmac_priv *priv) stmmac_ptp_clock_ops.n_per_out = priv->dma_cap.pps_out_num; stmmac_ptp_clock_ops.n_ext_ts = priv->dma_cap.aux_snapshot_n; - spin_lock_init(&priv->ptp_lock); + rwlock_init(&priv->ptp_lock); mutex_init(&priv->aux_ts_lock); priv->ptp_clock_ops = stmmac_ptp_clock_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index be3cb63675a5..9f1759593b94 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -1777,9 +1777,9 @@ static int stmmac_test_tbs(struct stmmac_priv *priv) if (ret) return ret; - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); stmmac_get_systime(priv, priv->ptpaddr, &curr_time); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); if (!curr_time) { ret = -EOPNOTSUPP; @@ -1799,9 +1799,9 @@ static int stmmac_test_tbs(struct stmmac_priv *priv) goto fail_disable; /* Check if expected time has elapsed */ - spin_lock_irqsave(&priv->ptp_lock, flags); + read_lock_irqsave(&priv->ptp_lock, flags); stmmac_get_systime(priv, priv->ptpaddr, &curr_time); - spin_unlock_irqrestore(&priv->ptp_lock, flags); + read_unlock_irqrestore(&priv->ptp_lock, flags); if ((curr_time - start_time) < STMMAC_TBS_LT_OFFSET) ret = -EINVAL; diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index dba9f12efa1c..b04a6a7bf566 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -88,6 +88,7 @@ #include <asm/io.h> #include <asm/byteorder.h> #include <linux/uaccess.h> +#include <linux/jiffies.h> #define cas_page_map(x) kmap_atomic((x)) #define cas_page_unmap(x) kunmap_atomic((x)) @@ -1234,19 +1235,6 @@ static void cas_init_rx_dma(struct cas *cp) */ readl(cp->regs + REG_INTR_STATUS_ALIAS); writel(INTR_RX_DONE | INTR_RX_BUF_UNAVAIL, cp->regs + REG_ALIAS_CLEAR); - if (cp->cas_flags & CAS_FLAG_REG_PLUS) { - for (i = 1; i < N_RX_COMP_RINGS; i++) - readl(cp->regs + REG_PLUS_INTRN_STATUS_ALIAS(i)); - - /* 2 is different from 3 and 4 */ - if (N_RX_COMP_RINGS > 1) - writel(INTR_RX_DONE_ALT | INTR_RX_BUF_UNAVAIL_1, - cp->regs + REG_PLUS_ALIASN_CLEAR(1)); - - for (i = 2; i < N_RX_COMP_RINGS; i++) - writel(INTR_RX_DONE_ALT, - cp->regs + REG_PLUS_ALIASN_CLEAR(i)); - } /* set up pause thresholds */ val = CAS_BASE(RX_PAUSE_THRESH_OFF, @@ -3508,9 +3496,6 @@ enable_rx_done: if (N_RX_DESC_RINGS > 1) writel(RX_DESC_RINGN_SIZE(1) - 4, cp->regs + REG_PLUS_RX_KICK1); - - for (i = 1; i < N_RX_COMP_RINGS; i++) - writel(0, cp->regs + REG_PLUS_RX_COMPN_TAIL(i)); } } @@ -4063,8 +4048,8 @@ static void cas_link_timer(struct timer_list *t) if (link_transition_timeout != 0 && cp->link_transition_jiffies_valid && - ((jiffies - cp->link_transition_jiffies) > - (link_transition_timeout))) { + time_is_before_jiffies(cp->link_transition_jiffies + + link_transition_timeout)) { /* One-second counter so link-down workaround doesn't * cause resets to occur so fast as to fool the switch * into thinking the link is down. @@ -4679,7 +4664,7 @@ static void cas_set_msglevel(struct net_device *dev, u32 value) static int cas_get_regs_len(struct net_device *dev) { struct cas *cp = netdev_priv(dev); - return cp->casreg_len < CAS_MAX_REGS ? cp->casreg_len: CAS_MAX_REGS; + return min_t(int, cp->casreg_len, CAS_MAX_REGS); } static void cas_get_regs(struct net_device *dev, struct ethtool_regs *regs, diff --git a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c index 599708a3e81d..d4c56da98a6a 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c +++ b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c @@ -237,15 +237,11 @@ static int am65_cpsw_port_vlans_add(struct am65_cpsw_port *port, { bool untag = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; struct net_device *orig_dev = vlan->obj.orig_dev; - bool cpu_port = netif_is_bridge_master(orig_dev); bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; netdev_dbg(port->ndev, "VID add: %s: vid:%u flags:%X\n", port->ndev->name, vlan->vid, vlan->flags); - if (cpu_port && !(vlan->flags & BRIDGE_VLAN_INFO_BRENTRY)) - return 0; - return am65_cpsw_port_vlan_add(port, untag, pvid, vlan->vid, orig_dev); } diff --git a/drivers/net/ethernet/ti/cpsw_switchdev.c b/drivers/net/ethernet/ti/cpsw_switchdev.c index a7d97d429e06..ce85f7610273 100644 --- a/drivers/net/ethernet/ti/cpsw_switchdev.c +++ b/drivers/net/ethernet/ti/cpsw_switchdev.c @@ -252,15 +252,11 @@ static int cpsw_port_vlans_add(struct cpsw_priv *priv, { bool untag = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; struct net_device *orig_dev = vlan->obj.orig_dev; - bool cpu_port = netif_is_bridge_master(orig_dev); bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; dev_dbg(priv->dev, "VID add: %s: vid:%u flags:%X\n", priv->ndev->name, vlan->vid, vlan->flags); - if (cpu_port && !(vlan->flags & BRIDGE_VLAN_INFO_BRENTRY)) - return 0; - return cpsw_port_vlan_add(priv, untag, pvid, vlan->vid, orig_dev); } diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 89a31783fbb4..eb39a45de012 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -362,7 +362,7 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) mse102x_dump_packet(__func__, skb->len, skb->data); skb->protocol = eth_type_trans(skb, mse->ndev); - netif_rx_ni(skb); + netif_rx(skb); mse->ndev->stats.rx_packets++; mse->ndev->stats.rx_bytes += rxlen; @@ -731,7 +731,7 @@ static int mse102x_probe_spi(struct spi_device *spi) return 0; } -static int mse102x_remove_spi(struct spi_device *spi) +static void mse102x_remove_spi(struct spi_device *spi) { struct mse102x_net *mse = dev_get_drvdata(&spi->dev); struct mse102x_net_spi *mses = to_mse102x_spi(mse); @@ -741,8 +741,6 @@ static int mse102x_remove_spi(struct spi_device *spi) mse102x_remove_device_debugfs(mses); unregister_netdev(mse->ndev); - - return 0; } static const struct of_device_id mse102x_match_table[] = { diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c index 7779a36da3c8..7c52796273a4 100644 --- a/drivers/net/ethernet/wiznet/w5100-spi.c +++ b/drivers/net/ethernet/wiznet/w5100-spi.c @@ -461,11 +461,9 @@ static int w5100_spi_probe(struct spi_device *spi) return w5100_probe(&spi->dev, ops, priv_size, mac, spi->irq, -EINVAL); } -static int w5100_spi_remove(struct spi_device *spi) +static void w5100_spi_remove(struct spi_device *spi) { w5100_remove(&spi->dev); - - return 0; } static const struct spi_device_id w5100_spi_ids[] = { diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index ae24d6b86803..4fd7c39e1123 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -883,7 +883,7 @@ static void w5100_rx_work(struct work_struct *work) struct sk_buff *skb; while ((skb = w5100_rx_skb(priv->ndev))) - netif_rx_ni(skb); + netif_rx(skb); w5100_enable_intr(priv); } diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 911b5ef9e680..0014729b8865 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only # -# Xilink device configuration +# Xilinx device configuration # config NET_VENDOR_XILINX diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h index 4a73127e10a6..c6395c406418 100644 --- a/drivers/net/ethernet/xilinx/ll_temac.h +++ b/drivers/net/ethernet/xilinx/ll_temac.h @@ -271,7 +271,7 @@ This option defaults to enabled (set) */ #define XTE_TIE_OFFSET 0x000003A4 /* Interrupt enable */ -/** MII Mamagement Control register (MGTCR) */ +/* MII Management Control register (MGTCR) */ #define XTE_MGTDR_OFFSET 0x000003B0 /* MII data */ #define XTE_MIIMAI_OFFSET 0x000003B4 /* MII control */ @@ -283,7 +283,7 @@ This option defaults to enabled (set) */ #define STS_CTRL_APP0_ERR (1 << 31) #define STS_CTRL_APP0_IRQONEND (1 << 30) -/* undoccumented */ +/* undocumented */ #define STS_CTRL_APP0_STOPONEND (1 << 29) #define STS_CTRL_APP0_CMPLT (1 << 28) #define STS_CTRL_APP0_SOP (1 << 27) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 64c7e26c3b75..869e362e09c1 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -361,8 +361,9 @@ static int temac_dma_bd_init(struct net_device *ndev) lp->rx_bd_v[i].next = cpu_to_be32(lp->rx_bd_p + sizeof(*lp->rx_bd_v) * ((i + 1) % lp->rx_bd_num)); - skb = netdev_alloc_skb_ip_align(ndev, - XTE_MAX_JUMBO_FRAME_SIZE); + skb = __netdev_alloc_skb_ip_align(ndev, + XTE_MAX_JUMBO_FRAME_SIZE, + GFP_KERNEL); if (!skb) goto out; @@ -1008,7 +1009,7 @@ static void ll_temac_recv(struct net_device *ndev) (skb->len > 64)) { /* Convert from device endianness (be32) to cpu - * endiannes, and if necessary swap the bytes + * endianness, and if necessary swap the bytes * (back) for proper IP checksum byte order * (be16). */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 5b4d153b1492..0f9c88dd1a4a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -119,11 +119,11 @@ #define XAXIDMA_IRQ_ERROR_MASK 0x00004000 /* Error interrupt */ #define XAXIDMA_IRQ_ALL_MASK 0x00007000 /* All interrupts */ -/* Default TX/RX Threshold and waitbound values for SGDMA mode */ +/* Default TX/RX Threshold and delay timer values for SGDMA mode */ #define XAXIDMA_DFT_TX_THRESHOLD 24 -#define XAXIDMA_DFT_TX_WAITBOUND 254 -#define XAXIDMA_DFT_RX_THRESHOLD 24 -#define XAXIDMA_DFT_RX_WAITBOUND 254 +#define XAXIDMA_DFT_TX_USEC 50 +#define XAXIDMA_DFT_RX_THRESHOLD 1 +#define XAXIDMA_DFT_RX_USEC 50 #define XAXIDMA_BD_CTRL_TXSOF_MASK 0x08000000 /* First tx packet */ #define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */ @@ -385,7 +385,9 @@ struct axidma_bd { * @phy_node: Pointer to device node structure * @phylink: Pointer to phylink instance * @phylink_config: phylink configuration settings + * @napi: NAPI control structure * @pcs_phy: Reference to PCS/PMA PHY if used + * @pcs: phylink pcs structure for PCS PHY * @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core * @axi_clk: AXI4-Lite bus clock * @misc_clks: Misc ethernet clocks (AXI4-Stream, Ref, MGT clocks) @@ -394,6 +396,7 @@ struct axidma_bd { * @regs_start: Resource start for axienet device addresses * @regs: Base address for the axienet_local device address space * @dma_regs: Base address for the axidma device address space + * @rx_dma_cr: Nominal content of RX DMA control register * @dma_err_task: Work structure to process Axi DMA errors * @tx_irq: Axidma TX IRQ number * @rx_irq: Axidma RX IRQ number @@ -422,7 +425,9 @@ struct axidma_bd { * @csum_offload_on_tx_path: Stores the checksum selection on TX side. * @csum_offload_on_rx_path: Stores the checksum selection on RX side. * @coalesce_count_rx: Store the irq coalesce on RX side. + * @coalesce_usec_rx: IRQ coalesce delay for RX * @coalesce_count_tx: Store the irq coalesce on TX side. + * @coalesce_usec_tx: IRQ coalesce delay for TX */ struct axienet_local { struct net_device *ndev; @@ -433,7 +438,10 @@ struct axienet_local { struct phylink *phylink; struct phylink_config phylink_config; + struct napi_struct napi; + struct mdio_device *pcs_phy; + struct phylink_pcs pcs; bool switch_x_sgmii; @@ -447,6 +455,8 @@ struct axienet_local { void __iomem *regs; void __iomem *dma_regs; + u32 rx_dma_cr; + struct work_struct dma_err_task; int tx_irq; @@ -474,7 +484,9 @@ struct axienet_local { int csum_offload_on_rx_path; u32 coalesce_count_rx; + u32 coalesce_usec_rx; u32 coalesce_count_tx; + u32 coalesce_usec_tx; }; /** diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 377c94ec2486..c7eb05e4a6bf 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -7,7 +7,7 @@ * Copyright (c) 2008-2009 Secret Lab Technologies Ltd. * Copyright (c) 2010 - 2011 Michal Simek <monstr@monstr.eu> * Copyright (c) 2010 - 2011 PetaLogix - * Copyright (c) 2019 SED Systems, a division of Calian Ltd. + * Copyright (c) 2019 - 2022 Calian Advanced Technologies * Copyright (c) 2010 - 2012 Xilinx, Inc. All rights reserved. * * This is a driver for the Xilinx Axi Ethernet which is used in the Virtex6 @@ -33,7 +33,7 @@ #include <linux/of_irq.h> #include <linux/of_address.h> #include <linux/skbuff.h> -#include <linux/spinlock.h> +#include <linux/math64.h> #include <linux/phy.h> #include <linux/mii.h> #include <linux/ethtool.h> @@ -190,7 +190,7 @@ static void axienet_dma_bd_release(struct net_device *ndev) struct axienet_local *lp = netdev_priv(ndev); /* If we end up here, tx_bd_v must have been DMA allocated. */ - dma_free_coherent(ndev->dev.parent, + dma_free_coherent(lp->dev, sizeof(*lp->tx_bd_v) * lp->tx_bd_num, lp->tx_bd_v, lp->tx_bd_p); @@ -215,18 +215,90 @@ static void axienet_dma_bd_release(struct net_device *ndev) */ if (lp->rx_bd_v[i].cntrl) { phys = desc_get_phys_addr(lp, &lp->rx_bd_v[i]); - dma_unmap_single(ndev->dev.parent, phys, + dma_unmap_single(lp->dev, phys, lp->max_frm_size, DMA_FROM_DEVICE); } } - dma_free_coherent(ndev->dev.parent, + dma_free_coherent(lp->dev, sizeof(*lp->rx_bd_v) * lp->rx_bd_num, lp->rx_bd_v, lp->rx_bd_p); } /** + * axienet_usec_to_timer - Calculate IRQ delay timer value + * @lp: Pointer to the axienet_local structure + * @coalesce_usec: Microseconds to convert into timer value + */ +static u32 axienet_usec_to_timer(struct axienet_local *lp, u32 coalesce_usec) +{ + u32 result; + u64 clk_rate = 125000000; /* arbitrary guess if no clock rate set */ + + if (lp->axi_clk) + clk_rate = clk_get_rate(lp->axi_clk); + + /* 1 Timeout Interval = 125 * (clock period of SG clock) */ + result = DIV64_U64_ROUND_CLOSEST((u64)coalesce_usec * clk_rate, + (u64)125000000); + if (result > 255) + result = 255; + + return result; +} + +/** + * axienet_dma_start - Set up DMA registers and start DMA operation + * @lp: Pointer to the axienet_local structure + */ +static void axienet_dma_start(struct axienet_local *lp) +{ + u32 tx_cr; + + /* Start updating the Rx channel control register */ + lp->rx_dma_cr = (lp->coalesce_count_rx << XAXIDMA_COALESCE_SHIFT) | + XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK; + /* Only set interrupt delay timer if not generating an interrupt on + * the first RX packet. Otherwise leave at 0 to disable delay interrupt. + */ + if (lp->coalesce_count_rx > 1) + lp->rx_dma_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_rx) + << XAXIDMA_DELAY_SHIFT) | + XAXIDMA_IRQ_DELAY_MASK; + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr); + + /* Start updating the Tx channel control register */ + tx_cr = (lp->coalesce_count_tx << XAXIDMA_COALESCE_SHIFT) | + XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK; + /* Only set interrupt delay timer if not generating an interrupt on + * the first TX packet. Otherwise leave at 0 to disable delay interrupt. + */ + if (lp->coalesce_count_tx > 1) + tx_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_tx) + << XAXIDMA_DELAY_SHIFT) | + XAXIDMA_IRQ_DELAY_MASK; + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, tx_cr); + + /* Populate the tail pointer and bring the Rx Axi DMA engine out of + * halted state. This will make the Rx side ready for reception. + */ + axienet_dma_out_addr(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p); + lp->rx_dma_cr |= XAXIDMA_CR_RUNSTOP_MASK; + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr); + axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p + + (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1))); + + /* Write to the RS (Run-stop) bit in the Tx channel control register. + * Tx channel is now ready to run. But only after we write to the + * tail pointer register that the Tx channel will start transmitting. + */ + axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p); + tx_cr |= XAXIDMA_CR_RUNSTOP_MASK; + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, tx_cr); +} + +/** * axienet_dma_bd_init - Setup buffer descriptor rings for Axi DMA * @ndev: Pointer to the net_device structure * @@ -238,7 +310,6 @@ static void axienet_dma_bd_release(struct net_device *ndev) */ static int axienet_dma_bd_init(struct net_device *ndev) { - u32 cr; int i; struct sk_buff *skb; struct axienet_local *lp = netdev_priv(ndev); @@ -249,13 +320,13 @@ static int axienet_dma_bd_init(struct net_device *ndev) lp->rx_bd_ci = 0; /* Allocate the Tx and Rx buffer descriptors. */ - lp->tx_bd_v = dma_alloc_coherent(ndev->dev.parent, + lp->tx_bd_v = dma_alloc_coherent(lp->dev, sizeof(*lp->tx_bd_v) * lp->tx_bd_num, &lp->tx_bd_p, GFP_KERNEL); if (!lp->tx_bd_v) return -ENOMEM; - lp->rx_bd_v = dma_alloc_coherent(ndev->dev.parent, + lp->rx_bd_v = dma_alloc_coherent(lp->dev, sizeof(*lp->rx_bd_v) * lp->rx_bd_num, &lp->rx_bd_p, GFP_KERNEL); if (!lp->rx_bd_v) @@ -285,9 +356,9 @@ static int axienet_dma_bd_init(struct net_device *ndev) goto out; lp->rx_bd_v[i].skb = skb; - addr = dma_map_single(ndev->dev.parent, skb->data, + addr = dma_map_single(lp->dev, skb->data, lp->max_frm_size, DMA_FROM_DEVICE); - if (dma_mapping_error(ndev->dev.parent, addr)) { + if (dma_mapping_error(lp->dev, addr)) { netdev_err(ndev, "DMA mapping error\n"); goto out; } @@ -296,50 +367,7 @@ static int axienet_dma_bd_init(struct net_device *ndev) lp->rx_bd_v[i].cntrl = lp->max_frm_size; } - /* Start updating the Rx channel control register */ - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - /* Update the interrupt coalesce count */ - cr = ((cr & ~XAXIDMA_COALESCE_MASK) | - ((lp->coalesce_count_rx) << XAXIDMA_COALESCE_SHIFT)); - /* Update the delay timer count */ - cr = ((cr & ~XAXIDMA_DELAY_MASK) | - (XAXIDMA_DFT_RX_WAITBOUND << XAXIDMA_DELAY_SHIFT)); - /* Enable coalesce, delay timer and error interrupts */ - cr |= XAXIDMA_IRQ_ALL_MASK; - /* Write to the Rx channel control register */ - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); - - /* Start updating the Tx channel control register */ - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - /* Update the interrupt coalesce count */ - cr = (((cr & ~XAXIDMA_COALESCE_MASK)) | - ((lp->coalesce_count_tx) << XAXIDMA_COALESCE_SHIFT)); - /* Update the delay timer count */ - cr = (((cr & ~XAXIDMA_DELAY_MASK)) | - (XAXIDMA_DFT_TX_WAITBOUND << XAXIDMA_DELAY_SHIFT)); - /* Enable coalesce, delay timer and error interrupts */ - cr |= XAXIDMA_IRQ_ALL_MASK; - /* Write to the Tx channel control register */ - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); - - /* Populate the tail pointer and bring the Rx Axi DMA engine out of - * halted state. This will make the Rx side ready for reception. - */ - axienet_dma_out_addr(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p); - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, - cr | XAXIDMA_CR_RUNSTOP_MASK); - axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p + - (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1))); - - /* Write to the RS (Run-stop) bit in the Tx channel control register. - * Tx channel is now ready to run. But only after we write to the - * tail pointer register that the Tx channel will start transmitting. - */ - axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p); - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, - cr | XAXIDMA_CR_RUNSTOP_MASK); + axienet_dma_start(lp); return 0; out: @@ -531,13 +559,51 @@ static int __axienet_device_reset(struct axienet_local *lp) } /** + * axienet_dma_stop - Stop DMA operation + * @lp: Pointer to the axienet_local structure + */ +static void axienet_dma_stop(struct axienet_local *lp) +{ + int count; + u32 cr, sr; + + cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); + cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + synchronize_irq(lp->rx_irq); + + cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); + cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + synchronize_irq(lp->tx_irq); + + /* Give DMAs a chance to halt gracefully */ + sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); + for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) { + msleep(20); + sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); + } + + sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); + for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) { + msleep(20); + sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); + } + + /* Do a reset to ensure DMA is really stopped */ + axienet_lock_mii(lp); + __axienet_device_reset(lp); + axienet_unlock_mii(lp); +} + +/** * axienet_device_reset - Reset and initialize the Axi Ethernet hardware. * @ndev: Pointer to the net_device structure * * This function is called to reset and initialize the Axi Ethernet core. This * is typically called during initialization. It does a reset of the Axi DMA * Rx/Tx channels and initializes the Axi DMA BDs. Since Axi DMA reset lines - * areconnected to Axi Ethernet reset lines, this in turn resets the Axi + * are connected to Axi Ethernet reset lines, this in turn resets the Axi * Ethernet core. No separate hardware reset is done for the Axi Ethernet * core. * Returns 0 on success or a negative error number otherwise. @@ -636,7 +702,7 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd, /* Ensure we see complete descriptor update */ dma_rmb(); phys = desc_get_phys_addr(lp, cur_p); - dma_unmap_single(ndev->dev.parent, phys, + dma_unmap_single(lp->dev, phys, (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK), DMA_TO_DEVICE); @@ -774,9 +840,9 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) cur_p->app0 |= 2; /* Tx Full Checksum Offload Enabled */ } - phys = dma_map_single(ndev->dev.parent, skb->data, + phys = dma_map_single(lp->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(ndev->dev.parent, phys))) { + if (unlikely(dma_mapping_error(lp->dev, phys))) { if (net_ratelimit()) netdev_err(ndev, "TX DMA mapping error\n"); ndev->stats.tx_dropped++; @@ -790,11 +856,11 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) lp->tx_bd_tail = 0; cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; frag = &skb_shinfo(skb)->frags[ii]; - phys = dma_map_single(ndev->dev.parent, + phys = dma_map_single(lp->dev, skb_frag_address(frag), skb_frag_size(frag), DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(ndev->dev.parent, phys))) { + if (unlikely(dma_mapping_error(lp->dev, phys))) { if (net_ratelimit()) netdev_err(ndev, "TX DMA mapping error\n"); ndev->stats.tx_dropped++; @@ -833,79 +899,84 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) } /** - * axienet_recv - Is called from Axi DMA Rx Isr to complete the received - * BD processing. - * @ndev: Pointer to net_device structure. + * axienet_poll - Triggered by RX ISR to complete the received BD processing. + * @napi: Pointer to NAPI structure. + * @budget: Max number of packets to process. * - * This function is invoked from the Axi DMA Rx isr to process the Rx BDs. It - * does minimal processing and invokes "netif_rx" to complete further - * processing. + * Return: Number of RX packets processed. */ -static void axienet_recv(struct net_device *ndev) +static int axienet_poll(struct napi_struct *napi, int budget) { u32 length; u32 csumstatus; u32 size = 0; - u32 packets = 0; + int packets = 0; dma_addr_t tail_p = 0; - struct axienet_local *lp = netdev_priv(ndev); - struct sk_buff *skb, *new_skb; struct axidma_bd *cur_p; + struct sk_buff *skb, *new_skb; + struct axienet_local *lp = container_of(napi, struct axienet_local, napi); cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; - while ((cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) { + while (packets < budget && (cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) { dma_addr_t phys; - tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; - /* Ensure we see complete descriptor update */ dma_rmb(); - phys = desc_get_phys_addr(lp, cur_p); - dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size, - DMA_FROM_DEVICE); skb = cur_p->skb; cur_p->skb = NULL; - length = cur_p->app4 & 0x0000FFFF; - - skb_put(skb, length); - skb->protocol = eth_type_trans(skb, ndev); - /*skb_checksum_none_assert(skb);*/ - skb->ip_summed = CHECKSUM_NONE; - - /* if we're doing Rx csum offload, set it up */ - if (lp->features & XAE_FEATURE_FULL_RX_CSUM) { - csumstatus = (cur_p->app2 & - XAE_FULL_CSUM_STATUS_MASK) >> 3; - if ((csumstatus == XAE_IP_TCP_CSUM_VALIDATED) || - (csumstatus == XAE_IP_UDP_CSUM_VALIDATED)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* skb could be NULL if a previous pass already received the + * packet for this slot in the ring, but failed to refill it + * with a newly allocated buffer. In this case, don't try to + * receive it again. + */ + if (likely(skb)) { + length = cur_p->app4 & 0x0000FFFF; + + phys = desc_get_phys_addr(lp, cur_p); + dma_unmap_single(lp->dev, phys, lp->max_frm_size, + DMA_FROM_DEVICE); + + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, lp->ndev); + /*skb_checksum_none_assert(skb);*/ + skb->ip_summed = CHECKSUM_NONE; + + /* if we're doing Rx csum offload, set it up */ + if (lp->features & XAE_FEATURE_FULL_RX_CSUM) { + csumstatus = (cur_p->app2 & + XAE_FULL_CSUM_STATUS_MASK) >> 3; + if (csumstatus == XAE_IP_TCP_CSUM_VALIDATED || + csumstatus == XAE_IP_UDP_CSUM_VALIDATED) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 && + skb->protocol == htons(ETH_P_IP) && + skb->len > 64) { + skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF); + skb->ip_summed = CHECKSUM_COMPLETE; } - } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 && - skb->protocol == htons(ETH_P_IP) && - skb->len > 64) { - skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF); - skb->ip_summed = CHECKSUM_COMPLETE; - } - netif_rx(skb); + napi_gro_receive(napi, skb); - size += length; - packets++; + size += length; + packets++; + } - new_skb = netdev_alloc_skb_ip_align(ndev, lp->max_frm_size); + new_skb = napi_alloc_skb(napi, lp->max_frm_size); if (!new_skb) - return; + break; - phys = dma_map_single(ndev->dev.parent, new_skb->data, + phys = dma_map_single(lp->dev, new_skb->data, lp->max_frm_size, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(ndev->dev.parent, phys))) { + if (unlikely(dma_mapping_error(lp->dev, phys))) { if (net_ratelimit()) - netdev_err(ndev, "RX DMA mapping error\n"); + netdev_err(lp->ndev, "RX DMA mapping error\n"); dev_kfree_skb(new_skb); - return; + break; } desc_set_phys_addr(lp, phys, cur_p); @@ -913,16 +984,30 @@ static void axienet_recv(struct net_device *ndev) cur_p->status = 0; cur_p->skb = new_skb; + /* Only update tail_p to mark this slot as usable after it has + * been successfully refilled. + */ + tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; + if (++lp->rx_bd_ci >= lp->rx_bd_num) lp->rx_bd_ci = 0; cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; } - ndev->stats.rx_packets += packets; - ndev->stats.rx_bytes += size; + lp->ndev->stats.rx_packets += packets; + lp->ndev->stats.rx_bytes += size; if (tail_p) axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, tail_p); + + if (packets < budget && napi_complete_done(napi, packets)) { + /* Re-enable RX completion interrupts. This should + * cause an immediate interrupt if any RX packets are + * already pending. + */ + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr); + } + return packets; } /** @@ -937,41 +1022,27 @@ static void axienet_recv(struct net_device *ndev) */ static irqreturn_t axienet_tx_irq(int irq, void *_ndev) { - u32 cr; unsigned int status; struct net_device *ndev = _ndev; struct axienet_local *lp = netdev_priv(ndev); status = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); - if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) { - axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status); - axienet_start_xmit_done(lp->ndev); - goto out; - } + if (!(status & XAXIDMA_IRQ_ALL_MASK)) return IRQ_NONE; - if (status & XAXIDMA_IRQ_ERROR_MASK) { - dev_err(&ndev->dev, "DMA Tx error 0x%x\n", status); - dev_err(&ndev->dev, "Current BD is at: 0x%x%08x\n", - (lp->tx_bd_v[lp->tx_bd_ci]).phys_msb, - (lp->tx_bd_v[lp->tx_bd_ci]).phys); - - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - /* Disable coalesce, delay timer and error interrupts */ - cr &= (~XAXIDMA_IRQ_ALL_MASK); - /* Write to the Tx channel control register */ - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); - - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - /* Disable coalesce, delay timer and error interrupts */ - cr &= (~XAXIDMA_IRQ_ALL_MASK); - /* Write to the Rx channel control register */ - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status); + + if (unlikely(status & XAXIDMA_IRQ_ERROR_MASK)) { + netdev_err(ndev, "DMA Tx error 0x%x\n", status); + netdev_err(ndev, "Current BD is at: 0x%x%08x\n", + (lp->tx_bd_v[lp->tx_bd_ci]).phys_msb, + (lp->tx_bd_v[lp->tx_bd_ci]).phys); schedule_work(&lp->dma_err_task); - axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status); + } else { + axienet_start_xmit_done(lp->ndev); } -out: + return IRQ_HANDLED; } @@ -982,46 +1053,40 @@ out: * * Return: IRQ_HANDLED if device generated a RX interrupt, IRQ_NONE otherwise. * - * This is the Axi DMA Rx Isr. It invokes "axienet_recv" to complete the BD + * This is the Axi DMA Rx Isr. It invokes NAPI polling to complete the RX BD * processing. */ static irqreturn_t axienet_rx_irq(int irq, void *_ndev) { - u32 cr; unsigned int status; struct net_device *ndev = _ndev; struct axienet_local *lp = netdev_priv(ndev); status = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); - if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) { - axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status); - axienet_recv(lp->ndev); - goto out; - } + if (!(status & XAXIDMA_IRQ_ALL_MASK)) return IRQ_NONE; - if (status & XAXIDMA_IRQ_ERROR_MASK) { - dev_err(&ndev->dev, "DMA Rx error 0x%x\n", status); - dev_err(&ndev->dev, "Current BD is at: 0x%x%08x\n", - (lp->rx_bd_v[lp->rx_bd_ci]).phys_msb, - (lp->rx_bd_v[lp->rx_bd_ci]).phys); - - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - /* Disable coalesce, delay timer and error interrupts */ - cr &= (~XAXIDMA_IRQ_ALL_MASK); - /* Finally write to the Tx channel control register */ - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); - - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - /* Disable coalesce, delay timer and error interrupts */ - cr &= (~XAXIDMA_IRQ_ALL_MASK); - /* write to the Rx channel control register */ - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status); + + if (unlikely(status & XAXIDMA_IRQ_ERROR_MASK)) { + netdev_err(ndev, "DMA Rx error 0x%x\n", status); + netdev_err(ndev, "Current BD is at: 0x%x%08x\n", + (lp->rx_bd_v[lp->rx_bd_ci]).phys_msb, + (lp->rx_bd_v[lp->rx_bd_ci]).phys); schedule_work(&lp->dma_err_task); - axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status); + } else { + /* Disable further RX completion interrupts and schedule + * NAPI receive. + */ + u32 cr = lp->rx_dma_cr; + + cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + + napi_schedule(&lp->napi); } -out: + return IRQ_HANDLED; } @@ -1095,6 +1160,8 @@ static int axienet_open(struct net_device *ndev) /* Enable worker thread for Axi DMA error handling */ INIT_WORK(&lp->dma_err_task, axienet_dma_err_handler); + napi_enable(&lp->napi); + /* Enable interrupts for Axi DMA Tx */ ret = request_irq(lp->tx_irq, axienet_tx_irq, IRQF_SHARED, ndev->name, ndev); @@ -1120,6 +1187,7 @@ err_eth_irq: err_rx_irq: free_irq(lp->tx_irq, ndev); err_tx_irq: + napi_disable(&lp->napi); phylink_stop(lp->phylink); phylink_disconnect_phy(lp->phylink); cancel_work_sync(&lp->dma_err_task); @@ -1139,46 +1207,22 @@ err_tx_irq: */ static int axienet_stop(struct net_device *ndev) { - u32 cr, sr; - int count; struct axienet_local *lp = netdev_priv(ndev); dev_dbg(&ndev->dev, "axienet_close()\n"); + napi_disable(&lp->napi); + phylink_stop(lp->phylink); phylink_disconnect_phy(lp->phylink); axienet_setoptions(ndev, lp->options & ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); - - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + axienet_dma_stop(lp); axienet_iow(lp, XAE_IE_OFFSET, 0); - /* Give DMAs a chance to halt gracefully */ - sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); - for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) { - msleep(20); - sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); - } - - sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); - for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) { - msleep(20); - sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); - } - - /* Do a reset to ensure DMA is really stopped */ - axienet_lock_mii(lp); - __axienet_device_reset(lp); - axienet_unlock_mii(lp); - cancel_work_sync(&lp->dma_err_task); if (lp->eth_irq > 0) @@ -1449,14 +1493,12 @@ axienet_ethtools_get_coalesce(struct net_device *ndev, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { - u32 regval = 0; struct axienet_local *lp = netdev_priv(ndev); - regval = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - ecoalesce->rx_max_coalesced_frames = (regval & XAXIDMA_COALESCE_MASK) - >> XAXIDMA_COALESCE_SHIFT; - regval = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - ecoalesce->tx_max_coalesced_frames = (regval & XAXIDMA_COALESCE_MASK) - >> XAXIDMA_COALESCE_SHIFT; + + ecoalesce->rx_max_coalesced_frames = lp->coalesce_count_rx; + ecoalesce->rx_coalesce_usecs = lp->coalesce_usec_rx; + ecoalesce->tx_max_coalesced_frames = lp->coalesce_count_tx; + ecoalesce->tx_coalesce_usecs = lp->coalesce_usec_tx; return 0; } @@ -1489,8 +1531,12 @@ axienet_ethtools_set_coalesce(struct net_device *ndev, if (ecoalesce->rx_max_coalesced_frames) lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames; + if (ecoalesce->rx_coalesce_usecs) + lp->coalesce_usec_rx = ecoalesce->rx_coalesce_usecs; if (ecoalesce->tx_max_coalesced_frames) lp->coalesce_count_tx = ecoalesce->tx_max_coalesced_frames; + if (ecoalesce->tx_coalesce_usecs) + lp->coalesce_usec_tx = ecoalesce->tx_coalesce_usecs; return 0; } @@ -1521,7 +1567,8 @@ static int axienet_ethtools_nway_reset(struct net_device *dev) } static const struct ethtool_ops axienet_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USECS, .get_drvinfo = axienet_ethtools_get_drvinfo, .get_regs_len = axienet_ethtools_get_regs_len, .get_regs = axienet_ethtools_get_regs, @@ -1537,78 +1584,78 @@ static const struct ethtool_ops axienet_ethtool_ops = { .nway_reset = axienet_ethtools_nway_reset, }; -static void axienet_mac_pcs_get_state(struct phylink_config *config, - struct phylink_link_state *state) +static struct axienet_local *pcs_to_axienet_local(struct phylink_pcs *pcs) { - struct net_device *ndev = to_net_dev(config->dev); - struct axienet_local *lp = netdev_priv(ndev); + return container_of(pcs, struct axienet_local, pcs); +} - switch (state->interface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_1000BASEX: - phylink_mii_c22_pcs_get_state(lp->pcs_phy, state); - break; - default: - break; - } +static void axienet_pcs_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state) +{ + struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy; + + phylink_mii_c22_pcs_get_state(pcs_phy, state); } -static void axienet_mac_an_restart(struct phylink_config *config) +static void axienet_pcs_an_restart(struct phylink_pcs *pcs) { - struct net_device *ndev = to_net_dev(config->dev); - struct axienet_local *lp = netdev_priv(ndev); + struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy; - phylink_mii_c22_pcs_an_restart(lp->pcs_phy); + phylink_mii_c22_pcs_an_restart(pcs_phy); } -static int axienet_mac_prepare(struct phylink_config *config, unsigned int mode, - phy_interface_t iface) +static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising, + bool permit_pause_to_mac) { - struct net_device *ndev = to_net_dev(config->dev); + struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy; + struct net_device *ndev = pcs_to_axienet_local(pcs)->ndev; struct axienet_local *lp = netdev_priv(ndev); int ret; - switch (iface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_1000BASEX: - if (!lp->switch_x_sgmii) - return 0; - - ret = mdiobus_write(lp->pcs_phy->bus, - lp->pcs_phy->addr, - XLNX_MII_STD_SELECT_REG, - iface == PHY_INTERFACE_MODE_SGMII ? + if (lp->switch_x_sgmii) { + ret = mdiodev_write(pcs_phy, XLNX_MII_STD_SELECT_REG, + interface == PHY_INTERFACE_MODE_SGMII ? XLNX_MII_STD_SELECT_SGMII : 0); - if (ret < 0) - netdev_warn(ndev, "Failed to switch PHY interface: %d\n", + if (ret < 0) { + netdev_warn(ndev, + "Failed to switch PHY interface: %d\n", ret); - return ret; - default: - return 0; + return ret; + } } + + ret = phylink_mii_c22_pcs_config(pcs_phy, mode, interface, advertising); + if (ret < 0) + netdev_warn(ndev, "Failed to configure PCS: %d\n", ret); + + return ret; } -static void axienet_mac_config(struct phylink_config *config, unsigned int mode, - const struct phylink_link_state *state) +static const struct phylink_pcs_ops axienet_pcs_ops = { + .pcs_get_state = axienet_pcs_get_state, + .pcs_config = axienet_pcs_config, + .pcs_an_restart = axienet_pcs_an_restart, +}; + +static struct phylink_pcs *axienet_mac_select_pcs(struct phylink_config *config, + phy_interface_t interface) { struct net_device *ndev = to_net_dev(config->dev); struct axienet_local *lp = netdev_priv(ndev); - int ret; - switch (state->interface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_1000BASEX: - ret = phylink_mii_c22_pcs_config(lp->pcs_phy, mode, - state->interface, - state->advertising); - if (ret < 0) - netdev_warn(ndev, "Failed to configure PCS: %d\n", - ret); - break; + if (interface == PHY_INTERFACE_MODE_1000BASEX || + interface == PHY_INTERFACE_MODE_SGMII) + return &lp->pcs; - default: - break; - } + return NULL; +} + +static void axienet_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) +{ + /* nothing meaningful to do */ } static void axienet_mac_link_down(struct phylink_config *config, @@ -1663,9 +1710,7 @@ static void axienet_mac_link_up(struct phylink_config *config, static const struct phylink_mac_ops axienet_phylink_ops = { .validate = phylink_generic_validate, - .mac_pcs_get_state = axienet_mac_pcs_get_state, - .mac_an_restart = axienet_mac_an_restart, - .mac_prepare = axienet_mac_prepare, + .mac_select_pcs = axienet_mac_select_pcs, .mac_config = axienet_mac_config, .mac_link_down = axienet_mac_link_down, .mac_link_up = axienet_mac_link_up, @@ -1680,29 +1725,26 @@ static const struct phylink_mac_ops axienet_phylink_ops = { */ static void axienet_dma_err_handler(struct work_struct *work) { + u32 i; u32 axienet_status; - u32 cr, i; + struct axidma_bd *cur_p; struct axienet_local *lp = container_of(work, struct axienet_local, dma_err_task); struct net_device *ndev = lp->ndev; - struct axidma_bd *cur_p; + + napi_disable(&lp->napi); axienet_setoptions(ndev, lp->options & ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); - /* When we do an Axi Ethernet reset, it resets the complete core - * including the MDIO. MDIO must be disabled before resetting. - * Hold MDIO bus lock to avoid MDIO accesses during the reset. - */ - axienet_lock_mii(lp); - __axienet_device_reset(lp); - axienet_unlock_mii(lp); + + axienet_dma_stop(lp); for (i = 0; i < lp->tx_bd_num; i++) { cur_p = &lp->tx_bd_v[i]; if (cur_p->cntrl) { dma_addr_t addr = desc_get_phys_addr(lp, cur_p); - dma_unmap_single(ndev->dev.parent, addr, + dma_unmap_single(lp->dev, addr, (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK), DMA_TO_DEVICE); @@ -1735,50 +1777,7 @@ static void axienet_dma_err_handler(struct work_struct *work) lp->tx_bd_tail = 0; lp->rx_bd_ci = 0; - /* Start updating the Rx channel control register */ - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - /* Update the interrupt coalesce count */ - cr = ((cr & ~XAXIDMA_COALESCE_MASK) | - (XAXIDMA_DFT_RX_THRESHOLD << XAXIDMA_COALESCE_SHIFT)); - /* Update the delay timer count */ - cr = ((cr & ~XAXIDMA_DELAY_MASK) | - (XAXIDMA_DFT_RX_WAITBOUND << XAXIDMA_DELAY_SHIFT)); - /* Enable coalesce, delay timer and error interrupts */ - cr |= XAXIDMA_IRQ_ALL_MASK; - /* Finally write to the Rx channel control register */ - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); - - /* Start updating the Tx channel control register */ - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - /* Update the interrupt coalesce count */ - cr = (((cr & ~XAXIDMA_COALESCE_MASK)) | - (XAXIDMA_DFT_TX_THRESHOLD << XAXIDMA_COALESCE_SHIFT)); - /* Update the delay timer count */ - cr = (((cr & ~XAXIDMA_DELAY_MASK)) | - (XAXIDMA_DFT_TX_WAITBOUND << XAXIDMA_DELAY_SHIFT)); - /* Enable coalesce, delay timer and error interrupts */ - cr |= XAXIDMA_IRQ_ALL_MASK; - /* Finally write to the Tx channel control register */ - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); - - /* Populate the tail pointer and bring the Rx Axi DMA engine out of - * halted state. This will make the Rx side ready for reception. - */ - axienet_dma_out_addr(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p); - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, - cr | XAXIDMA_CR_RUNSTOP_MASK); - axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p + - (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1))); - - /* Write to the RS (Run-stop) bit in the Tx channel control register. - * Tx channel is now ready to run. But only after we write to the - * tail pointer register that the Tx channel will start transmitting - */ - axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p); - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, - cr | XAXIDMA_CR_RUNSTOP_MASK); + axienet_dma_start(lp); axienet_status = axienet_ior(lp, XAE_RCW1_OFFSET); axienet_status &= ~XAE_RCW1_RX_MASK; @@ -1799,6 +1798,7 @@ static void axienet_dma_err_handler(struct work_struct *work) axienet_set_mac_address(ndev, NULL); axienet_set_multicast_list(ndev); axienet_setoptions(ndev, lp->options); + napi_enable(&lp->napi); } /** @@ -1847,6 +1847,8 @@ static int axienet_probe(struct platform_device *pdev) lp->rx_bd_num = RX_BD_NUM_DEFAULT; lp->tx_bd_num = TX_BD_NUM_DEFAULT; + netif_napi_add(ndev, &lp->napi, axienet_poll, NAPI_POLL_WEIGHT); + lp->axi_clk = devm_clk_get_optional(&pdev->dev, "s_axi_lite_clk"); if (!lp->axi_clk) { /* For backward compatibility, if named AXI clock is not present, @@ -2053,7 +2055,9 @@ static int axienet_probe(struct platform_device *pdev) } lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; + lp->coalesce_usec_rx = XAXIDMA_DFT_RX_USEC; lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; + lp->coalesce_usec_tx = XAXIDMA_DFT_TX_USEC; /* Reset core now that clocks are enabled, prior to accessing MDIO */ ret = __axienet_device_reset(lp); @@ -2079,12 +2083,12 @@ static int axienet_probe(struct platform_device *pdev) ret = -EPROBE_DEFER; goto cleanup_mdio; } - lp->phylink_config.pcs_poll = true; + lp->pcs.ops = &axienet_pcs_ops; + lp->pcs.poll = true; } lp->phylink_config.dev = &ndev->dev; lp->phylink_config.type = PHYLINK_NETDEV; - lp->phylink_config.legacy_pre_march2020 = true; lp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE | MAC_10FD | MAC_100FD | MAC_1000FD; diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 77fa2cb03aca..57a24f62e353 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -498,7 +498,7 @@ static void xemaclite_update_address(struct net_local *drvdata, * @dev: Pointer to the network device instance * @address: Void pointer to the sockaddr structure * - * This function copies the HW address from the sockaddr strucutre to the + * This function copies the HW address from the sockaddr structure to the * net_device structure and updates the address in HW. * * Return: Error if the net device is busy or 0 if the addr is set |