Diffstat (limited to 'drivers/net/ethernet/intel')
75 files changed, 8820 insertions, 2142 deletions
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 86493fea56e4..416da9619928 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3565,8 +3565,8 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE))) adapter->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE; - pr_info("%s changing MTU from %d to %d\n", - netdev->name, netdev->mtu, new_mtu); + netdev_dbg(netdev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); netdev->mtu = new_mtu; if (netif_running(netdev)) diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index de8c5818a305..adce7e319b9e 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -894,8 +894,9 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data) case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: - /* fall through */ case e1000_pch_cnp: + /* fall through */ + case e1000_pch_tgp: mask |= BIT(18); break; default: @@ -1559,6 +1560,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter) switch (hw->mac.type) { case e1000_pch_spt: case e1000_pch_cnp: + case e1000_pch_tgp: fext_nvm11 = er32(FEXTNVM11); fext_nvm11 &= ~E1000_FEXTNVM11_DISABLE_MULR_FIX; ew32(FEXTNVM11, fext_nvm11); diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index eff75bd8a8f0..f556163481cb 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -86,6 +86,17 @@ struct e1000_hw; #define E1000_DEV_ID_PCH_ICP_I219_V8 0x15E0 #define E1000_DEV_ID_PCH_ICP_I219_LM9 0x15E1 #define E1000_DEV_ID_PCH_ICP_I219_V9 0x15E2 +#define E1000_DEV_ID_PCH_CMP_I219_LM10 0x0D4E +#define E1000_DEV_ID_PCH_CMP_I219_V10 0x0D4F +#define E1000_DEV_ID_PCH_CMP_I219_LM11 0x0D4C +#define E1000_DEV_ID_PCH_CMP_I219_V11 0x0D4D +#define E1000_DEV_ID_PCH_CMP_I219_LM12 0x0D53 +#define E1000_DEV_ID_PCH_CMP_I219_V12 0x0D55 +#define E1000_DEV_ID_PCH_TGP_I219_LM13 0x15FB +#define E1000_DEV_ID_PCH_TGP_I219_V13 0x15FC +#define E1000_DEV_ID_PCH_TGP_I219_LM14 0x15F9 +#define E1000_DEV_ID_PCH_TGP_I219_V14 0x15FA +#define E1000_DEV_ID_PCH_TGP_I219_LM15 0x15F4 #define E1000_REVISION_4 4 @@ -109,6 +120,7 @@ enum e1000_mac_type { e1000_pch_lpt, e1000_pch_spt, e1000_pch_cnp, + e1000_pch_tgp, }; enum e1000_media_type { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index a1fab77b2096..b4135c50e905 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -316,6 +316,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: + case e1000_pch_tgp: if (e1000_phy_is_accessible_pchlan(hw)) break; @@ -458,6 +459,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: + case e1000_pch_tgp: /* In case the PHY needs to be in mdio slow mode, * set slow mode and try to get the PHY id again. 
*/ @@ -700,6 +702,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: + case e1000_pch_tgp: case e1000_pchlan: /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan; @@ -1638,6 +1641,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: + case e1000_pch_tgp: rc = e1000_init_phy_params_pchlan(hw); break; default: @@ -2090,6 +2094,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: + case e1000_pch_tgp: sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; break; default: @@ -3127,6 +3132,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) switch (hw->mac.type) { case e1000_pch_spt: case e1000_pch_cnp: + case e1000_pch_tgp: bank1_offset = nvm->flash_bank_size; act_offset = E1000_ICH_NVM_SIG_WORD; @@ -4070,6 +4076,7 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: + case e1000_pch_tgp: word = NVM_COMPAT; valid_csum_mask = NVM_COMPAT_VALID_CSUM; break; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index d7d56e42a6aa..fe7997c18a10 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3538,6 +3538,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) adapter->cc.shift = shift; break; case e1000_pch_cnp: + case e1000_pch_tgp: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { /* Stable 24MHz frequency */ incperiod = INCPERIOD_24MHZ; @@ -4049,6 +4050,8 @@ void e1000e_reset(struct e1000_adapter *adapter) case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: + /* fall-through */ + case e1000_pch_tgp: fc->refresh_time = 0xFFFF; fc->pause_time = 0xFFFF; @@ -4715,12 +4718,12 @@ int e1000e_close(struct net_device *netdev) pm_runtime_get_sync(&pdev->dev); - if (!test_bit(__E1000_DOWN, &adapter->state)) { + if (netif_device_present(netdev)) { e1000e_down(adapter, true); e1000_free_irq(adapter); /* Link status message must follow this format */ - pr_info("%s NIC Link is Down\n", adapter->netdev->name); + pr_info("%s NIC Link is Down\n", netdev->name); } napi_disable(&adapter->napi); @@ -6028,7 +6031,8 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) usleep_range(1000, 1100); /* e1000e_down -> e1000e_reset dependent on max_frame_size & mtu */ adapter->max_frame_size = max_frame; - e_info("changing MTU from %d to %d\n", netdev->mtu, new_mtu); + netdev_dbg(netdev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); netdev->mtu = new_mtu; pm_runtime_get_sync(netdev->dev.parent); @@ -6294,14 +6298,188 @@ fl_out: pm_runtime_put_sync(netdev->dev.parent); } +#ifdef CONFIG_PM_SLEEP +/* S0ix implementation */ +static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 mac_data; + u16 phy_data; + + /* Disable the periodic inband message, + * don't request PCIe clock in K1 page770_17[10:9] = 10b + */ + e1e_rphy(hw, HV_PM_CTRL, &phy_data); + phy_data &= ~HV_PM_CTRL_K1_CLK_REQ; + phy_data |= BIT(10); + e1e_wphy(hw, HV_PM_CTRL, phy_data); + + /* Make sure we don't exit K1 every time a new packet arrives + * 772_29[5] = 1 CS_Mode_Stay_In_K1 + */ + e1e_rphy(hw, I217_CGFREG, &phy_data); + phy_data |= BIT(5); + e1e_wphy(hw, I217_CGFREG, phy_data); + + /* 
Change the MAC/PHY interface to SMBus + * Force the SMBus in PHY page769_23[0] = 1 + * Force the SMBus in MAC CTRL_EXT[11] = 1 + */ + e1e_rphy(hw, CV_SMB_CTRL, &phy_data); + phy_data |= CV_SMB_CTRL_FORCE_SMBUS; + e1e_wphy(hw, CV_SMB_CTRL, phy_data); + mac_data = er32(CTRL_EXT); + mac_data |= E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, mac_data); + + /* DFT control: PHY bit: page769_20[0] = 1 + * Gate PPW via EXTCNF_CTRL - set 0x0F00[7] = 1 + */ + e1e_rphy(hw, I82579_DFT_CTRL, &phy_data); + phy_data |= BIT(0); + e1e_wphy(hw, I82579_DFT_CTRL, phy_data); + + mac_data = er32(EXTCNF_CTRL); + mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; + ew32(EXTCNF_CTRL, mac_data); + + /* Check MAC Tx/Rx packet buffer pointers. + * Reset MAC Tx/Rx packet buffer pointers to suppress any + * pending traffic indication that would prevent power gating. + */ + mac_data = er32(TDFH); + if (mac_data) + ew32(TDFH, 0); + mac_data = er32(TDFT); + if (mac_data) + ew32(TDFT, 0); + mac_data = er32(TDFHS); + if (mac_data) + ew32(TDFHS, 0); + mac_data = er32(TDFTS); + if (mac_data) + ew32(TDFTS, 0); + mac_data = er32(TDFPC); + if (mac_data) + ew32(TDFPC, 0); + mac_data = er32(RDFH); + if (mac_data) + ew32(RDFH, 0); + mac_data = er32(RDFT); + if (mac_data) + ew32(RDFT, 0); + mac_data = er32(RDFHS); + if (mac_data) + ew32(RDFHS, 0); + mac_data = er32(RDFTS); + if (mac_data) + ew32(RDFTS, 0); + mac_data = er32(RDFPC); + if (mac_data) + ew32(RDFPC, 0); + + /* Enable the Dynamic Power Gating in the MAC */ + mac_data = er32(FEXTNVM7); + mac_data |= BIT(22); + ew32(FEXTNVM7, mac_data); + + /* Disable the time synchronization clock */ + mac_data = er32(FEXTNVM7); + mac_data |= BIT(31); + mac_data &= ~BIT(0); + ew32(FEXTNVM7, mac_data); + + /* Dynamic Power Gating Enable */ + mac_data = er32(CTRL_EXT); + mac_data |= BIT(3); + ew32(CTRL_EXT, mac_data); + + /* Enable the Dynamic Clock Gating in the DMA and MAC */ + mac_data = er32(CTRL_EXT); + mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN; + ew32(CTRL_EXT, mac_data); + + /* No MAC DPG gating SLP_S0 in modern standby + * Switch the logic of the lanphypc to use PMC counter + */ + mac_data = er32(FEXTNVM5); + mac_data |= BIT(7); + ew32(FEXTNVM5, mac_data); +} + +static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 mac_data; + u16 phy_data; + + /* Disable the Dynamic Power Gating in the MAC */ + mac_data = er32(FEXTNVM7); + mac_data &= 0xFFBFFFFF; + ew32(FEXTNVM7, mac_data); + + /* Enable the time synchronization clock */ + mac_data = er32(FEXTNVM7); + mac_data |= BIT(0); + ew32(FEXTNVM7, mac_data); + + /* Disable Dynamic Power Gating */ + mac_data = er32(CTRL_EXT); + mac_data &= 0xFFFFFFF7; + ew32(CTRL_EXT, mac_data); + + /* Disable the Dynamic Clock Gating in the DMA and MAC */ + mac_data = er32(CTRL_EXT); + mac_data &= 0xFFF7FFFF; + ew32(CTRL_EXT, mac_data); + + /* Revert the lanphypc logic to use the internal Gbe counter + * and not the PMC counter + */ + mac_data = er32(FEXTNVM5); + mac_data &= 0xFFFFFF7F; + ew32(FEXTNVM5, mac_data); + + /* Enable the periodic inband message, + * Request PCIe clock in K1 page770_17[10:9] =01b + */ + e1e_rphy(hw, HV_PM_CTRL, &phy_data); + phy_data &= 0xFBFF; + phy_data |= HV_PM_CTRL_K1_CLK_REQ; + e1e_wphy(hw, HV_PM_CTRL, phy_data); + + /* Return back configuration + * 772_29[5] = 0 CS_Mode_Stay_In_K1 + */ + e1e_rphy(hw, I217_CGFREG, &phy_data); + phy_data &= 0xFFDF; + e1e_wphy(hw, I217_CGFREG, phy_data); + + /* Change the MAC/PHY interface to Kumeran + * Unforce the SMBus in PHY page769_23[0] = 0 + 
* Unforce the SMBus in MAC CTRL_EXT[11] = 0 + */ + e1e_rphy(hw, CV_SMB_CTRL, &phy_data); + phy_data &= ~CV_SMB_CTRL_FORCE_SMBUS; + e1e_wphy(hw, CV_SMB_CTRL, phy_data); + mac_data = er32(CTRL_EXT); + mac_data &= ~E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, mac_data); +} +#endif /* CONFIG_PM_SLEEP */ + static int e1000e_pm_freeze(struct device *dev) { struct net_device *netdev = dev_get_drvdata(dev); struct e1000_adapter *adapter = netdev_priv(netdev); + bool present; + + rtnl_lock(); + present = netif_device_present(netdev); netif_device_detach(netdev); - if (netif_running(netdev)) { + if (present && netif_running(netdev)) { int count = E1000_CHECK_RESET_COUNT; while (test_bit(__E1000_RESETTING, &adapter->state) && count--) @@ -6313,6 +6491,8 @@ static int e1000e_pm_freeze(struct device *dev) e1000e_down(adapter, false); e1000_free_irq(adapter); } + rtnl_unlock(); + e1000e_reset_interrupt_capability(adapter); /* Allow time for pending master requests to run */ @@ -6560,6 +6740,30 @@ static void e1000e_disable_aspm_locked(struct pci_dev *pdev, u16 state) __e1000e_disable_aspm(pdev, state, 1); } +static int e1000e_pm_thaw(struct device *dev) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct e1000_adapter *adapter = netdev_priv(netdev); + int rc = 0; + + e1000e_set_interrupt_capability(adapter); + + rtnl_lock(); + if (netif_running(netdev)) { + rc = e1000_request_irq(adapter); + if (rc) + goto err_irq; + + e1000e_up(adapter); + } + + netif_device_attach(netdev); +err_irq: + rtnl_unlock(); + + return rc; +} + #ifdef CONFIG_PM static int __e1000_resume(struct pci_dev *pdev) { @@ -6627,29 +6831,12 @@ static int __e1000_resume(struct pci_dev *pdev) } #ifdef CONFIG_PM_SLEEP -static int e1000e_pm_thaw(struct device *dev) -{ - struct net_device *netdev = dev_get_drvdata(dev); - struct e1000_adapter *adapter = netdev_priv(netdev); - - e1000e_set_interrupt_capability(adapter); - if (netif_running(netdev)) { - u32 err = e1000_request_irq(adapter); - - if (err) - return err; - - e1000e_up(adapter); - } - - netif_device_attach(netdev); - - return 0; -} - static int e1000e_pm_suspend(struct device *dev) { + struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); + struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); + struct e1000_hw *hw = &adapter->hw; int rc; e1000e_flush_lpic(pdev); @@ -6660,14 +6847,25 @@ static int e1000e_pm_suspend(struct device *dev) if (rc) e1000e_pm_thaw(dev); + /* Introduce S0ix implementation */ + if (hw->mac.type >= e1000_pch_cnp) + e1000e_s0ix_entry_flow(adapter); + return rc; } static int e1000e_pm_resume(struct device *dev) { + struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); + struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); + struct e1000_hw *hw = &adapter->hw; int rc; + /* Introduce S0ix implementation */ + if (hw->mac.type >= e1000_pch_cnp) + e1000e_s0ix_exit_flow(adapter); + rc = __e1000_resume(pdev); if (rc) return rc; @@ -6818,16 +7016,11 @@ static void e1000_netpoll(struct net_device *netdev) static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { - struct net_device *netdev = pci_get_drvdata(pdev); - struct e1000_adapter *adapter = netdev_priv(netdev); - - netif_device_detach(netdev); + e1000e_pm_freeze(&pdev->dev); if (state == pci_channel_io_perm_failure) return PCI_ERS_RESULT_DISCONNECT; - if (netif_running(netdev)) - e1000e_down(adapter, true); pci_disable_device(pdev); /* Request a slot slot reset. 
*/ @@ -6893,10 +7086,7 @@ static void e1000_io_resume(struct pci_dev *pdev) e1000_init_manageability_pt(adapter); - if (netif_running(netdev)) - e1000e_up(adapter); - - netif_device_attach(netdev); + e1000e_pm_thaw(&pdev->dev); /* If the controller has AMT, do not set DRV_LOAD until the interface * is up. For all other cases, let the f/w know that the h/w is now @@ -7407,15 +7597,13 @@ static void e1000_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); - bool down = test_bit(__E1000_DOWN, &adapter->state); e1000e_ptp_remove(adapter); /* The timers may be rescheduled, so explicitly disable them * from being rescheduled. */ - if (!down) - set_bit(__E1000_DOWN, &adapter->state); + set_bit(__E1000_DOWN, &adapter->state); del_timer_sync(&adapter->phy_info_timer); cancel_work_sync(&adapter->reset_task); @@ -7435,9 +7623,6 @@ static void e1000_remove(struct pci_dev *pdev) } } - /* Don't lie to e1000_close() down the road. */ - if (!down) - clear_bit(__E1000_DOWN, &adapter->state); unregister_netdev(netdev); if (pci_dev_run_wake(pdev)) @@ -7567,6 +7752,17 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_V8), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_LM9), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_V9), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM10), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V10), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM11), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V11), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM12), board_pch_spt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V12), board_pch_spt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM13), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V13), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM14), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_cnp }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 1a4c65d9feb4..eaa5a0fb99f0 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -295,6 +295,8 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: + /* fall-through */ + case e1000_pch_tgp: if ((hw->mac.type < e1000_pch_lpt) || (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) { adapter->ptp_clock_info.max_adj = 24000000 - 1; diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h index 47f5ca793970..df59fd1d660c 100644 --- a/drivers/net/ethernet/intel/e1000e/regs.h +++ b/drivers/net/ethernet/intel/e1000e/regs.h @@ -18,6 +18,7 @@ #define E1000_FEXTNVM 0x00028 /* Future Extended NVM - RW */ #define E1000_FEXTNVM3 0x0003C /* Future Extended NVM 3 - RW */ #define E1000_FEXTNVM4 0x00024 /* Future Extended NVM 4 - RW */ +#define E1000_FEXTNVM5 0x00014 /* Future Extended NVM 5 - RW */ #define E1000_FEXTNVM6 0x00010 /* Future Extended NVM 6 - RW */ #define E1000_FEXTNVM7 0x000E4 /* Future Extended NVM 7 - RW */ #define E1000_FEXTNVM9 0x5BB4 /* Future Extended NVM 9 - RW */ @@ -234,4 +235,7 @@ #define E1000_RXMTRL 0x0B634 /* Time sync Rx 
EtherType and Msg Type - RW */ #define E1000_RXUDP 0x0B638 /* Time Sync Rx UDP Port - RW */ +/* PHY registers */ +#define I82579_DFT_CTRL PHY_REG(769, 20) + #endif diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index b14441944b4b..f306084ca12c 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -534,6 +534,7 @@ void fm10k_iov_suspend(struct pci_dev *pdev); int fm10k_iov_resume(struct pci_dev *pdev); void fm10k_iov_disable(struct pci_dev *pdev); int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs); +void fm10k_iov_update_stats(struct fm10k_intfc *interface); s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid); int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac); int fm10k_ndo_set_vf_vlan(struct net_device *netdev, @@ -542,6 +543,8 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int __always_unused min_rate, int max_rate); int fm10k_ndo_get_vf_config(struct net_device *netdev, int vf_idx, struct ifla_vf_info *ivi); +int fm10k_ndo_get_vf_stats(struct net_device *netdev, + int vf_idx, struct ifla_vf_stats *stats); /* DebugFS */ #ifdef CONFIG_DEBUG_FS diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index afe1fafd2447..8c50a128df29 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -520,6 +520,27 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs) return num_vfs; } +/** + * fm10k_iov_update_stats - Update stats for all VFs + * @interface: device private structure + * + * Updates the VF statistics for all enabled VFs. Expects to be called by + * fm10k_update_stats and assumes that locking via the __FM10K_UPDATING_STATS + * bit is already handled. 
+ */ +void fm10k_iov_update_stats(struct fm10k_intfc *interface) +{ + struct fm10k_iov_data *iov_data = interface->iov_data; + struct fm10k_hw *hw = &interface->hw; + int i; + + if (!iov_data) + return; + + for (i = 0; i < iov_data->num_vfs; i++) + hw->iov.ops.update_stats(hw, iov_data->vf_info[i].stats, i); +} + static inline void fm10k_reset_vf_info(struct fm10k_intfc *interface, struct fm10k_vf_info *vf_info) { @@ -650,3 +671,30 @@ int fm10k_ndo_get_vf_config(struct net_device *netdev, return 0; } + +int fm10k_ndo_get_vf_stats(struct net_device *netdev, + int vf_idx, struct ifla_vf_stats *stats) +{ + struct fm10k_intfc *interface = netdev_priv(netdev); + struct fm10k_iov_data *iov_data = interface->iov_data; + struct fm10k_hw *hw = &interface->hw; + struct fm10k_hw_stats_q *hw_stats; + u32 idx, qpp; + + /* verify SR-IOV is active and that vf idx is valid */ + if (!iov_data || vf_idx >= iov_data->num_vfs) + return -EINVAL; + + qpp = fm10k_queues_per_pool(hw); + hw_stats = iov_data->vf_info[vf_idx].stats; + + for (idx = 0; idx < qpp; idx++) { + stats->rx_packets += hw_stats[idx].rx_packets.count; + stats->tx_packets += hw_stats[idx].tx_packets.count; + stats->rx_bytes += hw_stats[idx].rx_bytes.count; + stats->tx_bytes += hw_stats[idx].tx_bytes.count; + stats->rx_dropped += hw_stats[idx].rx_drops.count; + } + + return 0; +} diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 2be9222510e7..17738b0a9873 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -11,7 +11,7 @@ #include "fm10k.h" -#define DRV_VERSION "0.26.1-k" +#define DRV_VERSION "0.27.1-k" #define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver" const char fm10k_driver_version[] = DRV_VERSION; char fm10k_driver_name[] = "fm10k"; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 09f7a246e134..68baee04dc58 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1643,6 +1643,7 @@ static const struct net_device_ops fm10k_netdev_ops = { .ndo_set_vf_vlan = fm10k_ndo_set_vf_vlan, .ndo_set_vf_rate = fm10k_ndo_set_vf_bw, .ndo_get_vf_config = fm10k_ndo_get_vf_config, + .ndo_get_vf_stats = fm10k_ndo_get_vf_stats, .ndo_udp_tunnel_add = fm10k_udp_tunnel_add, .ndo_udp_tunnel_del = fm10k_udp_tunnel_del, .ndo_dfwd_add_station = fm10k_dfwd_add_station, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index bb236fa44048..d122d0087191 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -630,6 +630,9 @@ void fm10k_update_stats(struct fm10k_intfc *interface) net_stats->rx_errors = rx_errors; net_stats->rx_dropped = interface->stats.nodesc_drop.count; + /* Update VF statistics */ + fm10k_iov_update_stats(interface); + clear_bit(__FM10K_UPDATING_STATS, interface->state); } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h index 160bc5b78f99..ceb9b791f799 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 2013 - 2018 Intel Corporation. */ +/* Copyright(c) 2013 - 2019 Intel Corporation. 
*/ #ifndef _FM10K_TLV_H_ #define _FM10K_TLV_H_ @@ -76,8 +76,8 @@ struct fm10k_tlv_attr { #define FM10K_TLV_ATTR_S32(id) { id, FM10K_TLV_SIGNED, 4 } #define FM10K_TLV_ATTR_S64(id) { id, FM10K_TLV_SIGNED, 8 } #define FM10K_TLV_ATTR_LE_STRUCT(id, len) { id, FM10K_TLV_LE_STRUCT, len } -#define FM10K_TLV_ATTR_NESTED(id) { id, FM10K_TLV_NESTED } -#define FM10K_TLV_ATTR_LAST { FM10K_TLV_ERROR } +#define FM10K_TLV_ATTR_NESTED(id) { id, FM10K_TLV_NESTED, 0 } +#define FM10K_TLV_ATTR_LAST { FM10K_TLV_ERROR, 0, 0 } struct fm10k_msg_data { unsigned int id; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h index 15ac1c7885bc..63968c5d7c5d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h @@ -581,6 +581,7 @@ struct fm10k_vf_info { * at the same offset as the mailbox */ struct fm10k_mbx_info mbx; /* PF side of VF mailbox */ + struct fm10k_hw_stats_q stats[FM10K_MAX_QUEUES_POOL]; int rate; /* Tx BW cap as defined by OS */ u16 glort; /* resource tag for this VF */ u16 sw_vid; /* Switch API assigned VLAN */ diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 2af9f6308f84..cb6367334ca7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1118,6 +1118,7 @@ struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr); int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr); bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi); +int i40e_count_filters(struct i40e_vsi *vsi); struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr); void i40e_vlan_stripping_enable(struct i40e_vsi *vsi); #ifdef CONFIG_I40E_DCB diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 72c04881d290..9f0a4e92a231 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -508,6 +508,59 @@ shutdown_arq_out: } /** + * i40e_set_hw_flags - set HW flags + * @hw: pointer to the hardware structure + **/ +static void i40e_set_hw_flags(struct i40e_hw *hw) +{ + struct i40e_adminq_info *aq = &hw->aq; + + hw->flags = 0; + + switch (hw->mac.type) { + case I40E_MAC_XL710: + if (aq->api_maj_ver > 1 || + (aq->api_maj_ver == 1 && + aq->api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710)) { + hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE; + hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE; + /* The ability to RX (not drop) 802.1ad frames */ + hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE; + } + break; + case I40E_MAC_X722: + hw->flags |= I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE | + I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK; + + if (aq->api_maj_ver > 1 || + (aq->api_maj_ver == 1 && + aq->api_min_ver >= I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722)) + hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE; + /* fall through */ + default: + break; + } + + /* Newer versions of firmware require lock when reading the NVM */ + if (aq->api_maj_ver > 1 || + (aq->api_maj_ver == 1 && + aq->api_min_ver >= 5)) + hw->flags |= I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK; + + if (aq->api_maj_ver > 1 || + (aq->api_maj_ver == 1 && + aq->api_min_ver >= 8)) { + hw->flags |= I40E_HW_FLAG_FW_LLDP_PERSISTENT; + hw->flags |= I40E_HW_FLAG_DROP_MODE; + } + + if (aq->api_maj_ver > 1 || + (aq->api_maj_ver == 1 && + aq->api_min_ver >= 9)) + hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED; +} + +/** * i40e_init_adminq - main initialization routine for Admin 
Queue * @hw: pointer to the hardware structure * @@ -571,6 +624,11 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw) if (ret_code != I40E_SUCCESS) goto init_adminq_free_arq; + /* Some features were introduced in different FW API version + * for different MAC type. + */ + i40e_set_hw_flags(hw); + /* get the NVM version info */ i40e_read_nvm_word(hw, I40E_SR_NVM_DEV_STARTER_VERSION, &hw->nvm.version); @@ -596,25 +654,12 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw) hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE; } - /* Newer versions of firmware require lock when reading the NVM */ - if (hw->aq.api_maj_ver > 1 || - (hw->aq.api_maj_ver == 1 && - hw->aq.api_min_ver >= 5)) - hw->flags |= I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK; - /* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */ if (hw->aq.api_maj_ver > 1 || (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver >= 7)) hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE; - if (hw->aq.api_maj_ver > 1 || - (hw->aq.api_maj_ver == 1 && - hw->aq.api_min_ver >= 8)) { - hw->flags |= I40E_HW_FLAG_FW_LLDP_PERSISTENT; - hw->flags |= I40E_HW_FLAG_DROP_MODE; - } - if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) { ret_code = I40E_ERR_FIRMWARE_API_VERSION; goto init_adminq_free_arq; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 69a2daaca5c5..aa5f1c0aa721 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -2251,7 +2251,13 @@ struct i40e_aqc_phy_register_access { #define I40E_AQ_PHY_REG_ACCESS_EXTERNAL 1 #define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE 2 u8 dev_address; - u8 reserved1[2]; + u8 cmd_flags; +#define I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE 0x01 +#define I40E_AQ_PHY_REG_ACCESS_SET_MDIO_IF_NUMBER 0x02 +#define I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_SHIFT 2 +#define I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_MASK (0x3 << \ + I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_SHIFT) + u8 reserved1; __le32 reg_address; __le32 reg_value; u8 reserved2[4]; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 7560f06768e0..d4055037af89 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -29,6 +29,7 @@ i40e_status i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_QSFP_C: case I40E_DEV_ID_10G_BASE_T: case I40E_DEV_ID_10G_BASE_T4: + case I40E_DEV_ID_10G_BASE_T_BC: case I40E_DEV_ID_10G_B: case I40E_DEV_ID_10G_SFP: case I40E_DEV_ID_20G_KR2: @@ -933,10 +934,6 @@ i40e_status i40e_init_shared_code(struct i40e_hw *hw) else hw->pf_id = (u8)(func_rid & 0x7); - if (hw->mac.type == I40E_MAC_X722) - hw->flags |= I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE | - I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK; - status = i40e_init_nvm(hw); return status; } @@ -1441,9 +1438,9 @@ static u32 i40e_led_is_mine(struct i40e_hw *hw, int idx) u32 gpio_val = 0; u32 port; - if (!hw->func_caps.led[idx]) + if (!I40E_IS_X710TL_DEVICE(hw->device_id) && + !hw->func_caps.led[idx]) return 0; - gpio_val = rd32(hw, I40E_GLGEN_GPIO_CTL(idx)); port = (gpio_val & I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK) >> I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT; @@ -1462,8 +1459,15 @@ static u32 i40e_led_is_mine(struct i40e_hw *hw, int idx) #define I40E_FILTER_ACTIVITY 0xE #define I40E_LINK_ACTIVITY 0xC #define I40E_MAC_ACTIVITY 0xD +#define I40E_FW_LED BIT(4) +#define I40E_LED_MODE_VALID (I40E_GLGEN_GPIO_CTL_LED_MODE_MASK >> \ + I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) + #define 
I40E_LED0 22 +#define I40E_PIN_FUNC_SDP 0x0 +#define I40E_PIN_FUNC_LED 0x1 + /** * i40e_led_get - return current on/off mode * @hw: pointer to the hw struct @@ -1508,8 +1512,10 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink) { int i; - if (mode & 0xfffffff0) + if (mode & ~I40E_LED_MODE_VALID) { hw_dbg(hw, "invalid mode passed in %X\n", mode); + return; + } /* as per the documentation GPIO 22-29 are the LED * GPIO pins named LED0..LED7 @@ -1519,6 +1525,20 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink) if (!gpio_val) continue; + + if (I40E_IS_X710TL_DEVICE(hw->device_id)) { + u32 pin_func = 0; + + if (mode & I40E_FW_LED) + pin_func = I40E_PIN_FUNC_SDP; + else + pin_func = I40E_PIN_FUNC_LED; + + gpio_val &= ~I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK; + gpio_val |= ((pin_func << + I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT) & + I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK); + } gpio_val &= ~I40E_GLGEN_GPIO_CTL_LED_MODE_MASK; /* this & is a bit of paranoia, but serves as a range check */ gpio_val |= ((mode << I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) & @@ -2571,9 +2591,16 @@ noinline_for_stack i40e_status i40e_update_link_info(struct i40e_hw *hw) if (status) return status; - hw->phy.link_info.req_fec_info = - abilities.fec_cfg_curr_mod_ext_info & - (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS); + if (abilities.fec_cfg_curr_mod_ext_info & + I40E_AQ_ENABLE_FEC_AUTO) + hw->phy.link_info.req_fec_info = + (I40E_AQ_REQUEST_FEC_KR | + I40E_AQ_REQUEST_FEC_RS); + else + hw->phy.link_info.req_fec_info = + abilities.fec_cfg_curr_mod_ext_info & + (I40E_AQ_REQUEST_FEC_KR | + I40E_AQ_REQUEST_FEC_RS); memcpy(hw->phy.link_info.module_type, &abilities.module_type, sizeof(hw->phy.link_info.module_type)); @@ -4885,6 +4912,7 @@ i40e_status i40e_write_phy_register(struct i40e_hw *hw, break; case I40E_DEV_ID_10G_BASE_T: case I40E_DEV_ID_10G_BASE_T4: + case I40E_DEV_ID_10G_BASE_T_BC: case I40E_DEV_ID_10G_BASE_T_X722: case I40E_DEV_ID_25G_B: case I40E_DEV_ID_25G_SFP28: @@ -5044,7 +5072,7 @@ static enum i40e_status_code i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr, status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL, - I40E_PHY_COM_REG_PAGE, + I40E_PHY_COM_REG_PAGE, true, I40E_PHY_LED_PROV_REG_1, reg_val, NULL); } else { @@ -5077,7 +5105,7 @@ static enum i40e_status_code i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr, status = i40e_aq_set_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL, - I40E_PHY_COM_REG_PAGE, + I40E_PHY_COM_REG_PAGE, true, I40E_PHY_LED_PROV_REG_1, reg_val, NULL); } else { @@ -5116,7 +5144,7 @@ i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL, - I40E_PHY_COM_REG_PAGE, + I40E_PHY_COM_REG_PAGE, true, I40E_PHY_LED_PROV_REG_1, ®_val_aq, NULL); if (status == I40E_SUCCESS) @@ -5321,20 +5349,49 @@ do_retry: } /** - * i40e_aq_set_phy_register + * i40e_mdio_if_number_selection - MDIO I/F number selection + * @hw: pointer to the hw struct + * @set_mdio: use MDIO I/F number specified by mdio_num + * @mdio_num: MDIO I/F number + * @cmd: pointer to PHY Register command structure + **/ +static void i40e_mdio_if_number_selection(struct i40e_hw *hw, bool set_mdio, + u8 mdio_num, + struct i40e_aqc_phy_register_access *cmd) +{ + if (set_mdio && cmd->phy_interface == I40E_AQ_PHY_REG_ACCESS_EXTERNAL) { + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED) + cmd->cmd_flags |= + I40E_AQ_PHY_REG_ACCESS_SET_MDIO_IF_NUMBER | + ((mdio_num << + I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_SHIFT) & + 
I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_MASK); + else + i40e_debug(hw, I40E_DEBUG_PHY, + "MDIO I/F number selection not supported by current FW version.\n"); + } +} + +/** + * i40e_aq_set_phy_register_ext * @hw: pointer to the hw struct * @phy_select: select which phy should be accessed * @dev_addr: PHY device address + * @set_mdio: use MDIO I/F number specified by mdio_num + * @mdio_num: MDIO I/F number * @reg_addr: PHY register address * @reg_val: new register value * @cmd_details: pointer to command details structure or NULL * * Write the external PHY register. + * NOTE: In common cases MDIO I/F number should not be changed, thats why you + * may use simple wrapper i40e_aq_set_phy_register. **/ -i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw, - u8 phy_select, u8 dev_addr, - u32 reg_addr, u32 reg_val, - struct i40e_asq_cmd_details *cmd_details) +enum i40e_status_code i40e_aq_set_phy_register_ext(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, bool page_change, + bool set_mdio, u8 mdio_num, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) { struct i40e_aq_desc desc; struct i40e_aqc_phy_register_access *cmd = @@ -5349,26 +5406,36 @@ i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw, cmd->reg_address = cpu_to_le32(reg_addr); cmd->reg_value = cpu_to_le32(reg_val); + i40e_mdio_if_number_selection(hw, set_mdio, mdio_num, cmd); + + if (!page_change) + cmd->cmd_flags = I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE; + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); return status; } /** - * i40e_aq_get_phy_register + * i40e_aq_get_phy_register_ext * @hw: pointer to the hw struct * @phy_select: select which phy should be accessed * @dev_addr: PHY device address + * @set_mdio: use MDIO I/F number specified by mdio_num + * @mdio_num: MDIO I/F number * @reg_addr: PHY register address * @reg_val: read register value * @cmd_details: pointer to command details structure or NULL * * Read the external PHY register. + * NOTE: In common cases MDIO I/F number should not be changed, thats why you + * may use simple wrapper i40e_aq_get_phy_register. 
**/ -i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw, - u8 phy_select, u8 dev_addr, - u32 reg_addr, u32 *reg_val, - struct i40e_asq_cmd_details *cmd_details) +enum i40e_status_code i40e_aq_get_phy_register_ext(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, bool page_change, + bool set_mdio, u8 mdio_num, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) { struct i40e_aq_desc desc; struct i40e_aqc_phy_register_access *cmd = @@ -5382,6 +5449,11 @@ i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw, cmd->dev_address = dev_addr; cmd->reg_address = cpu_to_le32(reg_addr); + i40e_mdio_if_number_selection(hw, set_mdio, mdio_num, cmd); + + if (!page_change) + cmd->cmd_flags = I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE; + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); if (!status) *reg_val = le32_to_cpu(cmd->reg_value); diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 200a1cb3b536..9de503c5f99b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -889,7 +889,9 @@ i40e_status i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change) ret = i40e_read_nvm_module_data(hw, I40E_SR_EMP_SR_SETTINGS_PTR, - offset, 1, + offset, + I40E_LLDP_CURRENT_STATUS_OFFSET, + I40E_LLDP_CURRENT_STATUS_SIZE, &lldp_cfg.adminstatus); } else { ret = i40e_read_lldp_cfg(hw, &lldp_cfg); diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h index 2a80c5daa376..ba86ad833bee 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h @@ -32,6 +32,9 @@ #define I40E_CEE_MAX_FEAT_TYPE 3 #define I40E_LLDP_CURRENT_STATUS_XL710_OFFSET 0x2B #define I40E_LLDP_CURRENT_STATUS_X722_OFFSET 0x31 +#define I40E_LLDP_CURRENT_STATUS_OFFSET 1 +#define I40E_LLDP_CURRENT_STATUS_SIZE 1 + /* Defines for LLDP TLV header */ #define I40E_LLDP_TLV_LEN_SHIFT 0 #define I40E_LLDP_TLV_LEN_MASK (0x01FF << I40E_LLDP_TLV_LEN_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h index bac4da031f9b..bf15a868292f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h @@ -23,6 +23,8 @@ #define I40E_DEV_ID_10G_BASE_T_BC 0x15FF #define I40E_DEV_ID_10G_B 0x104F #define I40E_DEV_ID_10G_SFP 0x104E +#define I40E_IS_X710TL_DEVICE(d) \ + ((d) == I40E_DEV_ID_10G_BASE_T_BC) #define I40E_DEV_ID_KX_X722 0x37CE #define I40E_DEV_ID_QSFP_X722 0x37CF #define I40E_DEV_ID_SFP_X722 0x37D0 diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 41e1240acaea..d24d8731bef0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -722,7 +722,14 @@ static void i40e_get_settings_link_up_fec(u8 req_fec_info, ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER); - if (I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) { + if ((I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) && + (I40E_AQ_SET_FEC_REQUEST_KR & req_fec_info)) { + ethtool_link_ksettings_add_link_mode(ks, advertising, + FEC_NONE); + ethtool_link_ksettings_add_link_mode(ks, advertising, + FEC_BASER); + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); + } else if (I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) { ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); } 
else if (I40E_AQ_SET_FEC_REQUEST_KR & req_fec_info) { ethtool_link_ksettings_add_link_mode(ks, advertising, @@ -730,12 +737,6 @@ static void i40e_get_settings_link_up_fec(u8 req_fec_info, } else { ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE); - if (I40E_AQ_SET_FEC_AUTO & req_fec_info) { - ethtool_link_ksettings_add_link_mode(ks, advertising, - FEC_RS); - ethtool_link_ksettings_add_link_mode(ks, advertising, - FEC_BASER); - } } } @@ -1437,6 +1438,7 @@ static int i40e_get_fec_param(struct net_device *netdev, struct i40e_hw *hw = &pf->hw; i40e_status status = 0; int err = 0; + u8 fec_cfg; /* Get the current phy config */ memset(&abilities, 0, sizeof(abilities)); @@ -1448,18 +1450,16 @@ static int i40e_get_fec_param(struct net_device *netdev, } fecparam->fec = 0; - if (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_AUTO) + fec_cfg = abilities.fec_cfg_curr_mod_ext_info; + if (fec_cfg & I40E_AQ_SET_FEC_AUTO) fecparam->fec |= ETHTOOL_FEC_AUTO; - if ((abilities.fec_cfg_curr_mod_ext_info & - I40E_AQ_SET_FEC_REQUEST_RS) || - (abilities.fec_cfg_curr_mod_ext_info & - I40E_AQ_SET_FEC_ABILITY_RS)) + else if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_RS | + I40E_AQ_SET_FEC_ABILITY_RS)) fecparam->fec |= ETHTOOL_FEC_RS; - if ((abilities.fec_cfg_curr_mod_ext_info & - I40E_AQ_SET_FEC_REQUEST_KR) || - (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_ABILITY_KR)) + else if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_KR | + I40E_AQ_SET_FEC_ABILITY_KR)) fecparam->fec |= ETHTOOL_FEC_BASER; - if (abilities.fec_cfg_curr_mod_ext_info == 0) + if (fec_cfg == 0) fecparam->fec |= ETHTOOL_FEC_OFF; if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) @@ -5112,7 +5112,7 @@ static int i40e_get_module_info(struct net_device *netdev, case I40E_MODULE_TYPE_SFP: status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, - I40E_I2C_EEPROM_DEV_ADDR, + I40E_I2C_EEPROM_DEV_ADDR, true, I40E_MODULE_SFF_8472_COMP, &sff8472_comp, NULL); if (status) @@ -5120,7 +5120,7 @@ static int i40e_get_module_info(struct net_device *netdev, status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, - I40E_I2C_EEPROM_DEV_ADDR, + I40E_I2C_EEPROM_DEV_ADDR, true, I40E_MODULE_SFF_8472_SWAP, &sff8472_swap, NULL); if (status) @@ -5152,7 +5152,7 @@ static int i40e_get_module_info(struct net_device *netdev, /* Read from memory page 0. 
*/ status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, - 0, + 0, true, I40E_MODULE_REVISION_ADDR, &sff8636_rev, NULL); if (status) @@ -5223,7 +5223,7 @@ static int i40e_get_module_eeprom(struct net_device *netdev, status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, - addr, offset, &value, NULL); + true, addr, offset, &value, NULL); if (status) return -EIO; data[i] = value; @@ -5242,6 +5242,7 @@ static int i40e_set_eee(struct net_device *netdev, struct ethtool_eee *edata) } static const struct ethtool_ops i40e_ethtool_recovery_mode_ops = { + .get_drvinfo = i40e_get_drvinfo, .set_eeprom = i40e_set_eeprom, .get_eeprom_len = i40e_get_eeprom_len, .get_eeprom = i40e_get_eeprom, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6031223eafab..1ccabeafa44c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1110,6 +1110,25 @@ void i40e_update_stats(struct i40e_vsi *vsi) } /** + * i40e_count_filters - counts VSI mac filters + * @vsi: the VSI to be searched + * + * Returns count of mac filters + **/ +int i40e_count_filters(struct i40e_vsi *vsi) +{ + struct i40e_mac_filter *f; + struct hlist_node *h; + int bkt; + int cnt = 0; + + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) + ++cnt; + + return cnt; +} + +/** * i40e_find_filter - Search VSI filter list for specific mac/vlan filter * @vsi: the VSI to be searched * @macaddr: the MAC address @@ -2645,8 +2664,8 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; } - netdev_info(netdev, "changing MTU from %d to %d\n", - netdev->mtu, new_mtu); + netdev_dbg(netdev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); netdev->mtu = new_mtu; if (netif_running(netdev)) i40e_vsi_reinit_locked(vsi); @@ -3534,14 +3553,14 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), - q_vector->rx.target_itr); + q_vector->rx.target_itr >> 1); q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->tx.next_update = jiffies + 1; q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), - q_vector->tx.target_itr); + q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; wr32(hw, I40E_PFINT_RATEN(vector - 1), @@ -3646,11 +3665,11 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) /* set the ITR configuration */ q_vector->rx.next_update = jiffies + 1; q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting); - wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr); + wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr >> 1); q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->tx.next_update = jiffies + 1; q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting); - wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr); + wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; i40e_enable_misc_int_causes(pf); @@ -7168,6 +7187,7 @@ static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt, ch->num_queue_pairs = qcnt; if (!i40e_setup_channel(pf, vsi, ch)) { ret = -EINVAL; + kfree(ch); goto err_free; } ch->parent_vsi = vsi; @@ -11396,7 +11416,7 @@ static int 
i40e_setup_misc_vector(struct i40e_pf *pf) /* associate no queues to the misc vector */ wr32(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_END_OF_LIST); - wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K); + wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K >> 1); i40e_flush(hw); @@ -12850,6 +12870,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_set_features = i40e_set_features, .ndo_set_vf_mac = i40e_ndo_set_vf_mac, .ndo_set_vf_vlan = i40e_ndo_set_vf_port_vlan, + .ndo_get_vf_stats = i40e_get_vf_stats, .ndo_set_vf_rate = i40e_ndo_set_vf_bw, .ndo_get_vf_config = i40e_ndo_get_vf_config, .ndo_set_vf_link_state = i40e_ndo_set_vf_link_state, @@ -12911,6 +12932,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_UDP_L4 | NETIF_F_SCTP_CRC | NETIF_F_RXHASH | NETIF_F_RXCSUM | diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index e4d8d20baf3b..7164f4ad8120 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -323,20 +323,24 @@ i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, /** * i40e_read_nvm_module_data - Reads NVM Buffer to specified memory location - * @hw: pointer to the HW structure + * @hw: Pointer to the HW structure * @module_ptr: Pointer to module in words with respect to NVM beginning - * @offset: offset in words from module start + * @module_offset: Offset in words from module start + * @data_offset: Offset in words from reading data area start * @words_data_size: Words to read from NVM * @data_ptr: Pointer to memory location where resulting buffer will be stored **/ -i40e_status i40e_read_nvm_module_data(struct i40e_hw *hw, - u8 module_ptr, u16 offset, - u16 words_data_size, - u16 *data_ptr) +enum i40e_status_code i40e_read_nvm_module_data(struct i40e_hw *hw, + u8 module_ptr, + u16 module_offset, + u16 data_offset, + u16 words_data_size, + u16 *data_ptr) { i40e_status status; + u16 specific_ptr = 0; u16 ptr_value = 0; - u32 flat_offset; + u32 offset = 0; if (module_ptr != 0) { status = i40e_read_nvm_word(hw, module_ptr, &ptr_value); @@ -352,36 +356,35 @@ i40e_status i40e_read_nvm_module_data(struct i40e_hw *hw, /* Pointer not initialized */ if (ptr_value == I40E_NVM_INVALID_PTR_VAL || - ptr_value == I40E_NVM_INVALID_VAL) + ptr_value == I40E_NVM_INVALID_VAL) { + i40e_debug(hw, I40E_DEBUG_ALL, "Pointer not initialized.\n"); return I40E_ERR_BAD_PTR; + } /* Check whether the module is in SR mapped area or outside */ if (ptr_value & I40E_PTR_TYPE) { /* Pointer points outside of the Shared RAM mapped area */ - ptr_value &= ~I40E_PTR_TYPE; + i40e_debug(hw, I40E_DEBUG_ALL, + "Reading nvm data failed. 
Pointer points outside of the Shared RAM mapped area.\n"); - /* PtrValue in 4kB units, need to convert to words */ - ptr_value /= 2; - flat_offset = ((u32)ptr_value * 0x1000) + (u32)offset; - status = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); - if (!status) { - status = i40e_aq_read_nvm(hw, 0, 2 * flat_offset, - 2 * words_data_size, - data_ptr, true, NULL); - i40e_release_nvm(hw); - if (status) { - i40e_debug(hw, I40E_DEBUG_ALL, - "Reading nvm aq failed.Error code: %d.\n", - status); - return I40E_ERR_NVM; - } - } else { - return I40E_ERR_NVM; - } + return I40E_ERR_PARAM; } else { /* Read from the Shadow RAM */ - status = i40e_read_nvm_buffer(hw, ptr_value + offset, - &words_data_size, data_ptr); + + status = i40e_read_nvm_word(hw, ptr_value + module_offset, + &specific_ptr); + if (status) { + i40e_debug(hw, I40E_DEBUG_ALL, + "Reading nvm word failed.Error code: %d.\n", + status); + return I40E_ERR_NVM; + } + + offset = ptr_value + module_offset + specific_ptr + + data_offset; + + status = i40e_read_nvm_buffer(hw, offset, &words_data_size, + data_ptr); if (status) { i40e_debug(hw, I40E_DEBUG_ALL, "Reading nvm buffer failed.Error code: %d.\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 5250441bf75b..bbb478f09093 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -315,10 +315,12 @@ i40e_status i40e_acquire_nvm(struct i40e_hw *hw, void i40e_release_nvm(struct i40e_hw *hw); i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, u16 *data); -i40e_status i40e_read_nvm_module_data(struct i40e_hw *hw, - u8 module_ptr, u16 offset, - u16 words_data_size, - u16 *data_ptr); +enum i40e_status_code i40e_read_nvm_module_data(struct i40e_hw *hw, + u8 module_ptr, + u16 module_offset, + u16 data_offset, + u16 words_data_size, + u16 *data_ptr); i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, u16 *words, u16 *data); i40e_status i40e_update_nvm_checksum(struct i40e_hw *hw); @@ -409,14 +411,24 @@ i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, u32 reg_addr, u32 reg_val, struct i40e_asq_cmd_details *cmd_details); void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); -i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw, - u8 phy_select, u8 dev_addr, - u32 reg_addr, u32 reg_val, - struct i40e_asq_cmd_details *cmd_details); -i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw, - u8 phy_select, u8 dev_addr, - u32 reg_addr, u32 *reg_val, - struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code +i40e_aq_set_phy_register_ext(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, bool page_change, + bool set_mdio, u8 mdio_num, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code +i40e_aq_get_phy_register_ext(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, bool page_change, + bool set_mdio, u8 mdio_num, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); + +/* Convenience wrappers for most common use case */ +#define i40e_aq_set_phy_register(hw, ps, da, pc, ra, rv, cd) \ + i40e_aq_set_phy_register_ext(hw, ps, da, pc, false, 0, ra, rv, cd) +#define i40e_aq_get_phy_register(hw, ps, da, pc, ra, rv, cd) \ + i40e_aq_get_phy_register_ext(hw, ps, da, pc, false, 0, ra, rv, cd) i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw, u16 reg, u8 phy_addr, u16 *value); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c 
b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index e3f29dc8b290..b8496037ef7f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2960,10 +2960,16 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len, /* remove payload length from inner checksum */ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); - /* compute length of segmentation header */ - *hdr_len = (l4.tcp->doff * 4) + l4_offset; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + csum_replace_by_diff(&l4.udp->check, (__force __wsum)htonl(paylen)); + /* compute length of segmentation header */ + *hdr_len = sizeof(*l4.udp) + l4_offset; + } else { + csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; + } /* pull values out of skb_shinfo */ gso_size = skb_shinfo(skb)->gso_size; diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index b43ec94a0f29..6ea2867ff60f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -624,6 +624,7 @@ struct i40e_hw { #define I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK BIT_ULL(3) #define I40E_HW_FLAG_FW_LLDP_STOPPABLE BIT_ULL(4) #define I40E_HW_FLAG_FW_LLDP_PERSISTENT BIT_ULL(5) +#define I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED BIT_ULL(6) #define I40E_HW_FLAG_DROP_MODE BIT_ULL(7) u64 flags; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 3d2440838822..6a3f0fc56c3b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -955,7 +955,6 @@ static void i40e_free_vf_res(struct i40e_vf *vf) i40e_vsi_release(pf->vsi[vf->lan_vsi_idx]); vf->lan_vsi_idx = 0; vf->lan_vsi_id = 0; - vf->num_mac = 0; } /* do the accounting and remove additional ADq VSI's */ @@ -2548,20 +2547,12 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, struct virtchnl_ether_addr_list *al) { struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx]; + int mac2add_cnt = 0; int i; - /* If this VF is not privileged, then we can't add more than a limited - * number of addresses. Check to make sure that the additions do not - * push us over the limit. - */ - if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) && - (vf->num_mac + al->num_elements) > I40E_VC_MAX_MAC_ADDR_PER_VF) { - dev_err(&pf->pdev->dev, - "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n"); - return -EPERM; - } - for (i = 0; i < al->num_elements; i++) { + struct i40e_mac_filter *f; u8 *addr = al->list[i].addr; if (is_broadcast_ether_addr(addr) || @@ -2585,8 +2576,24 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); return -EPERM; } + + /*count filters that really will be added*/ + f = i40e_find_mac(vsi, addr); + if (!f) + ++mac2add_cnt; } + /* If this VF is not privileged, then we can't add more than a limited + * number of addresses. Check to make sure that the additions do not + * push us over the limit. 
+ */ + if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) && + (i40e_count_filters(vsi) + mac2add_cnt) > + I40E_VC_MAX_MAC_ADDR_PER_VF) { + dev_err(&pf->pdev->dev, + "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n"); + return -EPERM; + } return 0; } @@ -2640,8 +2647,6 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) ret = I40E_ERR_PARAM; spin_unlock_bh(&vsi->mac_filter_hash_lock); goto error_param; - } else { - vf->num_mac++; } } } @@ -2689,16 +2694,6 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) ret = I40E_ERR_INVALID_MAC_ADDR; goto error_param; } - - if (vf->pf_set_mac && - ether_addr_equal(al->list[i].addr, - vf->default_lan_addr.addr)) { - dev_err(&pf->pdev->dev, - "MAC addr %pM has been set by PF, cannot delete it for VF %d, reset VF to change MAC addr\n", - vf->default_lan_addr.addr, vf->vf_id); - ret = I40E_ERR_PARAM; - goto error_param; - } } vsi = pf->vsi[vf->lan_vsi_idx]; @@ -2709,8 +2704,6 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) ret = I40E_ERR_INVALID_MAC_ADDR; spin_unlock_bh(&vsi->mac_filter_hash_lock); goto error_param; - } else { - vf->num_mac--; } spin_unlock_bh(&vsi->mac_filter_hash_lock); @@ -4531,3 +4524,51 @@ out: clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state); return ret; } + +/** + * i40e_get_vf_stats - populate some stats for the VF + * @netdev: the netdev of the PF + * @vf_id: the host OS identifier (0-127) + * @vf_stats: pointer to the OS memory to be initialized + */ +int i40e_get_vf_stats(struct net_device *netdev, int vf_id, + struct ifla_vf_stats *vf_stats) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_pf *pf = np->vsi->back; + struct i40e_eth_stats *stats; + struct i40e_vsi *vsi; + struct i40e_vf *vf; + + /* validate the request */ + if (i40e_validate_vf(pf, vf_id)) + return -EINVAL; + + vf = &pf->vf[vf_id]; + if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { + dev_err(&pf->pdev->dev, "VF %d in reset. 
Try again.\n", vf_id); + return -EBUSY; + } + + vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vsi) + return -EINVAL; + + i40e_update_eth_stats(vsi); + stats = &vsi->eth_stats; + + memset(vf_stats, 0, sizeof(*vf_stats)); + + vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast + + stats->rx_multicast; + vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast + + stats->tx_multicast; + vf_stats->rx_bytes = stats->rx_bytes; + vf_stats->tx_bytes = stats->tx_bytes; + vf_stats->broadcast = stats->rx_broadcast; + vf_stats->multicast = stats->rx_multicast; + vf_stats->rx_dropped = stats->rx_discards; + vf_stats->tx_dropped = stats->tx_discards; + + return 0; +} diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 7164b9bb294f..631248c0981a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -101,7 +101,6 @@ struct i40e_vf { bool link_up; /* only valid if VF link is forced */ bool queues_enabled; /* true if the VF queues are enabled */ bool spoofchk; - u16 num_mac; u16 num_vlan; /* ADq related variables */ @@ -139,5 +138,7 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable); void i40e_vc_notify_link_state(struct i40e_pf *pf); void i40e_vc_notify_reset(struct i40e_pf *pf); +int i40e_get_vf_stats(struct net_device *netdev, int vf_id, + struct ifla_vf_stats *vf_stats); #endif /* _I40E_VIRTCHNL_PF_H_ */ diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 9edde960b4f2..7cb829132d28 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -13,9 +13,12 @@ ice-y := ice_main.o \ ice_nvm.o \ ice_switch.o \ ice_sched.o \ + ice_base.o \ ice_lib.o \ + ice_txrx_lib.o \ ice_txrx.o \ ice_flex_pipe.o \ ice_ethtool.o ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o -ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_lib.o +ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o +ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 45e100666049..f972dce8aebb 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -29,10 +29,13 @@ #include <linux/ip.h> #include <linux/sctp.h> #include <linux/ipv6.h> +#include <linux/pkt_sched.h> #include <linux/if_bridge.h> #include <linux/ctype.h> +#include <linux/bpf.h> #include <linux/avf/virtchnl.h> #include <net/ipv6.h> +#include <net/xdp_sock.h> #include "ice_devids.h" #include "ice_type.h" #include "ice_txrx.h" @@ -42,6 +45,7 @@ #include "ice_sched.h" #include "ice_virtchnl_pf.h" #include "ice_sriov.h" +#include "ice_xsk.h" extern const char ice_drv_ver[]; #define ICE_BAR0 0 @@ -78,8 +82,7 @@ extern const char ice_drv_ver[]; #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) -#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \ - (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))) +#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD) #define ICE_UP_TABLE_TRANSLATE(val, i) \ (((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \ @@ -127,6 +130,16 @@ extern const char ice_drv_ver[]; ICE_PROMISC_VLAN_TX | \ ICE_PROMISC_VLAN_RX) +#define ice_pf_to_dev(pf) (&((pf)->pdev->dev)) + +struct ice_txq_meta { + u32 q_teid; /* Tx-scheduler element identifier */ + u16 q_id; /* Entry in VSI's txq_map bitmap */ + u16 q_handle; /* Relative index of Tx queue within TC */ + u16 
vsi_idx; /* VSI index that Tx queue belongs to */ + u8 tc; /* TC number that Tx queue belongs to */ +}; + struct ice_tc_info { u16 qoffset; u16 qcount_tx; @@ -169,6 +182,7 @@ enum ice_state { __ICE_NEEDS_RESTART, __ICE_PREPARED_FOR_RESET, /* set by driver when prepared */ __ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */ + __ICE_DCBNL_DEVRESET, /* set by dcbnl devreset */ __ICE_PFR_REQ, /* set by driver and peers */ __ICE_CORER_REQ, /* set by driver and peers */ __ICE_GLOBR_REQ, /* set by driver and peers */ @@ -271,9 +285,18 @@ struct ice_vsi { u16 num_txq; /* Used Tx queues */ u16 alloc_rxq; /* Allocated Rx queues */ u16 num_rxq; /* Used Rx queues */ + u16 req_txq; /* User requested Tx queues */ + u16 req_rxq; /* User requested Rx queues */ u16 num_rx_desc; u16 num_tx_desc; struct ice_tc_cfg tc_cfg; + struct bpf_prog *xdp_prog; + struct ice_ring **xdp_rings; /* XDP ring array */ + u16 num_xdp_txq; /* Used XDP queues */ + u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ + struct xdp_umem **xsk_umems; + u16 num_xsk_umems_used; + u16 num_xsk_umems; } ____cacheline_internodealigned_in_smp; /* struct that defines an interrupt vector */ @@ -313,6 +336,7 @@ enum ice_pf_flags { ICE_FLAG_NO_MEDIA, ICE_FLAG_FW_LLDP_AGENT, ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */ + ICE_FLAG_LEGACY_RX, ICE_PF_FLAGS_NBITS /* must be last */ }; @@ -346,6 +370,7 @@ struct ice_pf { struct work_struct serv_task; struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */ struct mutex sw_mutex; /* lock for protecting VSI alloc flow */ + struct mutex tc_mutex; /* lock to protect TC changes */ u32 msg_enable; u32 hw_csum_rx_error; u32 oicr_idx; /* Other interrupt cause MSIX vector index */ @@ -417,6 +442,37 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev) return np->vsi->back; } +static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi) +{ + return !!vsi->xdp_prog; +} + +static inline void ice_set_ring_xdp(struct ice_ring *ring) +{ + ring->flags |= ICE_TX_FLAGS_RING_XDP; +} + +/** + * ice_xsk_umem - get XDP UMEM bound to a ring + * @ring - ring to use + * + * Returns a pointer to xdp_umem structure if there is an UMEM present, + * NULL otherwise. 
+ */ +static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring) +{ + struct xdp_umem **umems = ring->vsi->xsk_umems; + int qid = ring->q_index; + + if (ice_ring_is_xdp(ring)) + qid -= ring->vsi->num_xdp_txq; + + if (!umems || !umems[qid] || !ice_is_xdp_ena_vsi(ring->vsi)) + return NULL; + + return umems[qid]; +} + /** * ice_get_main_vsi - Get the PF VSI * @pf: PF instance @@ -437,20 +493,23 @@ void ice_set_ethtool_ops(struct net_device *netdev); void ice_set_ethtool_safe_mode_ops(struct net_device *netdev); u16 ice_get_avail_txq_count(struct ice_pf *pf); u16 ice_get_avail_rxq_count(struct ice_pf *pf); +int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx); void ice_update_vsi_stats(struct ice_vsi *vsi); void ice_update_pf_stats(struct ice_pf *pf); int ice_up(struct ice_vsi *vsi); int ice_down(struct ice_vsi *vsi); int ice_vsi_cfg(struct ice_vsi *vsi); struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi); +int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog); +int ice_destroy_xdp_rings(struct ice_vsi *vsi); +int +ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); +int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); -#ifdef CONFIG_DCB -int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked); -void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked); -#endif /* CONFIG_DCB */ int ice_open(struct net_device *netdev); int ice_stop(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 023e3d2fee5f..5421fc413f94 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -742,6 +742,10 @@ struct ice_aqc_add_elem { struct ice_aqc_txsched_elem_data generic[1]; }; +struct ice_aqc_conf_elem { + struct ice_aqc_txsched_elem_data generic[1]; +}; + struct ice_aqc_get_elem { struct ice_aqc_txsched_elem_data generic[1]; }; @@ -783,6 +787,44 @@ struct ice_aqc_port_ets_elem { __le32 tc_node_teid[8]; /* Used for response, reserved in command */ }; +/* Rate limiting profile for + * Add RL profile (indirect 0x0410) + * Query RL profile (indirect 0x0411) + * Remove RL profile (indirect 0x0415) + * These indirect commands acts on single or multiple + * RL profiles with specified data. + */ +struct ice_aqc_rl_profile { + __le16 num_profiles; + __le16 num_processed; /* Only for response. Reserved in Command. 
*/ + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +struct ice_aqc_rl_profile_elem { + u8 level; + u8 flags; +#define ICE_AQC_RL_PROFILE_TYPE_S 0x0 +#define ICE_AQC_RL_PROFILE_TYPE_M (0x3 << ICE_AQC_RL_PROFILE_TYPE_S) +#define ICE_AQC_RL_PROFILE_TYPE_CIR 0 +#define ICE_AQC_RL_PROFILE_TYPE_EIR 1 +#define ICE_AQC_RL_PROFILE_TYPE_SRL 2 +/* The following flag is used for Query RL Profile Data */ +#define ICE_AQC_RL_PROFILE_INVAL_S 0x7 +#define ICE_AQC_RL_PROFILE_INVAL_M (0x1 << ICE_AQC_RL_PROFILE_INVAL_S) + + __le16 profile_id; + __le16 max_burst_size; + __le16 rl_multiply; + __le16 wake_up_calc; + __le16 rl_encode; +}; + +struct ice_aqc_rl_profile_generic_elem { + struct ice_aqc_rl_profile_elem generic[1]; +}; + /* Query Scheduler Resource Allocation (indirect 0x0412) * This indirect command retrieves the scheduler resources allocated by * EMP Firmware to the given PF. @@ -1044,6 +1086,10 @@ struct ice_aqc_get_link_status_data { #define ICE_AQ_LINK_TOPO_CONFLICT BIT(0) #define ICE_AQ_LINK_MEDIA_CONFLICT BIT(1) #define ICE_AQ_LINK_TOPO_CORRUPT BIT(2) +#define ICE_AQ_LINK_TOPO_UNREACH_PRT BIT(4) +#define ICE_AQ_LINK_TOPO_UNDRUTIL_PRT BIT(5) +#define ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA BIT(6) +#define ICE_AQ_LINK_TOPO_UNSUPP_MEDIA BIT(7) u8 reserved1; u8 link_info; #define ICE_AQ_LINK_UP BIT(0) /* Link Status */ @@ -1147,6 +1193,33 @@ struct ice_aqc_set_port_id_led { u8 rsvd[13]; }; +/* Read/Write SFF EEPROM command (indirect 0x06EE) */ +struct ice_aqc_sff_eeprom { + u8 lport_num; + u8 lport_num_valid; +#define ICE_AQC_SFF_PORT_NUM_VALID BIT(0) + __le16 i2c_bus_addr; +#define ICE_AQC_SFF_I2CBUS_7BIT_M 0x7F +#define ICE_AQC_SFF_I2CBUS_10BIT_M 0x3FF +#define ICE_AQC_SFF_I2CBUS_TYPE_M BIT(10) +#define ICE_AQC_SFF_I2CBUS_TYPE_7BIT 0 +#define ICE_AQC_SFF_I2CBUS_TYPE_10BIT ICE_AQC_SFF_I2CBUS_TYPE_M +#define ICE_AQC_SFF_SET_EEPROM_PAGE_S 11 +#define ICE_AQC_SFF_SET_EEPROM_PAGE_M (0x3 << ICE_AQC_SFF_SET_EEPROM_PAGE_S) +#define ICE_AQC_SFF_NO_PAGE_CHANGE 0 +#define ICE_AQC_SFF_SET_23_ON_MISMATCH 1 +#define ICE_AQC_SFF_SET_22_ON_MISMATCH 2 +#define ICE_AQC_SFF_IS_WRITE BIT(15) + __le16 i2c_mem_addr; + __le16 eeprom_page; +#define ICE_AQC_SFF_EEPROM_BANK_S 0 +#define ICE_AQC_SFF_EEPROM_BANK_M (0xFF << ICE_AQC_SFF_EEPROM_BANK_S) +#define ICE_AQC_SFF_EEPROM_PAGE_S 8 +#define ICE_AQC_SFF_EEPROM_PAGE_M (0xFF << ICE_AQC_SFF_EEPROM_PAGE_S) + __le32 addr_high; + __le32 addr_low; +}; + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) @@ -1618,6 +1691,7 @@ struct ice_aq_desc { struct ice_aqc_get_phy_caps get_phy; struct ice_aqc_set_phy_cfg set_phy; struct ice_aqc_restart_an restart_an; + struct ice_aqc_sff_eeprom read_write_sff_param; struct ice_aqc_set_port_id_led set_port_id_led; struct ice_aqc_get_sw_cfg get_sw_conf; struct ice_aqc_sw_rules sw_rules; @@ -1625,6 +1699,7 @@ struct ice_aq_desc { struct ice_aqc_sched_elem_cmd sched_elem_cmd; struct ice_aqc_query_txsched_res query_sched_res; struct ice_aqc_query_port_ets port_ets; + struct ice_aqc_rl_profile rl_profile; struct ice_aqc_nvm nvm; struct ice_aqc_nvm_checksum nvm_checksum; struct ice_aqc_pf_vf_msg virt; @@ -1726,12 +1801,15 @@ enum ice_adminq_opc { /* transmit scheduler commands */ ice_aqc_opc_get_dflt_topo = 0x0400, ice_aqc_opc_add_sched_elems = 0x0401, + ice_aqc_opc_cfg_sched_elems = 0x0403, ice_aqc_opc_get_sched_elems = 0x0404, ice_aqc_opc_suspend_sched_elems = 0x0409, ice_aqc_opc_resume_sched_elems = 0x040A, ice_aqc_opc_query_port_ets = 0x040E, ice_aqc_opc_delete_sched_elems 
= 0x040F, + ice_aqc_opc_add_rl_profiles = 0x0410, ice_aqc_opc_query_sched_res = 0x0412, + ice_aqc_opc_remove_rl_profiles = 0x0415, /* PHY commands */ ice_aqc_opc_get_phy_caps = 0x0600, @@ -1741,6 +1819,7 @@ enum ice_adminq_opc { ice_aqc_opc_set_event_mask = 0x0613, ice_aqc_opc_set_mac_lb = 0x0620, ice_aqc_opc_set_port_id_led = 0x06E9, + ice_aqc_opc_sff_eeprom = 0x06EE, /* NVM commands */ ice_aqc_opc_nvm_read = 0x0701, diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c new file mode 100644 index 000000000000..77d6a0291e97 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -0,0 +1,859 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Intel Corporation. */ + +#include "ice_base.h" +#include "ice_dcb_lib.h" + +/** + * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI + * @qs_cfg: gathered variables needed for PF->VSI queues assignment + * + * Return 0 on success and -ENOMEM if no space is left in the PF queue bitmap + */ +static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg) +{ + int offset, i; + + mutex_lock(qs_cfg->qs_mutex); + offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size, + 0, qs_cfg->q_count, 0); + if (offset >= qs_cfg->pf_map_size) { + mutex_unlock(qs_cfg->qs_mutex); + return -ENOMEM; + } + + bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count); + for (i = 0; i < qs_cfg->q_count; i++) + qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset; + mutex_unlock(qs_cfg->qs_mutex); + + return 0; +} + +/** + * __ice_vsi_get_qs_sc - Assign scattered queues from PF to VSI + * @qs_cfg: gathered variables needed for PF->VSI queues assignment + * + * Return 0 on success and -ENOMEM if no space is left in the PF queue bitmap + */ +static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg) +{ + int i, index = 0; + + mutex_lock(qs_cfg->qs_mutex); + for (i = 0; i < qs_cfg->q_count; i++) { + index = find_next_zero_bit(qs_cfg->pf_map, + qs_cfg->pf_map_size, index); + if (index >= qs_cfg->pf_map_size) + goto err_scatter; + set_bit(index, qs_cfg->pf_map); + qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index; + } + mutex_unlock(qs_cfg->qs_mutex); + + return 0; +err_scatter: + for (index = 0; index < i; index++) { + clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map); + qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0; + } + mutex_unlock(qs_cfg->qs_mutex); + + return -ENOMEM; +} + +/** + * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled + * @pf: the PF being configured + * @pf_q: the PF queue + * @ena: enable or disable state of the queue + * + * This routine will wait for the given Rx queue of the PF to reach the + * enabled or disabled state. + * Returns -ETIMEDOUT if the queue fails to reach the requested state after + * multiple retries; otherwise returns 0 on success. + */ +static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena) +{ + int i; + + for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) { + if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) & + QRX_CTRL_QENA_STAT_M)) + return 0; + + usleep_range(20, 40); + } + + return -ETIMEDOUT; +} + +/** + * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector + * @vsi: the VSI being configured + * @v_idx: index of the vector in the VSI struct + * + * We allocate one q_vector. If allocation fails we return -ENOMEM. 
+ */ +static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx) +{ + struct ice_pf *pf = vsi->back; + struct ice_q_vector *q_vector; + + /* allocate q_vector */ + q_vector = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*q_vector), + GFP_KERNEL); + if (!q_vector) + return -ENOMEM; + + q_vector->vsi = vsi; + q_vector->v_idx = v_idx; + if (vsi->type == ICE_VSI_VF) + goto out; + /* only set affinity_mask if the CPU is online */ + if (cpu_online(v_idx)) + cpumask_set_cpu(v_idx, &q_vector->affinity_mask); + + /* This will not be called in the driver load path because the netdev + * will not be created yet. All other cases will register the NAPI + * handler here (i.e. resume, reset/rebuild, etc.) + */ + if (vsi->netdev) + netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll, + NAPI_POLL_WEIGHT); + +out: + /* tie q_vector and VSI together */ + vsi->q_vectors[v_idx] = q_vector; + + return 0; +} + +/** + * ice_free_q_vector - Free memory allocated for a specific interrupt vector + * @vsi: VSI having the memory freed + * @v_idx: index of the vector to be freed + */ +static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) +{ + struct ice_q_vector *q_vector; + struct ice_pf *pf = vsi->back; + struct ice_ring *ring; + struct device *dev; + + dev = ice_pf_to_dev(pf); + if (!vsi->q_vectors[v_idx]) { + dev_dbg(dev, "Queue vector at index %d not found\n", v_idx); + return; + } + q_vector = vsi->q_vectors[v_idx]; + + ice_for_each_ring(ring, q_vector->tx) + ring->q_vector = NULL; + ice_for_each_ring(ring, q_vector->rx) + ring->q_vector = NULL; + + /* only VSI with an associated netdev is set up with NAPI */ + if (vsi->netdev) + netif_napi_del(&q_vector->napi); + + devm_kfree(dev, q_vector); + vsi->q_vectors[v_idx] = NULL; +} + +/** + * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set + * @hw: board specific structure + */ +static void ice_cfg_itr_gran(struct ice_hw *hw) +{ + u32 regval = rd32(hw, GLINT_CTL); + + /* no need to update global register if ITR gran is already set */ + if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) && + (((regval & GLINT_CTL_ITR_GRAN_200_M) >> + GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) && + (((regval & GLINT_CTL_ITR_GRAN_100_M) >> + GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) && + (((regval & GLINT_CTL_ITR_GRAN_50_M) >> + GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) && + (((regval & GLINT_CTL_ITR_GRAN_25_M) >> + GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US)) + return; + + regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) & + GLINT_CTL_ITR_GRAN_200_M) | + ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) & + GLINT_CTL_ITR_GRAN_100_M) | + ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) & + GLINT_CTL_ITR_GRAN_50_M) | + ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) & + GLINT_CTL_ITR_GRAN_25_M); + wr32(hw, GLINT_CTL, regval); +} + +/** + * ice_calc_q_handle - calculate the queue handle + * @vsi: VSI that ring belongs to + * @ring: ring to get the absolute queue index + * @tc: traffic class number + */ +static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc) +{ + WARN_ONCE(ice_ring_is_xdp(ring) && tc, + "XDP ring can't belong to TC other than 0"); + + /* The idea here is to subtract the queue offset of the TC that the + * ring belongs to from the ring's absolute queue index; the result is + * the queue's index within the TC. 
+ */ + return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset; +} + +/** + * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance + * @ring: The Tx ring to configure + * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized + * @pf_q: queue index in the PF space + * + * Configure the Tx descriptor ring in TLAN context. + */ +static void +ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) +{ + struct ice_vsi *vsi = ring->vsi; + struct ice_hw *hw = &vsi->back->hw; + + tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S; + + tlan_ctx->port_num = vsi->port_info->lport; + + /* Transmit Queue Length */ + tlan_ctx->qlen = ring->count; + + ice_set_cgd_num(tlan_ctx, ring); + + /* PF number */ + tlan_ctx->pf_num = hw->pf_id; + + /* queue belongs to a specific VSI type + * VF / VM index should be programmed per vmvf_type setting: + * for vmvf_type = VF, it is VF number between 0-256 + * for vmvf_type = VM, it is VM number between 0-767 + * for PF or EMP this field should be set to zero + */ + switch (vsi->type) { + case ICE_VSI_LB: + /* fall through */ + case ICE_VSI_PF: + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; + break; + case ICE_VSI_VF: + /* Firmware expects vmvf_num to be absolute VF ID */ + tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id; + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF; + break; + default: + return; + } + + /* make sure the context is associated with the right VSI */ + tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); + + tlan_ctx->tso_ena = ICE_TX_LEGACY; + tlan_ctx->tso_qnum = pf_q; + + /* Legacy or Advanced Host Interface: + * 0: Advanced Host Interface + * 1: Legacy Host Interface + */ + tlan_ctx->legacy_int = ICE_TX_LEGACY; +} + +/** + * ice_setup_rx_ctx - Configure a receive ring context + * @ring: The Rx ring to configure + * + * Configure the Rx descriptor ring in RLAN context. + */ +int ice_setup_rx_ctx(struct ice_ring *ring) +{ + int chain_len = ICE_MAX_CHAINED_RX_BUFS; + struct ice_vsi *vsi = ring->vsi; + u32 rxdid = ICE_RXDID_FLEX_NIC; + struct ice_rlan_ctx rlan_ctx; + struct ice_hw *hw; + u32 regval; + u16 pf_q; + int err; + + hw = &vsi->back->hw; + + /* what is Rx queue number in global space of 2K Rx queues */ + pf_q = vsi->rxq_map[ring->q_index]; + + /* clear the context structure first */ + memset(&rlan_ctx, 0, sizeof(rlan_ctx)); + + ring->rx_buf_len = vsi->rx_buf_len; + + if (ring->vsi->type == ICE_VSI_PF) { + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) + xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->q_index); + + ring->xsk_umem = ice_xsk_umem(ring); + if (ring->xsk_umem) { + xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + + ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr - + XDP_PACKET_HEADROOM; + /* For AF_XDP ZC, we disallow packets to span on + * multiple buffers, thus letting us skip that + * handling in the fast-path. + */ + chain_len = 1; + ring->zca.free = ice_zca_free; + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_ZERO_COPY, + &ring->zca); + if (err) + return err; + + dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n", + ring->q_index); + } else { + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) + xdp_rxq_info_reg(&ring->xdp_rxq, + ring->netdev, + ring->q_index); + + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + if (err) + return err; + } + } + /* Receive Queue Base Address. + * Indicates the starting address of the descriptor queue defined in + * 128 Byte units. 
+ */ + rlan_ctx.base = ring->dma >> 7; + + rlan_ctx.qlen = ring->count; + + /* Receive Packet Data Buffer Size. + * The Packet Data Buffer Size is defined in 128 byte units. + */ + rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S; + + /* use 32 byte descriptors */ + rlan_ctx.dsize = 1; + + /* Strip the Ethernet CRC bytes before the packet is posted to host + * memory. + */ + rlan_ctx.crcstrip = 1; + + /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */ + rlan_ctx.l2tsel = 1; + + rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; + rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; + rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT; + + /* This controls whether VLAN is stripped from inner headers + * The VLAN in the inner L2 header is stripped to the receive + * descriptor if enabled by this flag. + */ + rlan_ctx.showiv = 0; + + /* Max packet size for this queue - must not be set to a larger value + * than 5 x DBUF + */ + rlan_ctx.rxmax = min_t(u16, vsi->max_frame, + chain_len * ring->rx_buf_len); + + /* Rx queue threshold in units of 64 */ + rlan_ctx.lrxqthresh = 1; + + /* Enable Flexible Descriptors in the queue context which + * allows this driver to select a specific receive descriptor format + */ + if (vsi->type != ICE_VSI_VF) { + regval = rd32(hw, QRXFLXP_CNTXT(pf_q)); + regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) & + QRXFLXP_CNTXT_RXDID_IDX_M; + + /* increasing context priority to pick up profile ID; + * default is 0x01; setting to 0x03 to ensure profile + * is programming if prev context is of same priority + */ + regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) & + QRXFLXP_CNTXT_RXDID_PRIO_M; + + wr32(hw, QRXFLXP_CNTXT(pf_q), regval); + } + + /* Absolute queue number out of 2K needs to be passed */ + err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); + if (err) { + dev_err(&vsi->back->pdev->dev, + "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", + pf_q, err); + return -EIO; + } + + if (vsi->type == ICE_VSI_VF) + return 0; + + /* configure Rx buffer alignment */ + if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) + ice_clear_ring_build_skb_ena(ring); + else + ice_set_ring_build_skb_ena(ring); + + /* init queue specific tail register */ + ring->tail = hw->hw_addr + QRX_TAIL(pf_q); + writel(0, ring->tail); + + err = ring->xsk_umem ? + ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) : + ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring)); + if (err) + dev_info(&vsi->back->pdev->dev, + "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n", + ring->xsk_umem ? "UMEM enabled " : "", + ring->q_index, pf_q); + + return 0; +} + +/** + * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI + * @qs_cfg: gathered variables needed for pf->vsi queues assignment + * + * This function first tries to find contiguous space. If it is not successful, + * it tries with the scatter approach. 
+ * + * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap + */ +int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg) +{ + int ret = 0; + + ret = __ice_vsi_get_qs_contig(qs_cfg); + if (ret) { + /* contig failed, so try with scatter approach */ + qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER; + qs_cfg->q_count = min_t(u16, qs_cfg->q_count, + qs_cfg->scatter_count); + ret = __ice_vsi_get_qs_sc(qs_cfg); + } + return ret; +} + +/** + * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring + * @vsi: the VSI being configured + * @ena: start or stop the Rx rings + * @rxq_idx: Rx queue index + */ +int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx) +{ + int pf_q = vsi->rxq_map[rxq_idx]; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + int ret = 0; + u32 rx_reg; + + rx_reg = rd32(hw, QRX_CTRL(pf_q)); + + /* Skip if the queue is already in the requested state */ + if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M)) + return 0; + + /* turn on/off the queue */ + if (ena) + rx_reg |= QRX_CTRL_QENA_REQ_M; + else + rx_reg &= ~QRX_CTRL_QENA_REQ_M; + wr32(hw, QRX_CTRL(pf_q), rx_reg); + + /* wait for the change to finish */ + ret = ice_pf_rxq_wait(pf, pf_q, ena); + if (ret) + dev_err(ice_pf_to_dev(pf), + "VSI idx %d Rx ring %d %sable timeout\n", + vsi->idx, pf_q, (ena ? "en" : "dis")); + + return ret; +} + +/** + * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors + * @vsi: the VSI being configured + * + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. + */ +int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int v_idx = 0, num_q_vectors; + struct device *dev; + int err; + + dev = ice_pf_to_dev(pf); + if (vsi->q_vectors[0]) { + dev_dbg(dev, "VSI %d has existing q_vectors\n", vsi->vsi_num); + return -EEXIST; + } + + num_q_vectors = vsi->num_q_vectors; + + for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { + err = ice_vsi_alloc_q_vector(vsi, v_idx); + if (err) + goto err_out; + } + + return 0; + +err_out: + while (v_idx--) + ice_free_q_vector(vsi, v_idx); + + dev_err(dev, "Failed to allocate %d q_vector for VSI %d, ret=%d\n", + vsi->num_q_vectors, vsi->vsi_num, err); + vsi->num_q_vectors = 0; + return err; +} + +/** + * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors + * @vsi: the VSI being configured + * + * This function maps descriptor rings to the queue-specific vectors allotted + * through the MSI-X enabling code. On a constrained vector budget, we map Tx + * and Rx rings to the vector as "efficiently" as possible. 
+ */ +void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) +{ + int q_vectors = vsi->num_q_vectors; + int tx_rings_rem, rx_rings_rem; + int v_id; + + /* initially assigning remaining rings count to VSIs num queue value */ + tx_rings_rem = vsi->num_txq; + rx_rings_rem = vsi->num_rxq; + + for (v_id = 0; v_id < q_vectors; v_id++) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_id]; + int tx_rings_per_v, rx_rings_per_v, q_id, q_base; + + /* Tx rings mapping to vector */ + tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id); + q_vector->num_ring_tx = tx_rings_per_v; + q_vector->tx.ring = NULL; + q_vector->tx.itr_idx = ICE_TX_ITR; + q_base = vsi->num_txq - tx_rings_rem; + + for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) { + struct ice_ring *tx_ring = vsi->tx_rings[q_id]; + + tx_ring->q_vector = q_vector; + tx_ring->next = q_vector->tx.ring; + q_vector->tx.ring = tx_ring; + } + tx_rings_rem -= tx_rings_per_v; + + /* Rx rings mapping to vector */ + rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id); + q_vector->num_ring_rx = rx_rings_per_v; + q_vector->rx.ring = NULL; + q_vector->rx.itr_idx = ICE_RX_ITR; + q_base = vsi->num_rxq - rx_rings_rem; + + for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) { + struct ice_ring *rx_ring = vsi->rx_rings[q_id]; + + rx_ring->q_vector = q_vector; + rx_ring->next = q_vector->rx.ring; + q_vector->rx.ring = rx_ring; + } + rx_rings_rem -= rx_rings_per_v; + } +} + +/** + * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors + * @vsi: the VSI having memory freed + */ +void ice_vsi_free_q_vectors(struct ice_vsi *vsi) +{ + int v_idx; + + ice_for_each_q_vector(vsi, v_idx) + ice_free_q_vector(vsi, v_idx); +} + +/** + * ice_vsi_cfg_txq - Configure single Tx queue + * @vsi: the VSI that queue belongs to + * @ring: Tx ring to be configured + * @qg_buf: queue group buffer + */ +int +ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, + struct ice_aqc_add_tx_qgrp *qg_buf) +{ + struct ice_tlan_ctx tlan_ctx = { 0 }; + struct ice_aqc_add_txqs_perq *txq; + struct ice_pf *pf = vsi->back; + u8 buf_len = sizeof(*qg_buf); + enum ice_status status; + u16 pf_q; + u8 tc; + + pf_q = ring->reg_idx; + ice_setup_tx_ctx(ring, &tlan_ctx, pf_q); + /* copy context contents into the qg_buf */ + qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); + ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx, + ice_tlan_ctx_info); + + /* init queue specific tail reg. It is referred as + * transmit comm scheduler queue doorbell. + */ + ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q); + + if (IS_ENABLED(CONFIG_DCB)) + tc = ring->dcb_tc; + else + tc = 0; + + /* Add unique software queue handle of the Tx queue per + * TC into the VSI Tx ring + */ + ring->q_handle = ice_calc_q_handle(vsi, ring, tc); + + status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle, + 1, qg_buf, buf_len, NULL); + if (status) { + dev_err(ice_pf_to_dev(pf), + "Failed to set LAN Tx queue context, error: %d\n", + status); + return -ENODEV; + } + + /* Add Tx Queue TEID into the VSI Tx ring from the + * response. This will complete configuring and + * enabling the queue. 
+ */ + txq = &qg_buf->txqs[0]; + if (pf_q == le16_to_cpu(txq->txq_id)) + ring->txq_teid = le32_to_cpu(txq->q_teid); + + return 0; +} + +/** + * ice_cfg_itr - configure the initial interrupt throttle values + * @hw: pointer to the HW structure + * @q_vector: interrupt vector that's being configured + * + * Configure interrupt throttling values for the ring containers that are + * associated with the interrupt vector passed in. + */ +void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector) +{ + ice_cfg_itr_gran(hw); + + if (q_vector->num_ring_rx) { + struct ice_ring_container *rc = &q_vector->rx; + + /* if this value is set then don't overwrite with default */ + if (!rc->itr_setting) + rc->itr_setting = ICE_DFLT_RX_ITR; + + rc->target_itr = ITR_TO_REG(rc->itr_setting); + rc->next_update = jiffies + 1; + rc->current_itr = rc->target_itr; + wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); + } + + if (q_vector->num_ring_tx) { + struct ice_ring_container *rc = &q_vector->tx; + + /* if this value is set then don't overwrite with default */ + if (!rc->itr_setting) + rc->itr_setting = ICE_DFLT_TX_ITR; + + rc->target_itr = ITR_TO_REG(rc->itr_setting); + rc->next_update = jiffies + 1; + rc->current_itr = rc->target_itr; + wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); + } +} + +/** + * ice_cfg_txq_interrupt - configure interrupt on Tx queue + * @vsi: the VSI being configured + * @txq: Tx queue being mapped to MSI-X vector + * @msix_idx: MSI-X vector index within the function + * @itr_idx: ITR index of the interrupt cause + * + * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector + * within the function space. + */ +void +ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u32 val; + + itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M; + + val = QINT_TQCTL_CAUSE_ENA_M | itr_idx | + ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M); + + wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val); + if (ice_is_xdp_ena_vsi(vsi)) { + u32 xdp_txq = txq + vsi->num_xdp_txq; + + wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), + val); + } + ice_flush(hw); +} + +/** + * ice_cfg_rxq_interrupt - configure interrupt on Rx queue + * @vsi: the VSI being configured + * @rxq: Rx queue being mapped to MSI-X vector + * @msix_idx: MSI-X vector index within the function + * @itr_idx: ITR index of the interrupt cause + * + * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector + * within the function space. 
+ */ +void +ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u32 val; + + itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M; + + val = QINT_RQCTL_CAUSE_ENA_M | itr_idx | + ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M); + + wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val); + + ice_flush(hw); +} + +/** + * ice_trigger_sw_intr - trigger a software interrupt + * @hw: pointer to the HW structure + * @q_vector: interrupt vector to trigger the software interrupt for + */ +void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector) +{ + wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), + (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) | + GLINT_DYN_CTL_SWINT_TRIG_M | + GLINT_DYN_CTL_INTENA_M); +} + +/** + * ice_vsi_stop_tx_ring - Disable single Tx ring + * @vsi: the VSI being configured + * @rst_src: reset source + * @rel_vmvf_num: Relative ID of VF/VM + * @ring: Tx ring to be stopped + * @txq_meta: Meta data of Tx ring to be stopped + */ +int +ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, + u16 rel_vmvf_num, struct ice_ring *ring, + struct ice_txq_meta *txq_meta) +{ + struct ice_pf *pf = vsi->back; + struct ice_q_vector *q_vector; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + u32 val; + + /* clear cause_ena bit for disabled queues */ + val = rd32(hw, QINT_TQCTL(ring->reg_idx)); + val &= ~QINT_TQCTL_CAUSE_ENA_M; + wr32(hw, QINT_TQCTL(ring->reg_idx), val); + + /* software is expected to wait for 100 ns */ + ndelay(100); + + /* trigger a software interrupt for the vector + * associated to the queue to schedule NAPI handler + */ + q_vector = ring->q_vector; + if (q_vector) + ice_trigger_sw_intr(hw, q_vector); + + status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx, + txq_meta->tc, 1, &txq_meta->q_handle, + &txq_meta->q_id, &txq_meta->q_teid, rst_src, + rel_vmvf_num, NULL); + + /* if the disable queue command was exercised during an + * active reset flow, ICE_ERR_RESET_ONGOING is returned. + * This is not an error as the reset operation disables + * queues at the hardware level anyway. + */ + if (status == ICE_ERR_RESET_ONGOING) { + dev_dbg(&vsi->back->pdev->dev, + "Reset in progress. 
LAN Tx queues already disabled\n"); + } else if (status == ICE_ERR_DOES_NOT_EXIST) { + dev_dbg(&vsi->back->pdev->dev, + "LAN Tx queues do not exist, nothing to disable\n"); + } else if (status) { + dev_err(&vsi->back->pdev->dev, + "Failed to disable LAN Tx queues, error: %d\n", status); + return -ENODEV; + } + + return 0; +} + +/** + * ice_fill_txq_meta - Prepare the Tx queue's meta data + * @vsi: VSI that ring belongs to + * @ring: ring that txq_meta will be based on + * @txq_meta: a helper struct that wraps Tx queue's information + * + * Set up a helper struct that will contain all the necessary fields that + * are needed for stopping Tx queue + */ +void +ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring, + struct ice_txq_meta *txq_meta) +{ + u8 tc; + + if (IS_ENABLED(CONFIG_DCB)) + tc = ring->dcb_tc; + else + tc = 0; + + txq_meta->q_id = ring->reg_idx; + txq_meta->q_teid = ring->txq_teid; + txq_meta->q_handle = ring->q_handle; + txq_meta->vsi_idx = vsi->idx; + txq_meta->tc = tc; +} diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h new file mode 100644 index 000000000000..407995e8e944 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_base.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. */ + +#ifndef _ICE_BASE_H_ +#define _ICE_BASE_H_ + +#include "ice.h" + +int ice_setup_rx_ctx(struct ice_ring *ring); +int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg); +int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx); +int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi); +void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi); +void ice_vsi_free_q_vectors(struct ice_vsi *vsi); +int +ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, + struct ice_aqc_add_tx_qgrp *qg_buf); +void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector); +void +ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx); +void +ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx); +void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector); +int +ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, + u16 rel_vmvf_num, struct ice_ring *ring, + struct ice_txq_meta *txq_meta); +void +ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring, + struct ice_txq_meta *txq_meta); +#endif /* _ICE_BASE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 3a6b3950eb0e..fb1d930470c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -855,6 +855,9 @@ enum ice_status ice_init_hw(struct ice_hw *hw) goto err_unroll_sched; } INIT_LIST_HEAD(&hw->agg_list); + /* Initialize max burst size */ + if (!hw->max_burst_size) + ice_cfg_rl_burst_size(hw, ICE_SCHED_DFLT_BURST_SIZE); status = ice_init_fltr_mgmt_struct(hw); if (status) @@ -1067,6 +1070,72 @@ enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req) } /** + * ice_get_pfa_module_tlv - Reads sub module TLV from NVM PFA + * @hw: pointer to hardware structure + * @module_tlv: pointer to module TLV to return + * @module_tlv_len: pointer to module TLV length to return + * @module_type: module type requested + * + * Finds the requested sub module TLV type from the Preserved Field + * Area (PFA) and returns the TLV pointer and length. The caller can + * use these to read the variable length TLV value. 
+ */ +enum ice_status +ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, + u16 module_type) +{ + enum ice_status status; + u16 pfa_len, pfa_ptr; + u16 next_tlv; + + status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Preserved Field Array pointer.\n"); + return status; + } + status = ice_read_sr_word(hw, pfa_ptr, &pfa_len); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n"); + return status; + } + /* Starting with first TLV after PFA length, iterate through the list + * of TLVs to find the requested one. + */ + next_tlv = pfa_ptr + 1; + while (next_tlv < pfa_ptr + pfa_len) { + u16 tlv_sub_module_type; + u16 tlv_len; + + /* Read TLV type */ + status = ice_read_sr_word(hw, next_tlv, &tlv_sub_module_type); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV type.\n"); + break; + } + /* Read TLV length */ + status = ice_read_sr_word(hw, next_tlv + 1, &tlv_len); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV length.\n"); + break; + } + if (tlv_sub_module_type == module_type) { + if (tlv_len) { + *module_tlv = next_tlv; + *module_tlv_len = tlv_len; + return 0; + } + return ICE_ERR_INVAL_SIZE; + } + /* Check next TLV, i.e. current TLV pointer + length + 2 words + * (for current TLV's type and length) + */ + next_tlv = next_tlv + tlv_len + 2; + } + /* Module does not exist */ + return ICE_ERR_DOES_NOT_EXIST; +} + +/** * ice_copy_rxq_ctx_to_hw * @hw: pointer to the hardware structure * @ice_rxq_ctx: pointer to the rxq context @@ -1182,56 +1251,6 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = { { 0 } }; -/** - * ice_debug_cq - * @hw: pointer to the hardware structure - * @mask: debug mask - * @desc: pointer to control queue descriptor - * @buf: pointer to command buffer - * @buf_len: max length of buf - * - * Dumps debug log about control command with descriptor contents. 
- */ -void -ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc, void *buf, - u16 buf_len) -{ - struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc; - u16 len; - -#ifndef CONFIG_DYNAMIC_DEBUG - if (!(mask & hw->debug_mask)) - return; -#endif - - if (!desc) - return; - - len = le16_to_cpu(cq_desc->datalen); - - ice_debug(hw, mask, - "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n", - le16_to_cpu(cq_desc->opcode), - le16_to_cpu(cq_desc->flags), - le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval)); - ice_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n", - le32_to_cpu(cq_desc->cookie_high), - le32_to_cpu(cq_desc->cookie_low)); - ice_debug(hw, mask, "\tparam (0,1) 0x%08X 0x%08X\n", - le32_to_cpu(cq_desc->params.generic.param0), - le32_to_cpu(cq_desc->params.generic.param1)); - ice_debug(hw, mask, "\taddr (h,l) 0x%08X 0x%08X\n", - le32_to_cpu(cq_desc->params.generic.addr_high), - le32_to_cpu(cq_desc->params.generic.addr_low)); - if (buf && cq_desc->datalen != 0) { - ice_debug(hw, mask, "Buffer:\n"); - if (buf_len < len) - len = buf_len; - - ice_debug_array(hw, mask, 16, 1, (u8 *)buf, len); - } -} - /* FW Admin Queue command wrappers */ /* Software lock/mutex that is meant to be held while the Global Config Lock @@ -1654,6 +1673,10 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, ice_debug(hw, ICE_DBG_INIT, "%s: valid_functions (bitmap) = %d\n", prefix, caps->valid_functions); + + /* store func count for resource management purposes */ + if (dev_p) + dev_p->num_funcs = hweight32(number); break; case ICE_AQC_CAPS_SRIOV: caps->sr_iov_1_1 = (number == 1); @@ -1760,6 +1783,18 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, break; } } + + /* Re-calculate capabilities that are dependent on the number of + * physical ports; i.e. some features are not supported or function + * differently on devices with more than 4 ports. + */ + if (hw->dev_caps.num_funcs > 4) { + /* Max 4 TCs per port */ + caps->maxtc = 4; + ice_debug(hw, ICE_DBG_INIT, + "%s: maxtc = %d (based on #ports)\n", prefix, + caps->maxtc); + } } /** @@ -1856,8 +1891,7 @@ void ice_set_safe_mode_caps(struct ice_hw *hw) struct ice_hw_dev_caps *dev_caps = &hw->dev_caps; u32 valid_func, rxq_first_id, txq_first_id; u32 msix_vector_first_id, max_mtu; - u32 num_func = 0; - u8 i; + u32 num_funcs; /* cache some func_caps values that should be restored after memset */ valid_func = func_caps->common_cap.valid_functions; @@ -1890,6 +1924,7 @@ void ice_set_safe_mode_caps(struct ice_hw *hw) rxq_first_id = dev_caps->common_cap.rxq_first_id; msix_vector_first_id = dev_caps->common_cap.msix_vector_first_id; max_mtu = dev_caps->common_cap.max_mtu; + num_funcs = dev_caps->num_funcs; /* unset dev capabilities */ memset(dev_caps, 0, sizeof(*dev_caps)); @@ -1900,19 +1935,14 @@ void ice_set_safe_mode_caps(struct ice_hw *hw) dev_caps->common_cap.rxq_first_id = rxq_first_id; dev_caps->common_cap.msix_vector_first_id = msix_vector_first_id; dev_caps->common_cap.max_mtu = max_mtu; - - /* valid_func is a bitmap. 
get number of functions */ -#define ICE_MAX_FUNCS 8 - for (i = 0; i < ICE_MAX_FUNCS; i++) - if (valid_func & BIT(i)) - num_func++; + dev_caps->num_funcs = num_funcs; /* one Tx and one Rx queue per function in safe mode */ - dev_caps->common_cap.num_rxq = num_func; - dev_caps->common_cap.num_txq = num_func; + dev_caps->common_cap.num_rxq = num_funcs; + dev_caps->common_cap.num_txq = num_funcs; /* two MSIX vectors per function */ - dev_caps->common_cap.num_msix_vectors = 2 * num_func; + dev_caps->common_cap.num_msix_vectors = 2 * num_funcs; } /** @@ -2556,6 +2586,52 @@ ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode, } /** + * ice_aq_sff_eeprom + * @hw: pointer to the HW struct + * @lport: bits [7:0] = logical port, bit [8] = logical port valid + * @bus_addr: I2C bus address of the eeprom (typically 0xA0, 0=topo default) + * @mem_addr: I2C offset. lower 8 bits for address, 8 upper bits zero padding. + * @page: QSFP page + * @set_page: set or ignore the page + * @data: pointer to data buffer to be read/written to the I2C device. + * @length: 1-16 for read, 1 for write. + * @write: 0 read, 1 for write. + * @cd: pointer to command details structure or NULL + * + * Read/Write SFF EEPROM (0x06EE) + */ +enum ice_status +ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, + u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length, + bool write, struct ice_sq_cd *cd) +{ + struct ice_aqc_sff_eeprom *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + if (!data || (mem_addr & 0xff00)) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom); + cmd = &desc.params.read_write_sff_param; + desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF); + cmd->lport_num = (u8)(lport & 0xff); + cmd->lport_num_valid = (u8)((lport >> 8) & 0x01); + cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) & + ICE_AQC_SFF_I2CBUS_7BIT_M) | + ((set_page << + ICE_AQC_SFF_SET_EEPROM_PAGE_S) & + ICE_AQC_SFF_SET_EEPROM_PAGE_M)); + cmd->i2c_mem_addr = cpu_to_le16(mem_addr & 0xff); + cmd->eeprom_page = cpu_to_le16((u16)page << ICE_AQC_SFF_EEPROM_PAGE_S); + if (write) + cmd->i2c_bus_addr |= cpu_to_le16(ICE_AQC_SFF_IS_WRITE); + + status = ice_aq_send_cmd(hw, &desc, data, length, cd); + return status; +} + +/** * __ice_aq_get_set_rss_lut * @hw: pointer to the hardware structure * @vsi_id: VSI FW index @@ -3148,7 +3224,7 @@ ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) * @tc: TC number * @q_handle: software queue handle */ -static struct ice_q_ctx * +struct ice_q_ctx * ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle) { struct ice_vsi_ctx *vsi; @@ -3245,9 +3321,12 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, node.node_teid = buf->txqs[0].q_teid; node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF; q_ctx->q_handle = q_handle; + q_ctx->q_teid = le32_to_cpu(node.node_teid); - /* add a leaf node into schduler tree queue layer */ + /* add a leaf node into scheduler tree queue layer */ status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node); + if (!status) + status = ice_sched_replay_q_bw(pi, q_ctx); ena_txq_exit: mutex_unlock(&pi->sched_lock); diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index c3df92f57777..b22aa561e253 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -6,16 +6,18 @@ #include "ice.h" #include "ice_type.h" +#include "ice_nvm.h" #include 
"ice_flex_pipe.h" #include "ice_switch.h" #include <linux/avf/virtchnl.h> enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw); -void -ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, u16 buf_len); enum ice_status ice_init_hw(struct ice_hw *hw); void ice_deinit_hw(struct ice_hw *hw); +enum ice_status +ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, + u16 module_type); enum ice_status ice_check_reset(struct ice_hw *hw); enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req); enum ice_status ice_create_all_ctrlq(struct ice_hw *hw); @@ -117,6 +119,10 @@ ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd); enum ice_status ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode, struct ice_sq_cd *cd); +enum ice_status +ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, + u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length, + bool write, struct ice_sq_cd *cd); enum ice_status ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues, @@ -133,6 +139,8 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle); void ice_replay_post(struct ice_hw *hw); void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf); +struct ice_q_ctx * +ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle); void ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat); diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index 2353166c654e..dd946866d7b8 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -810,6 +810,52 @@ static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) } /** + * ice_debug_cq + * @hw: pointer to the hardware structure + * @desc: pointer to control queue descriptor + * @buf: pointer to command buffer + * @buf_len: max length of buf + * + * Dumps debug log about control command with descriptor contents. 
+ */ +static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len) +{ + struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc; + u16 len; + + if (!IS_ENABLED(CONFIG_DYNAMIC_DEBUG) && + !((ICE_DBG_AQ_DESC | ICE_DBG_AQ_DESC_BUF) & hw->debug_mask)) + return; + + if (!desc) + return; + + len = le16_to_cpu(cq_desc->datalen); + + ice_debug(hw, ICE_DBG_AQ_DESC, + "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n", + le16_to_cpu(cq_desc->opcode), + le16_to_cpu(cq_desc->flags), + le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval)); + ice_debug(hw, ICE_DBG_AQ_DESC, "\tcookie (h,l) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->cookie_high), + le32_to_cpu(cq_desc->cookie_low)); + ice_debug(hw, ICE_DBG_AQ_DESC, "\tparam (0,1) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->params.generic.param0), + le32_to_cpu(cq_desc->params.generic.param1)); + ice_debug(hw, ICE_DBG_AQ_DESC, "\taddr (h,l) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->params.generic.addr_high), + le32_to_cpu(cq_desc->params.generic.addr_low)); + if (buf && cq_desc->datalen != 0) { + ice_debug(hw, ICE_DBG_AQ_DESC_BUF, "Buffer:\n"); + if (buf_len < len) + len = buf_len; + + ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, (u8 *)buf, len); + } +} + +/** * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ) * @hw: pointer to the HW struct * @cq: pointer to the specific Control queue @@ -934,10 +980,10 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, } /* Debug desc and buffer */ - ice_debug(hw, ICE_DBG_AQ_MSG, + ice_debug(hw, ICE_DBG_AQ_DESC, "ATQ: Control Send queue desc and buffer:\n"); - ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc_on_ring, buf, buf_size); + ice_debug_cq(hw, (void *)desc_on_ring, buf, buf_size); (cq->sq.next_to_use)++; if (cq->sq.next_to_use == cq->sq.count) @@ -948,7 +994,7 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, if (ice_sq_done(hw, cq)) break; - mdelay(1); + udelay(ICE_CTL_Q_SQ_CMD_USEC); total_delay++; } while (total_delay < cq->sq_cmd_timeout); @@ -971,7 +1017,8 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, retval = le16_to_cpu(desc->retval); if (retval) { ice_debug(hw, ICE_DBG_AQ_MSG, - "Control Send Queue command completed with error 0x%x\n", + "Control Send Queue command 0x%04X completed with error 0x%X\n", + le16_to_cpu(desc->opcode), retval); /* strip off FW internal code */ @@ -986,7 +1033,7 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, ice_debug(hw, ICE_DBG_AQ_MSG, "ATQ: desc and buffer writeback:\n"); - ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, buf, buf_size); + ice_debug_cq(hw, (void *)desc, buf, buf_size); /* save writeback AQ if requested */ if (details->wb_desc) @@ -1075,7 +1122,8 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, if (flags & ICE_AQ_FLAG_ERR) { ret_code = ICE_ERR_AQ_ERROR; ice_debug(hw, ICE_DBG_AQ_MSG, - "Control Receive Queue Event received with error 0x%x\n", + "Control Receive Queue Event 0x%04X received with error 0x%X\n", + le16_to_cpu(desc->opcode), cq->rq_last_status); } memcpy(&e->desc, desc, sizeof(e->desc)); @@ -1084,10 +1132,9 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, if (e->msg_buf && e->msg_len) memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len); - ice_debug(hw, ICE_DBG_AQ_MSG, "ARQ: desc and buffer:\n"); + ice_debug(hw, ICE_DBG_AQ_DESC, "ARQ: desc and buffer:\n"); - ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, e->msg_buf, - cq->rq_buf_size); + ice_debug_cq(hw, (void *)desc, e->msg_buf, 
cq->rq_buf_size); /* Restore the original datalen and buffer address in the desc, * FW updates datalen to indicate the event message size diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index 44945c2165d8..bf0ebe6149e8 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -22,7 +22,7 @@ */ #define EXP_FW_API_VER_BRANCH 0x00 #define EXP_FW_API_VER_MAJOR 0x01 -#define EXP_FW_API_VER_MINOR 0x03 +#define EXP_FW_API_VER_MINOR 0x05 /* Different control queue types: These are mainly for SW consumption. */ enum ice_ctl_q { @@ -31,8 +31,9 @@ enum ice_ctl_q { ICE_CTL_Q_MAILBOX, }; -/* Control Queue default settings */ -#define ICE_CTL_Q_SQ_CMD_TIMEOUT 250 /* msecs */ +/* Control Queue timeout settings - max delay 250ms */ +#define ICE_CTL_Q_SQ_CMD_TIMEOUT 2500 /* Count 2500 times */ +#define ICE_CTL_Q_SQ_CMD_USEC 100 /* Check every 100usec */ struct ice_ctl_q_ring { void *dma_head; /* Virtual address to DMA head */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index dd7efff121bd..713e8a892e14 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -965,9 +965,9 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change) pi->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) { /* Get current DCBX configuration */ ret = ice_get_dcb_cfg(pi); - pi->is_sw_lldp = (hw->adminq.sq_last_status == ICE_AQ_RC_EPERM); if (ret) return ret; + pi->is_sw_lldp = false; } else if (pi->dcbx_status == ICE_DCBX_STATUS_DIS) { return ICE_ERR_NOT_READY; } @@ -975,8 +975,8 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change) /* Configure the LLDP MIB change event */ if (enable_mib_change) { ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL); - if (!ret) - pi->is_sw_lldp = false; + if (ret) + pi->is_sw_lldp = true; } return ret; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index dd47869c4ad4..d3d3ec29def9 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019, Intel Corporation. */ #include "ice_dcb_lib.h" +#include "ice_dcb_nl.h" /** * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration @@ -100,6 +101,16 @@ u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg) } /** + * ice_dcb_get_tc - Get the TC associated with the queue + * @vsi: ptr to the VSI + * @queue_index: queue number associated with VSI + */ +u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index) +{ + return vsi->tx_rings[queue_index]->dcb_tc; +} + +/** * ice_vsi_cfg_dcb_rings - Update rings to reflect DCB TC * @vsi: VSI owner of rings being updated */ @@ -138,83 +149,62 @@ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) } /** - * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs - * @pf: pointer to the PF struct - * - * Assumed caller has already disabled all VSIs before - * calling this function. Reconfiguring DCB based on - * local_dcbx_cfg. 
- */ -static void ice_pf_dcb_recfg(struct ice_pf *pf) -{ - struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg; - u8 tc_map = 0; - int v, ret; - - /* Update each VSI */ - ice_for_each_vsi(pf, v) { - if (!pf->vsi[v]) - continue; - - if (pf->vsi[v]->type == ICE_VSI_PF) - tc_map = ice_dcb_get_ena_tc(dcbcfg); - else - tc_map = ICE_DFLT_TRAFFIC_CLASS; - - ret = ice_vsi_cfg_tc(pf->vsi[v], tc_map); - if (ret) { - dev_err(&pf->pdev->dev, - "Failed to config TC for VSI index: %d\n", - pf->vsi[v]->idx); - continue; - } - - ice_vsi_map_rings_to_vectors(pf->vsi[v]); - } -} - -/** * ice_pf_dcb_cfg - Apply new DCB configuration * @pf: pointer to the PF struct * @new_cfg: DCBX config to apply * @locked: is the RTNL held */ -static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) { - struct ice_dcbx_cfg *old_cfg, *curr_cfg; struct ice_aqc_port_ets_elem buf = { 0 }; - int ret = 0; + struct ice_dcbx_cfg *old_cfg, *curr_cfg; + struct device *dev = ice_pf_to_dev(pf); + int ret = ICE_DCB_NO_HW_CHG; + struct ice_vsi *pf_vsi; curr_cfg = &pf->hw.port_info->local_dcbx_cfg; + /* FW does not care if change happened */ + if (!pf->hw.port_info->is_sw_lldp) + ret = ICE_DCB_HW_CHG_RST; + /* Enable DCB tagging only when more than one TC */ if (ice_dcb_get_num_tc(new_cfg) > 1) { - dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n"); + dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n"); set_bit(ICE_FLAG_DCB_ENA, pf->flags); } else { - dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n"); + dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n"); clear_bit(ICE_FLAG_DCB_ENA, pf->flags); } if (!memcmp(new_cfg, curr_cfg, sizeof(*new_cfg))) { - dev_dbg(&pf->pdev->dev, "No change in DCB config required\n"); + dev_dbg(dev, "No change in DCB config required\n"); return ret; } /* Store old config in case FW config fails */ - old_cfg = devm_kzalloc(&pf->pdev->dev, sizeof(*old_cfg), GFP_KERNEL); - memcpy(old_cfg, curr_cfg, sizeof(*old_cfg)); + old_cfg = kmemdup(curr_cfg, sizeof(*old_cfg), GFP_KERNEL); + if (!old_cfg) + return -ENOMEM; + + dev_info(dev, "Commit DCB Configuration to the hardware\n"); + pf_vsi = ice_get_main_vsi(pf); + if (!pf_vsi) { + dev_dbg(dev, "PF VSI doesn't exist\n"); + ret = -EINVAL; + goto free_cfg; + } /* avoid race conditions by holding the lock while disabling and * re-enabling the VSI */ if (!locked) rtnl_lock(); - ice_pf_dis_all_vsi(pf, true); + ice_dis_vsi(pf_vsi, true); memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg)); memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec)); + memcpy(&new_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec)); /* Only send new config to HW if we are in SW LLDP mode. Otherwise, * the new config came from the HW in the first place. 
@@ -222,7 +212,7 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) if (pf->hw.port_info->is_sw_lldp) { ret = ice_set_dcb_cfg(pf->hw.port_info); if (ret) { - dev_err(&pf->pdev->dev, "Set DCB Config failed\n"); + dev_err(dev, "Set DCB Config failed\n"); /* Restore previous settings to local config */ memcpy(curr_cfg, old_cfg, sizeof(*curr_cfg)); goto out; @@ -231,17 +221,18 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { - dev_err(&pf->pdev->dev, "Query Port ETS failed\n"); + dev_err(dev, "Query Port ETS failed\n"); goto out; } ice_pf_dcb_recfg(pf); out: - ice_pf_ena_all_vsi(pf, true); + ice_ena_vsi(pf_vsi, true); if (!locked) rtnl_unlock(); - devm_kfree(&pf->pdev->dev, old_cfg); +free_cfg: + kfree(old_cfg); return ret; } @@ -277,6 +268,7 @@ static bool ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg, struct ice_dcbx_cfg *new_cfg) { + struct device *dev = ice_pf_to_dev(pf); bool need_reconfig = false; /* Check if ETS configuration has changed */ @@ -287,33 +279,33 @@ ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg, &old_cfg->etscfg.prio_table, sizeof(new_cfg->etscfg.prio_table))) { need_reconfig = true; - dev_dbg(&pf->pdev->dev, "ETS UP2TC changed.\n"); + dev_dbg(dev, "ETS UP2TC changed.\n"); } if (memcmp(&new_cfg->etscfg.tcbwtable, &old_cfg->etscfg.tcbwtable, sizeof(new_cfg->etscfg.tcbwtable))) - dev_dbg(&pf->pdev->dev, "ETS TC BW Table changed.\n"); + dev_dbg(dev, "ETS TC BW Table changed.\n"); if (memcmp(&new_cfg->etscfg.tsatable, &old_cfg->etscfg.tsatable, sizeof(new_cfg->etscfg.tsatable))) - dev_dbg(&pf->pdev->dev, "ETS TSA Table changed.\n"); + dev_dbg(dev, "ETS TSA Table changed.\n"); } /* Check if PFC configuration has changed */ if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) { need_reconfig = true; - dev_dbg(&pf->pdev->dev, "PFC config change detected.\n"); + dev_dbg(dev, "PFC config change detected.\n"); } /* Check if APP Table has changed */ if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) { need_reconfig = true; - dev_dbg(&pf->pdev->dev, "APP Table change detected.\n"); + dev_dbg(dev, "APP Table change detected.\n"); } - dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig); + dev_dbg(dev, "dcb need_reconfig=%d\n", need_reconfig); return need_reconfig; } @@ -325,11 +317,12 @@ void ice_dcb_rebuild(struct ice_pf *pf) { struct ice_dcbx_cfg *local_dcbx_cfg, *desired_dcbx_cfg, *prev_cfg; struct ice_aqc_port_ets_elem buf = { 0 }; + struct device *dev = ice_pf_to_dev(pf); enum ice_status ret; ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { - dev_err(&pf->pdev->dev, "Query Port ETS failed\n"); + dev_err(dev, "Query Port ETS failed\n"); goto dcb_error; } @@ -348,17 +341,14 @@ void ice_dcb_rebuild(struct ice_pf *pf) ice_cfg_etsrec_defaults(pf->hw.port_info); ret = ice_set_dcb_cfg(pf->hw.port_info); if (ret) { - dev_err(&pf->pdev->dev, "Failed to set DCB to unwilling\n"); + dev_err(dev, "Failed to set DCB to unwilling\n"); goto dcb_error; } /* Retrieve DCB config and ensure same as current in SW */ - prev_cfg = devm_kmemdup(&pf->pdev->dev, local_dcbx_cfg, - sizeof(*prev_cfg), GFP_KERNEL); - if (!prev_cfg) { - dev_err(&pf->pdev->dev, "Failed to alloc space for DCB cfg\n"); + prev_cfg = kmemdup(local_dcbx_cfg, sizeof(*prev_cfg), GFP_KERNEL); + if (!prev_cfg) goto dcb_error; - } ice_init_dcb(&pf->hw, true); if 
(pf->hw.port_info->dcbx_status == ICE_DCBX_STATUS_DIS) @@ -368,12 +358,13 @@ void ice_dcb_rebuild(struct ice_pf *pf) if (ice_dcb_need_recfg(pf, prev_cfg, local_dcbx_cfg)) { /* difference in cfg detected - disable DCB till next MIB */ - dev_err(&pf->pdev->dev, "Set local MIB not accurate\n"); + dev_err(dev, "Set local MIB not accurate\n"); + kfree(prev_cfg); goto dcb_error; } /* fetched config congruent to previous configuration */ - devm_kfree(&pf->pdev->dev, prev_cfg); + kfree(prev_cfg); /* Set the local desired config */ if (local_dcbx_cfg->dcbx_mode == ICE_DCBX_MODE_CEE) @@ -383,27 +374,30 @@ void ice_dcb_rebuild(struct ice_pf *pf) ice_cfg_etsrec_defaults(pf->hw.port_info); ret = ice_set_dcb_cfg(pf->hw.port_info); if (ret) { - dev_err(&pf->pdev->dev, "Failed to set desired config\n"); + dev_err(dev, "Failed to set desired config\n"); goto dcb_error; } - dev_info(&pf->pdev->dev, "DCB restored after reset\n"); + dev_info(dev, "DCB restored after reset\n"); ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { - dev_err(&pf->pdev->dev, "Query Port ETS failed\n"); + dev_err(dev, "Query Port ETS failed\n"); goto dcb_error; } return; dcb_error: - dev_err(&pf->pdev->dev, "Disabling DCB until new settings occur\n"); - prev_cfg = devm_kzalloc(&pf->pdev->dev, sizeof(*prev_cfg), GFP_KERNEL); + dev_err(dev, "Disabling DCB until new settings occur\n"); + prev_cfg = kzalloc(sizeof(*prev_cfg), GFP_KERNEL); + if (!prev_cfg) + return; + prev_cfg->etscfg.willing = true; prev_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW; prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS; memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec)); ice_pf_dcb_cfg(pf, prev_cfg, false); - devm_kfree(&pf->pdev->dev, prev_cfg); + kfree(prev_cfg); } /** @@ -418,18 +412,17 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked) int ret = 0; pi = pf->hw.port_info; - newcfg = devm_kzalloc(&pf->pdev->dev, sizeof(*newcfg), GFP_KERNEL); + newcfg = kmemdup(&pi->local_dcbx_cfg, sizeof(*newcfg), GFP_KERNEL); if (!newcfg) return -ENOMEM; - memcpy(newcfg, &pi->local_dcbx_cfg, sizeof(*newcfg)); memset(&pi->local_dcbx_cfg, 0, sizeof(*newcfg)); - dev_info(&pf->pdev->dev, "Configuring initial DCB values\n"); + dev_info(ice_pf_to_dev(pf), "Configuring initial DCB values\n"); if (ice_pf_dcb_cfg(pf, newcfg, locked)) ret = -EINVAL; - devm_kfree(&pf->pdev->dev, newcfg); + kfree(newcfg); return ret; } @@ -437,9 +430,10 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked) /** * ice_dcb_sw_default_config - Apply a default DCB config * @pf: PF to apply config to + * @ets_willing: configure ets willing * @locked: was this function called with RTNL held */ -static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked) +static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked) { struct ice_aqc_port_ets_elem buf = { 0 }; struct ice_dcbx_cfg *dcbcfg; @@ -449,12 +443,13 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked) hw = &pf->hw; pi = hw->port_info; - dcbcfg = devm_kzalloc(&pf->pdev->dev, sizeof(*dcbcfg), GFP_KERNEL); + dcbcfg = kzalloc(sizeof(*dcbcfg), GFP_KERNEL); + if (!dcbcfg) + return -ENOMEM; - memset(dcbcfg, 0, sizeof(*dcbcfg)); memset(&pi->local_dcbx_cfg, 0, sizeof(*dcbcfg)); - dcbcfg->etscfg.willing = 1; + dcbcfg->etscfg.willing = ets_willing ? 
1 : 0; dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc; dcbcfg->etscfg.tcbwtable[0] = 100; dcbcfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS; @@ -472,7 +467,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked) dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE; ret = ice_pf_dcb_cfg(pf, dcbcfg, locked); - devm_kfree(&pf->pdev->dev, dcbcfg); + kfree(dcbcfg); if (ret) return ret; @@ -480,13 +475,112 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked) } /** + * ice_dcb_tc_contig - Check that TCs are contiguous + * @prio_table: pointer to priority table + * + * Check if TCs begin with TC0 and are contiguous + */ +static bool ice_dcb_tc_contig(u8 *prio_table) +{ + u8 max_tc = 0; + int i; + + for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) { + u8 cur_tc = prio_table[i]; + + if (cur_tc > max_tc) + return false; + else if (cur_tc == max_tc) + max_tc++; + } + + return true; +} + +/** + * ice_dcb_noncontig_cfg - Configure DCB for non-contiguous TCs + * @pf: pointer to the PF struct + * + * If non-contiguous TCs, then configure SW DCB with TC0 and ETS non-willing + */ +static int ice_dcb_noncontig_cfg(struct ice_pf *pf) +{ + struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg; + struct device *dev = ice_pf_to_dev(pf); + int ret; + + /* Configure SW DCB default with ETS non-willing */ + ret = ice_dcb_sw_dflt_cfg(pf, false, true); + if (ret) { + dev_err(dev, "Failed to set local DCB config %d\n", ret); + return ret; + } + + /* Reconfigure with ETS willing so that FW will send LLDP MIB event */ + dcbcfg->etscfg.willing = 1; + ret = ice_set_dcb_cfg(pf->hw.port_info); + if (ret) + dev_err(dev, "Failed to set DCB to unwilling\n"); + + return ret; +} + +/** + * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs + * @pf: pointer to the PF struct + * + * Assumed caller has already disabled all VSIs before + * calling this function. Reconfiguring DCB based on + * local_dcbx_cfg. 
+ */ +void ice_pf_dcb_recfg(struct ice_pf *pf) +{ + struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg; + u8 tc_map = 0; + int v, ret; + + /* Update each VSI */ + ice_for_each_vsi(pf, v) { + struct ice_vsi *vsi = pf->vsi[v]; + + if (!vsi) + continue; + + if (vsi->type == ICE_VSI_PF) { + tc_map = ice_dcb_get_ena_tc(dcbcfg); + + /* If DCBX request non-contiguous TC, then configure + * default TC + */ + if (!ice_dcb_tc_contig(dcbcfg->etscfg.prio_table)) { + tc_map = ICE_DFLT_TRAFFIC_CLASS; + ice_dcb_noncontig_cfg(pf); + } + } else { + tc_map = ICE_DFLT_TRAFFIC_CLASS; + } + + ret = ice_vsi_cfg_tc(vsi, tc_map); + if (ret) { + dev_err(ice_pf_to_dev(pf), "Failed to config TC for VSI index: %d\n", + vsi->idx); + continue; + } + + ice_vsi_map_rings_to_vectors(vsi); + if (vsi->type == ICE_VSI_PF) + ice_dcbnl_set_all(vsi); + } +} + +/** * ice_init_pf_dcb - initialize DCB for a PF * @pf: PF to initialize DCB for * @locked: Was function called with RTNL held */ int ice_init_pf_dcb(struct ice_pf *pf, bool locked) { - struct device *dev = &pf->pdev->dev; + struct device *dev = ice_pf_to_dev(pf); struct ice_port_info *port_info; struct ice_hw *hw = &pf->hw; int err; @@ -495,26 +589,39 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked) err = ice_init_dcb(hw, false); if (err && !port_info->is_sw_lldp) { - dev_err(&pf->pdev->dev, "Error initializing DCB %d\n", err); + dev_err(dev, "Error initializing DCB %d\n", err); goto dcb_init_err; } - dev_info(&pf->pdev->dev, + dev_info(dev, "DCB is enabled in the hardware, max number of TCs supported on this port are %d\n", pf->hw.func_caps.common_cap.maxtc); if (err) { + struct ice_vsi *pf_vsi; + /* FW LLDP is disabled, activate SW DCBX/LLDP mode */ - dev_info(&pf->pdev->dev, - "FW LLDP is disabled, DCBx/LLDP in SW mode.\n"); + dev_info(dev, "FW LLDP is disabled, DCBx/LLDP in SW mode.\n"); clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags); - err = ice_dcb_sw_dflt_cfg(pf, locked); + err = ice_dcb_sw_dflt_cfg(pf, true, locked); if (err) { - dev_err(&pf->pdev->dev, + dev_err(dev, "Failed to set local DCB config %d\n", err); err = -EIO; goto dcb_init_err; } + /* If the FW DCBX engine is not running then Rx LLDP packets + * need to be redirected up the stack. 
+ */ + pf_vsi = ice_get_main_vsi(pf); + if (!pf_vsi) { + dev_err(dev, "Failed to set local DCB config\n"); + err = -EIO; + goto dcb_init_err; + } + + ice_cfg_sw_lldp(pf_vsi, false, true); + pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; return 0; } @@ -623,10 +730,12 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { struct ice_aqc_port_ets_elem buf = { 0 }; + struct device *dev = ice_pf_to_dev(pf); struct ice_aqc_lldp_get_mib *mib; struct ice_dcbx_cfg tmp_dcbx_cfg; bool need_reconfig = false; struct ice_port_info *pi; + struct ice_vsi *pf_vsi; u8 type; int ret; @@ -635,8 +744,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, return; if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) { - dev_dbg(&pf->pdev->dev, - "MIB Change Event in HOST mode\n"); + dev_dbg(dev, "MIB Change Event in HOST mode\n"); return; } @@ -645,21 +753,20 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, /* Ignore if event is not for Nearest Bridge */ type = ((mib->type >> ICE_AQ_LLDP_BRID_TYPE_S) & ICE_AQ_LLDP_BRID_TYPE_M); - dev_dbg(&pf->pdev->dev, "LLDP event MIB bridge type 0x%x\n", type); + dev_dbg(dev, "LLDP event MIB bridge type 0x%x\n", type); if (type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID) return; /* Check MIB Type and return if event for Remote MIB update */ type = mib->type & ICE_AQ_LLDP_MIB_TYPE_M; - dev_dbg(&pf->pdev->dev, - "LLDP event mib type %s\n", type ? "remote" : "local"); + dev_dbg(dev, "LLDP event mib type %s\n", type ? "remote" : "local"); if (type == ICE_AQ_LLDP_MIB_REMOTE) { /* Update the remote cached instance and return */ ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE, ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, &pi->remote_dcbx_cfg); if (ret) { - dev_err(&pf->pdev->dev, "Failed to get remote DCB config\n"); + dev_err(dev, "Failed to get remote DCB config\n"); return; } } @@ -673,37 +780,43 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, /* Get updated DCBX data from firmware */ ret = ice_get_dcb_cfg(pf->hw.port_info); if (ret) { - dev_err(&pf->pdev->dev, "Failed to get DCB config\n"); + dev_err(dev, "Failed to get DCB config\n"); return; } /* No change detected in DCBX configs */ if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) { - dev_dbg(&pf->pdev->dev, - "No change detected in DCBX configuration.\n"); + dev_dbg(dev, "No change detected in DCBX configuration.\n"); return; } need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg, &pi->local_dcbx_cfg); + ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->local_dcbx_cfg); if (!need_reconfig) return; /* Enable DCB tagging only when more than one TC */ if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) { - dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n"); + dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n"); set_bit(ICE_FLAG_DCB_ENA, pf->flags); } else { - dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n"); + dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n"); clear_bit(ICE_FLAG_DCB_ENA, pf->flags); } + pf_vsi = ice_get_main_vsi(pf); + if (!pf_vsi) { + dev_dbg(dev, "PF VSI doesn't exist\n"); + return; + } + rtnl_lock(); - ice_pf_dis_all_vsi(pf, true); + ice_dis_vsi(pf_vsi, true); ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { - dev_err(&pf->pdev->dev, "Query Port ETS failed\n"); + dev_err(dev, "Query Port ETS failed\n"); rtnl_unlock(); return; } @@ -711,6 +824,6 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, /* changes in configuration update VSI */ ice_pf_dcb_recfg(pf); - 
ice_pf_ena_all_vsi(pf, true); + ice_ena_vsi(pf_vsi, true); rtnl_unlock(); } diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h index 661a6f7bca64..f15e5776f287 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h @@ -5,14 +5,22 @@ #define _ICE_DCB_LIB_H_ #include "ice.h" +#include "ice_base.h" #include "ice_lib.h" #ifdef CONFIG_DCB -#define ICE_TC_MAX_BW 100 /* Default Max BW percentage */ +#define ICE_TC_MAX_BW 100 /* Default Max BW percentage */ +#define ICE_DCB_HW_CHG_RST 0 /* DCB configuration changed with reset */ +#define ICE_DCB_NO_HW_CHG 1 /* DCB configuration did not change */ +#define ICE_DCB_HW_CHG 2 /* DCB configuration changed, no reset */ void ice_dcb_rebuild(struct ice_pf *pf); u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg); u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg); +u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index); +int +ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked); +void ice_pf_dcb_recfg(struct ice_pf *pf); void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi); int ice_init_pf_dcb(struct ice_pf *pf, bool locked); void ice_update_dcb_stats(struct ice_pf *pf); @@ -41,10 +49,25 @@ static inline u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg) return 1; } +static inline u8 +ice_dcb_get_tc(struct ice_vsi __always_unused *vsi, + int __always_unused queue_index) +{ + return 0; +} + static inline int ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked) { - dev_dbg(&pf->pdev->dev, "DCB not supported\n"); + dev_dbg(ice_pf_to_dev(pf), "DCB not supported\n"); + return -EOPNOTSUPP; +} + +static inline int +ice_pf_dcb_cfg(struct ice_pf __always_unused *pf, + struct ice_dcbx_cfg __always_unused *new_cfg, + bool __always_unused locked) +{ return -EOPNOTSUPP; } @@ -56,6 +79,7 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_ring __always_unused *tx_ring, } #define ice_update_dcb_stats(pf) do {} while (0) +#define ice_pf_dcb_recfg(pf) do {} while (0) #define ice_vsi_cfg_dcb_rings(vsi) do {} while (0) #define ice_dcb_process_lldp_set_mib_change(pf, event) do {} while (0) #define ice_set_cgd_num(tlan_ctx, ring) do {} while (0) diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c new file mode 100644 index 000000000000..d870c1aedc17 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -0,0 +1,933 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Intel Corporation. 
*/ + +#include "ice.h" +#include "ice_dcb.h" +#include "ice_dcb_lib.h" +#include "ice_dcb_nl.h" +#include <net/dcbnl.h> + +#define ICE_APP_PROT_ID_ROCE 0x8915 + +/** + * ice_dcbnl_devreset - perform enough of a ifdown/ifup to sync DCBNL info + * @netdev: device associated with interface that needs reset + */ +static void ice_dcbnl_devreset(struct net_device *netdev) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + + while (ice_is_reset_in_progress(pf->state)) + usleep_range(1000, 2000); + + set_bit(__ICE_DCBNL_DEVRESET, pf->state); + dev_close(netdev); + netdev_state_change(netdev); + dev_open(netdev, NULL); + netdev_state_change(netdev); + clear_bit(__ICE_DCBNL_DEVRESET, pf->state); +} + +/** + * ice_dcbnl_getets - retrieve local ETS configuration + * @netdev: the relevant netdev + * @ets: struct to hold ETS configuration + */ +static int ice_dcbnl_getets(struct net_device *netdev, struct ieee_ets *ets) +{ + struct ice_dcbx_cfg *dcbxcfg; + struct ice_port_info *pi; + struct ice_pf *pf; + + pf = ice_netdev_to_pf(netdev); + pi = pf->hw.port_info; + dcbxcfg = &pi->local_dcbx_cfg; + + ets->willing = dcbxcfg->etscfg.willing; + ets->ets_cap = dcbxcfg->etscfg.maxtcs; + ets->cbs = dcbxcfg->etscfg.cbs; + memcpy(ets->tc_tx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_tx_bw)); + memcpy(ets->tc_rx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_rx_bw)); + memcpy(ets->tc_tsa, dcbxcfg->etscfg.tsatable, sizeof(ets->tc_tsa)); + memcpy(ets->prio_tc, dcbxcfg->etscfg.prio_table, sizeof(ets->prio_tc)); + memcpy(ets->tc_reco_bw, dcbxcfg->etsrec.tcbwtable, + sizeof(ets->tc_reco_bw)); + memcpy(ets->tc_reco_tsa, dcbxcfg->etsrec.tsatable, + sizeof(ets->tc_reco_tsa)); + memcpy(ets->reco_prio_tc, dcbxcfg->etscfg.prio_table, + sizeof(ets->reco_prio_tc)); + + return 0; +} + +/** + * ice_dcbnl_setets - set IEEE ETS configuration + * @netdev: pointer to relevant netdev + * @ets: struct to hold ETS configuration + */ +static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *new_cfg; + int bwcfg = 0, bwrec = 0; + int err, i, max_tc = 0; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + return -EINVAL; + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + mutex_lock(&pf->tc_mutex); + + new_cfg->etscfg.willing = ets->willing; + new_cfg->etscfg.cbs = ets->cbs; + ice_for_each_traffic_class(i) { + new_cfg->etscfg.tcbwtable[i] = ets->tc_tx_bw[i]; + bwcfg += ets->tc_tx_bw[i]; + new_cfg->etscfg.tsatable[i] = ets->tc_tsa[i]; + new_cfg->etscfg.prio_table[i] = ets->prio_tc[i]; + if (ets->prio_tc[i] > max_tc) + max_tc = ets->prio_tc[i]; + new_cfg->etsrec.tcbwtable[i] = ets->tc_reco_bw[i]; + bwrec += ets->tc_reco_bw[i]; + new_cfg->etsrec.tsatable[i] = ets->tc_reco_tsa[i]; + new_cfg->etsrec.prio_table[i] = ets->reco_prio_tc[i]; + } + + /* max_tc is a 1-8 value count of number of TC's, not a 0-7 value + * for the TC's index number. 
Add one to value if not zero, and + * for zero set it to the FW's default value + */ + if (max_tc) + max_tc++; + else + max_tc = IEEE_8021QAZ_MAX_TCS; + + new_cfg->etscfg.maxtcs = max_tc; + + if (!bwcfg) + new_cfg->etscfg.tcbwtable[0] = 100; + + if (!bwrec) + new_cfg->etsrec.tcbwtable[0] = 100; + + err = ice_pf_dcb_cfg(pf, new_cfg, true); + /* return of zero indicates new cfg applied */ + if (err == ICE_DCB_HW_CHG_RST) + ice_dcbnl_devreset(netdev); + if (err == ICE_DCB_NO_HW_CHG) + err = ICE_DCB_HW_CHG_RST; + + mutex_unlock(&pf->tc_mutex); + return err; +} + +/** + * ice_dcbnl_getnumtcs - Get max number of traffic classes supported + * @dev: pointer to netdev struct + * @tcid: TC ID + * @num: total number of TCs supported by the adapter + * + * Return the total number of TCs supported + */ +static int +ice_dcbnl_getnumtcs(struct net_device *dev, int __always_unused tcid, u8 *num) +{ + struct ice_pf *pf = ice_netdev_to_pf(dev); + + if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)) + return -EINVAL; + + *num = IEEE_8021QAZ_MAX_TCS; + return 0; +} + +/** + * ice_dcbnl_getdcbx - retrieve current DCBX capability + * @netdev: pointer to the netdev struct + */ +static u8 ice_dcbnl_getdcbx(struct net_device *netdev) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + + return pf->dcbx_cap; +} + +/** + * ice_dcbnl_setdcbx - set required DCBX capability + * @netdev: the corresponding netdev + * @mode: required mode + */ +static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + + /* No support for LLD_MANAGED modes or CEE+IEEE */ + if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || + ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) || + !(mode & DCB_CAP_DCBX_HOST)) + return ICE_DCB_NO_HW_CHG; + + /* Already set to the given mode no change */ + if (mode == pf->dcbx_cap) + return ICE_DCB_NO_HW_CHG; + + pf->dcbx_cap = mode; + if (mode & DCB_CAP_DCBX_VER_CEE) + pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE; + else + pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE; + + dev_info(ice_pf_to_dev(pf), "DCBx mode = 0x%x\n", mode); + return ICE_DCB_HW_CHG_RST; +} + +/** + * ice_dcbnl_get_perm_hw_addr - MAC address used by DCBX + * @netdev: pointer to netdev struct + * @perm_addr: buffer to return permanent MAC address + */ +static void ice_dcbnl_get_perm_hw_addr(struct net_device *netdev, u8 *perm_addr) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + int i, j; + + memset(perm_addr, 0xff, MAX_ADDR_LEN); + + for (i = 0; i < netdev->addr_len; i++) + perm_addr[i] = pi->mac.perm_addr[i]; + + for (j = 0; j < netdev->addr_len; j++, i++) + perm_addr[i] = pi->mac.perm_addr[j]; +} + +/** + * ice_get_pfc_delay - Retrieve PFC Link Delay + * @hw: pointer to HW struct + * @delay: holds the PFC Link Delay value + */ +static void ice_get_pfc_delay(struct ice_hw *hw, u16 *delay) +{ + u32 val; + + val = rd32(hw, PRTDCB_GENC); + *delay = (u16)((val & PRTDCB_GENC_PFCLDA_M) >> PRTDCB_GENC_PFCLDA_S); +} + +/** + * ice_dcbnl_getpfc - retrieve local IEEE PFC config + * @netdev: pointer to netdev struct + * @pfc: struct to hold PFC info + */ +static int ice_dcbnl_getpfc(struct net_device *netdev, struct ieee_pfc *pfc) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + struct ice_dcbx_cfg *dcbxcfg; + int i; + + dcbxcfg = &pi->local_dcbx_cfg; + pfc->pfc_cap = dcbxcfg->pfc.pfccap; + pfc->pfc_en = dcbxcfg->pfc.pfcena; + pfc->mbc = 
dcbxcfg->pfc.mbc; + ice_get_pfc_delay(&pf->hw, &pfc->delay); + + ice_for_each_traffic_class(i) { + pfc->requests[i] = pf->stats.priority_xoff_tx[i]; + pfc->indications[i] = pf->stats.priority_xoff_rx[i]; + } + + return 0; +} + +/** + * ice_dcbnl_setpfc - set local IEEE PFC config + * @netdev: pointer to relevant netdev + * @pfc: pointer to struct holding PFC config + */ +static int ice_dcbnl_setpfc(struct net_device *netdev, struct ieee_pfc *pfc) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *new_cfg; + int err; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + return -EINVAL; + + mutex_lock(&pf->tc_mutex); + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + if (pfc->pfc_cap) + new_cfg->pfc.pfccap = pfc->pfc_cap; + else + new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc; + + new_cfg->pfc.pfcena = pfc->pfc_en; + + err = ice_pf_dcb_cfg(pf, new_cfg, true); + if (err == ICE_DCB_HW_CHG_RST) + ice_dcbnl_devreset(netdev); + if (err == ICE_DCB_NO_HW_CHG) + err = ICE_DCB_HW_CHG_RST; + mutex_unlock(&pf->tc_mutex); + return err; +} + +/** + * ice_dcbnl_get_pfc_cfg - Get CEE PFC config + * @netdev: pointer to netdev struct + * @prio: corresponding user priority + * @setting: the PFC setting for given priority + */ +static void +ice_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, u8 *setting) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (prio >= ICE_MAX_USER_PRIORITY) + return; + + *setting = (pi->local_dcbx_cfg.pfc.pfcena >> prio) & 0x1; + dev_dbg(ice_pf_to_dev(pf), + "Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n", + prio, *setting, pi->local_dcbx_cfg.pfc.pfcena); +} + +/** + * ice_dcbnl_set_pfc_cfg - Set CEE PFC config + * @netdev: the corresponding netdev + * @prio: User Priority + * @set: PFC setting to apply + */ +static void ice_dcbnl_set_pfc_cfg(struct net_device *netdev, int prio, u8 set) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *new_cfg; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (prio >= ICE_MAX_USER_PRIORITY) + return; + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc; + if (set) + new_cfg->pfc.pfcena |= BIT(prio); + else + new_cfg->pfc.pfcena &= ~BIT(prio); + + dev_dbg(ice_pf_to_dev(pf), "Set PFC config UP:%d set:%d pfcena:0x%x\n", + prio, set, new_cfg->pfc.pfcena); +} + +/** + * ice_dcbnl_getpfcstate - get CEE PFC mode + * @netdev: pointer to netdev struct + */ +static u8 ice_dcbnl_getpfcstate(struct net_device *netdev) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + + /* Return enabled if any UP enabled for PFC */ + if (pi->local_dcbx_cfg.pfc.pfcena) + return 1; + + return 0; +} + +/** + * ice_dcbnl_getstate - get DCB enabled state + * @netdev: pointer to netdev struct + */ +static u8 ice_dcbnl_getstate(struct net_device *netdev) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + u8 state = 0; + + state = test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); + + dev_dbg(ice_pf_to_dev(pf), "DCB enabled state = %d\n", state); + return state; +} + +/** + * ice_dcbnl_setstate - Set CEE DCB state + * @netdev: pointer to relevant netdev + * @state: state value to set + */ +static u8 ice_dcbnl_setstate(struct net_device *netdev, u8 
state) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return ICE_DCB_NO_HW_CHG; + + /* Nothing to do */ + if (!!state == test_bit(ICE_FLAG_DCB_ENA, pf->flags)) + return ICE_DCB_NO_HW_CHG; + + if (state) { + set_bit(ICE_FLAG_DCB_ENA, pf->flags); + memcpy(&pf->hw.port_info->desired_dcbx_cfg, + &pf->hw.port_info->local_dcbx_cfg, + sizeof(struct ice_dcbx_cfg)); + } else { + clear_bit(ICE_FLAG_DCB_ENA, pf->flags); + } + + return ICE_DCB_HW_CHG; +} + +/** + * ice_dcbnl_get_pg_tc_cfg_tx - get CEE PG Tx config + * @netdev: pointer to netdev struct + * @prio: the corresponding user priority + * @prio_type: traffic priority type + * @pgid: the BW group ID the traffic class belongs to + * @bw_pct: BW percentage for the corresponding BWG + * @up_map: prio mapped to corresponding TC + */ +static void +ice_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio, + u8 __always_unused *prio_type, u8 *pgid, + u8 __always_unused *bw_pct, + u8 __always_unused *up_map) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (prio >= ICE_MAX_USER_PRIORITY) + return; + + *pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio]; + dev_dbg(ice_pf_to_dev(pf), + "Get PG config prio=%d tc=%d\n", prio, *pgid); +} + +/** + * ice_dcbnl_set_pg_tc_cfg_tx - set CEE PG Tx config + * @netdev: pointer to relevant netdev + * @tc: the corresponding traffic class + * @prio_type: the traffic priority type + * @bwg_id: the BW group ID the TC belongs to + * @bw_pct: the BW perventage for the BWG + * @up_map: prio mapped to corresponding TC + */ +static void +ice_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc, + u8 __always_unused prio_type, + u8 __always_unused bwg_id, + u8 __always_unused bw_pct, u8 up_map) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *new_cfg; + int i; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (tc >= ICE_MAX_TRAFFIC_CLASS) + return; + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + /* prio_type, bwg_id and bw_pct per UP are not supported */ + + ice_for_each_traffic_class(i) { + if (up_map & BIT(i)) + new_cfg->etscfg.prio_table[i] = tc; + } + new_cfg->etscfg.tsatable[tc] = ICE_IEEE_TSA_ETS; +} + +/** + * ice_dcbnl_get_pg_bwg_cfg_tx - Get CEE PGBW config + * @netdev: pointer to the netdev struct + * @pgid: corresponding traffic class + * @bw_pct: the BW percentage for the corresponding TC + */ +static void +ice_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 *bw_pct) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (pgid >= ICE_MAX_TRAFFIC_CLASS) + return; + + *bw_pct = pi->local_dcbx_cfg.etscfg.tcbwtable[pgid]; + dev_dbg(ice_pf_to_dev(pf), "Get PG BW config tc=%d bw_pct=%d\n", + pgid, *bw_pct); +} + +/** + * ice_dcbnl_set_pg_bwg_cfg_tx - set CEE PG Tx BW config + * @netdev: the corresponding netdev + * @pgid: Correspongind traffic class + * @bw_pct: the BW percentage for the specified TC + */ +static void +ice_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 bw_pct) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *new_cfg; + + if 
((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (pgid >= ICE_MAX_TRAFFIC_CLASS) + return; + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + new_cfg->etscfg.tcbwtable[pgid] = bw_pct; +} + +/** + * ice_dcbnl_get_pg_tc_cfg_rx - Get CEE PG Rx config + * @netdev: pointer to netdev struct + * @prio: the corresponding user priority + * @prio_type: the traffic priority type + * @pgid: the PG ID + * @bw_pct: the BW percentage for the corresponding BWG + * @up_map: prio mapped to corresponding TC + */ +static void +ice_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int prio, + u8 __always_unused *prio_type, u8 *pgid, + u8 __always_unused *bw_pct, + u8 __always_unused *up_map) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_port_info *pi = pf->hw.port_info; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + if (prio >= ICE_MAX_USER_PRIORITY) + return; + + *pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio]; +} + +/** + * ice_dcbnl_get_pg_bwg_cfg_rx - Get CEE PG BW Rx config + * @netdev: pointer to netdev struct + * @pgid: the corresponding traffic class + * @bw_pct: the BW percentage for the corresponding TC + */ +static void +ice_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int __always_unused pgid, + u8 *bw_pct) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return; + + *bw_pct = 0; +} + +/** + * ice_dcbnl_get_cap - Get DCBX capabilities of adapter + * @netdev: pointer to netdev struct + * @capid: the capability type + * @cap: the capability value + */ +static u8 ice_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + + if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))) + return ICE_DCB_NO_HW_CHG; + + switch (capid) { + case DCB_CAP_ATTR_PG: + *cap = true; + break; + case DCB_CAP_ATTR_PFC: + *cap = true; + break; + case DCB_CAP_ATTR_UP2TC: + *cap = false; + break; + case DCB_CAP_ATTR_PG_TCS: + *cap = 0x80; + break; + case DCB_CAP_ATTR_PFC_TCS: + *cap = 0x80; + break; + case DCB_CAP_ATTR_GSP: + *cap = false; + break; + case DCB_CAP_ATTR_BCN: + *cap = false; + break; + case DCB_CAP_ATTR_DCBX: + *cap = pf->dcbx_cap; + break; + default: + *cap = false; + break; + } + + dev_dbg(ice_pf_to_dev(pf), "DCBX Get Capability cap=%d capval=0x%x\n", + capid, *cap); + return 0; +} + +/** + * ice_dcbnl_getapp - get CEE APP + * @netdev: pointer to netdev struct + * @idtype: the App selector + * @id: the App ethtype or port number + */ +static int ice_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct dcb_app app = { + .selector = idtype, + .protocol = id, + }; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return -EINVAL; + + return dcb_getapp(netdev, &app); +} + +/** + * ice_dcbnl_find_app - Search for APP in given DCB config + * @cfg: struct to hold DCBX config + * @app: struct to hold app data to look for + */ +static bool +ice_dcbnl_find_app(struct ice_dcbx_cfg *cfg, + struct ice_dcb_app_priority_table *app) +{ + int i; + + for (i = 0; i < cfg->numapps; i++) { + if (app->selector == cfg->app[i].selector && + app->prot_id == cfg->app[i].prot_id && + app->priority == cfg->app[i].priority) + return true; + } + + return false; +} + +/** + * ice_dcbnl_setapp - set local IEEE App config + * @netdev: 
relevant netdev struct + * @app: struct to hold app config info + */ +static int ice_dcbnl_setapp(struct net_device *netdev, struct dcb_app *app) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcb_app_priority_table new_app; + struct ice_dcbx_cfg *old_cfg, *new_cfg; + int ret; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + return -EINVAL; + + mutex_lock(&pf->tc_mutex); + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + old_cfg = &pf->hw.port_info->local_dcbx_cfg; + + if (old_cfg->numapps == ICE_DCBX_MAX_APPS) { + ret = -EINVAL; + goto setapp_out; + } + + ret = dcb_ieee_setapp(netdev, app); + if (ret) + goto setapp_out; + + new_app.selector = app->selector; + new_app.prot_id = app->protocol; + new_app.priority = app->priority; + if (ice_dcbnl_find_app(old_cfg, &new_app)) { + ret = 0; + goto setapp_out; + } + + new_cfg->app[new_cfg->numapps++] = new_app; + ret = ice_pf_dcb_cfg(pf, new_cfg, true); + /* return of zero indicates new cfg applied */ + if (ret == ICE_DCB_HW_CHG_RST) + ice_dcbnl_devreset(netdev); + if (ret == ICE_DCB_NO_HW_CHG) + ret = ICE_DCB_HW_CHG_RST; + +setapp_out: + mutex_unlock(&pf->tc_mutex); + return ret; +} + +/** + * ice_dcbnl_delapp - Delete local IEEE App config + * @netdev: relevant netdev + * @app: struct to hold app too delete + * + * Will not delete first application required by the FW + */ +static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *old_cfg, *new_cfg; + int i, j, ret = 0; + + if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) + return -EINVAL; + + mutex_lock(&pf->tc_mutex); + ret = dcb_ieee_delapp(netdev, app); + if (ret) + goto delapp_out; + + old_cfg = &pf->hw.port_info->local_dcbx_cfg; + + if (old_cfg->numapps == 1) + goto delapp_out; + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + for (i = 1; i < new_cfg->numapps; i++) { + if (app->selector == new_cfg->app[i].selector && + app->protocol == new_cfg->app[i].prot_id && + app->priority == new_cfg->app[i].priority) { + new_cfg->app[i].selector = 0; + new_cfg->app[i].prot_id = 0; + new_cfg->app[i].priority = 0; + break; + } + } + + /* Did not find DCB App */ + if (i == new_cfg->numapps) { + ret = -EINVAL; + goto delapp_out; + } + + new_cfg->numapps--; + + for (j = i; j < new_cfg->numapps; j++) { + new_cfg->app[i].selector = old_cfg->app[j + 1].selector; + new_cfg->app[i].prot_id = old_cfg->app[j + 1].prot_id; + new_cfg->app[i].priority = old_cfg->app[j + 1].priority; + } + + ret = ice_pf_dcb_cfg(pf, new_cfg, true); + /* return of zero indicates new cfg applied */ + if (ret == ICE_DCB_HW_CHG_RST) + ice_dcbnl_devreset(netdev); + if (ret == ICE_DCB_NO_HW_CHG) + ret = ICE_DCB_HW_CHG_RST; + +delapp_out: + mutex_unlock(&pf->tc_mutex); + return ret; +} + +/** + * ice_dcbnl_cee_set_all - Commit CEE DCB settings to HW + * @netdev: the corresponding netdev + */ +static u8 ice_dcbnl_cee_set_all(struct net_device *netdev) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *new_cfg; + int err; + + if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || + !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return ICE_DCB_NO_HW_CHG; + + new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + + mutex_lock(&pf->tc_mutex); + + err = ice_pf_dcb_cfg(pf, new_cfg, true); + + mutex_unlock(&pf->tc_mutex); + return (err != ICE_DCB_HW_CHG_RST) ? 
ICE_DCB_NO_HW_CHG : err; +} + +static const struct dcbnl_rtnl_ops dcbnl_ops = { + /* IEEE 802.1Qaz std */ + .ieee_getets = ice_dcbnl_getets, + .ieee_setets = ice_dcbnl_setets, + .ieee_getpfc = ice_dcbnl_getpfc, + .ieee_setpfc = ice_dcbnl_setpfc, + .ieee_setapp = ice_dcbnl_setapp, + .ieee_delapp = ice_dcbnl_delapp, + + /* CEE std */ + .getstate = ice_dcbnl_getstate, + .setstate = ice_dcbnl_setstate, + .getpermhwaddr = ice_dcbnl_get_perm_hw_addr, + .setpgtccfgtx = ice_dcbnl_set_pg_tc_cfg_tx, + .setpgbwgcfgtx = ice_dcbnl_set_pg_bwg_cfg_tx, + .getpgtccfgtx = ice_dcbnl_get_pg_tc_cfg_tx, + .getpgbwgcfgtx = ice_dcbnl_get_pg_bwg_cfg_tx, + .getpgtccfgrx = ice_dcbnl_get_pg_tc_cfg_rx, + .getpgbwgcfgrx = ice_dcbnl_get_pg_bwg_cfg_rx, + .setpfccfg = ice_dcbnl_set_pfc_cfg, + .getpfccfg = ice_dcbnl_get_pfc_cfg, + .setall = ice_dcbnl_cee_set_all, + .getcap = ice_dcbnl_get_cap, + .getnumtcs = ice_dcbnl_getnumtcs, + .getpfcstate = ice_dcbnl_getpfcstate, + .getapp = ice_dcbnl_getapp, + + /* DCBX configuration */ + .getdcbx = ice_dcbnl_getdcbx, + .setdcbx = ice_dcbnl_setdcbx, +}; + +/** + * ice_dcbnl_set_all - set all the apps and ieee data from DCBX config + * @vsi: pointer to VSI struct + */ +void ice_dcbnl_set_all(struct ice_vsi *vsi) +{ + struct net_device *netdev = vsi->netdev; + struct ice_dcbx_cfg *dcbxcfg; + struct ice_port_info *pi; + struct dcb_app sapp; + struct ice_pf *pf; + int i; + + if (!netdev) + return; + + pf = ice_netdev_to_pf(netdev); + pi = pf->hw.port_info; + + /* SW DCB taken care of by SW Default Config */ + if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) + return; + + /* DCB not enabled */ + if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags)) + return; + + dcbxcfg = &pi->local_dcbx_cfg; + + for (i = 0; i < dcbxcfg->numapps; i++) { + u8 prio, tc_map; + + prio = dcbxcfg->app[i].priority; + tc_map = BIT(dcbxcfg->etscfg.prio_table[prio]); + + /* Add APP only if the TC is enabled for this VSI */ + if (tc_map & vsi->tc_cfg.ena_tc) { + sapp.selector = dcbxcfg->app[i].selector; + sapp.protocol = dcbxcfg->app[i].prot_id; + sapp.priority = prio; + dcb_ieee_setapp(netdev, &sapp); + } + } + /* Notify user-space of the changes */ + dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_SET, 0, 0); +} + +/** + * ice_dcbnl_vsi_del_app - Delete APP on all VSIs + * @vsi: pointer to the main VSI + * @app: APP to delete + * + * Delete given APP from all the VSIs for given PF + */ +static void +ice_dcbnl_vsi_del_app(struct ice_vsi *vsi, + struct ice_dcb_app_priority_table *app) +{ + struct dcb_app sapp; + int err; + + sapp.selector = app->selector; + sapp.protocol = app->prot_id; + sapp.priority = app->priority; + err = ice_dcbnl_delapp(vsi->netdev, &sapp); + dev_dbg(&vsi->back->pdev->dev, + "Deleting app for VSI idx=%d err=%d sel=%d proto=0x%x, prio=%d\n", + vsi->idx, err, app->selector, app->prot_id, app->priority); +} + +/** + * ice_dcbnl_flush_apps - Delete all removed APPs + * @pf: the corresponding PF + * @old_cfg: old DCBX configuration data + * @new_cfg: new DCBX configuration data + * + * Find and delete all APPS that are not present in the passed + * DCB configuration + */ +void +ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg, + struct ice_dcbx_cfg *new_cfg) +{ + struct ice_vsi *main_vsi = ice_get_main_vsi(pf); + int i; + + if (!main_vsi) + return; + + for (i = 0; i < old_cfg->numapps; i++) { + struct ice_dcb_app_priority_table app = old_cfg->app[i]; + + /* The APP is not available anymore delete it */ + if (!ice_dcbnl_find_app(new_cfg, &app)) + ice_dcbnl_vsi_del_app(main_vsi, &app); + } +} + 
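Editorial note on the new ice_dcb_nl.c above: nearly every CEE callback opens with the same DCBX-mode gate, rejecting LLD-managed mode and requiring DCB_CAP_DCBX_VER_CEE before touching the desired config. A hedged sketch of that recurring check, factored into a helper purely for illustration; ice_dcbnl_cee_allowed() is hypothetical and the driver open-codes the test in each handler.

	#include <net/dcbnl.h>

	/* Hypothetical helper mirroring the gate repeated in the CEE
	 * callbacks above; not part of the patch.
	 */
	static bool ice_dcbnl_cee_allowed(const struct ice_pf *pf)
	{
		/* Firmware-managed DCBX: the host must not modify config */
		if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)
			return false;

		/* Handler only applies when CEE DCBX is negotiated */
		return pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE;
	}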
+/** + * ice_dcbnl_setup - setup DCBNL + * @vsi: VSI to get associated netdev from + */ +void ice_dcbnl_setup(struct ice_vsi *vsi) +{ + struct net_device *netdev = vsi->netdev; + struct ice_pf *pf; + + pf = ice_netdev_to_pf(netdev); + if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)) + return; + + netdev->dcbnl_ops = &dcbnl_ops; + ice_dcbnl_set_all(vsi); +} diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.h b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h new file mode 100644 index 000000000000..6c630a362293 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. */ + +#ifndef _ICE_DCB_NL_H_ +#define _ICE_DCB_NL_H_ + +#ifdef CONFIG_DCB +void ice_dcbnl_setup(struct ice_vsi *vsi); +void ice_dcbnl_set_all(struct ice_vsi *vsi); +void +ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg, + struct ice_dcbx_cfg *new_cfg); +#else +#define ice_dcbnl_setup(vsi) do {} while (0) +#define ice_dcbnl_set_all(vsi) do {} while (0) +#define ice_dcbnl_flush_apps(pf, old_cfg, new_cfg) do {} while (0) +#endif /* CONFIG_DCB */ + +#endif /* _ICE_DCB_NL_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 7e23034df955..aec3c6c379df 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -156,6 +156,7 @@ struct ice_priv_flag { static const struct ice_priv_flag ice_gstrings_priv_flags[] = { ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA), ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT), + ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX), }; #define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags) @@ -247,7 +248,7 @@ ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, int ret = 0; u16 *buf; - dev = &pf->pdev->dev; + dev = ice_pf_to_dev(pf); eeprom->magic = hw->vendor_id | (hw->device_id << 16); @@ -342,6 +343,7 @@ static u64 ice_eeprom_test(struct net_device *netdev) static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask) { struct ice_pf *pf = (struct ice_pf *)hw->back; + struct device *dev = ice_pf_to_dev(pf); static const u32 patterns[] = { 0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF @@ -357,7 +359,7 @@ static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask) val = rd32(hw, reg); if (val == pattern) continue; - dev_err(&pf->pdev->dev, + dev_err(dev, "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n" , __func__, reg, pattern, val); return 1; @@ -366,7 +368,7 @@ static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask) wr32(hw, reg, orig_val); val = rd32(hw, reg); if (val != orig_val) { - dev_err(&pf->pdev->dev, + dev_err(dev, "%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n" , __func__, reg, orig_val, val); return 1; @@ -506,7 +508,7 @@ static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size) if (!pf) return -EINVAL; - data = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL); + data = devm_kzalloc(ice_pf_to_dev(pf), size, GFP_KERNEL); if (!data) return -ENOMEM; @@ -623,7 +625,7 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring) continue; rx_buf = &rx_ring->rx_buf[i]; - received_buf = page_address(rx_buf->page); + received_buf = page_address(rx_buf->page) + rx_buf->page_offset; if (ice_lbtest_check_frame(received_buf)) valid_frames++; @@ -648,9 +650,11 @@ static u64 ice_loopback_test(struct net_device 
*netdev) u8 broadcast[ETH_ALEN], ret = 0; int num_frames, valid_frames; LIST_HEAD(tmp_list); + struct device *dev; u8 *tx_frame; int i; + dev = ice_pf_to_dev(pf); netdev_info(netdev, "loopback test\n"); test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info); @@ -711,12 +715,12 @@ static u64 ice_loopback_test(struct net_device *netdev) ret = 10; lbtest_free_frame: - devm_kfree(&pf->pdev->dev, tx_frame); + devm_kfree(dev, tx_frame); remove_mac_filters: if (ice_remove_mac(&pf->hw, &tmp_list)) netdev_err(netdev, "Could not remove MAC filter for the test VSI"); free_mac_list: - ice_free_fltr_list(&pf->pdev->dev, &tmp_list); + ice_free_fltr_list(dev, &tmp_list); lbtest_mac_dis: /* Disable MAC loopback after the test is completed. */ if (ice_aq_set_mac_loopback(&pf->hw, false, NULL)) @@ -773,6 +777,9 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, struct ice_netdev_priv *np = netdev_priv(netdev); bool if_running = netif_running(netdev); struct ice_pf *pf = np->vsi->back; + struct device *dev; + + dev = ice_pf_to_dev(pf); if (eth_test->flags == ETH_TEST_FL_OFFLINE) { netdev_info(netdev, "offline testing starting\n"); @@ -780,7 +787,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, set_bit(__ICE_TESTING, pf->state); if (ice_active_vfs(pf)) { - dev_warn(&pf->pdev->dev, + dev_warn(dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n"); data[ICE_ETH_TEST_REG] = 1; data[ICE_ETH_TEST_EEPROM] = 1; @@ -815,8 +822,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, int status = ice_open(netdev); if (status) { - dev_err(&pf->pdev->dev, - "Could not open device %s, err %d", + dev_err(dev, "Could not open device %s, err %d", pf->int_name, status); } } @@ -961,7 +967,7 @@ static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec) } /* Get last SW configuration */ - caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + caps = kzalloc(sizeof(*caps), GFP_KERNEL); if (!caps) return -ENOMEM; @@ -1006,7 +1012,7 @@ static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec) } done: - devm_kfree(&vsi->back->pdev->dev, caps); + kfree(caps); return err; } @@ -1082,7 +1088,7 @@ ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) break; } - caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + caps = kzalloc(sizeof(*caps), GFP_KERNEL); if (!caps) return -ENOMEM; @@ -1109,7 +1115,7 @@ ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) fecparam->fec |= ETHTOOL_FEC_OFF; done: - devm_kfree(&vsi->back->pdev->dev, caps); + kfree(caps); return err; } @@ -1154,12 +1160,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) DECLARE_BITMAP(orig_flags, ICE_PF_FLAGS_NBITS); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + struct device *dev; int ret = 0; u32 i; if (flags > BIT(ICE_PRIV_FLAG_ARRAY_SIZE)) return -EINVAL; + dev = ice_pf_to_dev(pf); set_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags); bitmap_copy(orig_flags, pf->flags, ICE_PF_FLAGS_NBITS); @@ -1188,7 +1196,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) * events to respond to. 
*/ if (status) - dev_info(&pf->pdev->dev, + dev_info(dev, "Failed to unreg for LLDP events\n"); /* The AQ call to stop the FW LLDP agent will generate @@ -1196,20 +1204,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) */ status = ice_aq_stop_lldp(&pf->hw, true, true, NULL); if (status) - dev_warn(&pf->pdev->dev, - "Fail to stop LLDP agent\n"); + dev_warn(dev, "Fail to stop LLDP agent\n"); /* Use case for having the FW LLDP agent stopped * will likely not need DCB, so failure to init is * not a concern of ethtool */ status = ice_init_pf_dcb(pf, true); if (status) - dev_warn(&pf->pdev->dev, "Fail to init DCB\n"); - - /* Forward LLDP packets to default VSI so that they - * are passed up the stack - */ - ice_cfg_sw_lldp(vsi, false, true); + dev_warn(dev, "Fail to init DCB\n"); } else { enum ice_status status; bool dcbx_agent_status; @@ -1219,8 +1221,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) */ status = ice_aq_start_lldp(&pf->hw, true, NULL); if (status) - dev_warn(&pf->pdev->dev, - "Fail to start LLDP Agent\n"); + dev_warn(dev, "Fail to start LLDP Agent\n"); /* AQ command to start FW DCBX agent will fail if * the agent is already started @@ -1229,10 +1230,9 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) &dcbx_agent_status, NULL); if (status) - dev_dbg(&pf->pdev->dev, - "Failed to start FW DCBX\n"); + dev_dbg(dev, "Failed to start FW DCBX\n"); - dev_info(&pf->pdev->dev, "FW DCBX agent is %s\n", + dev_info(dev, "FW DCBX agent is %s\n", dcbx_agent_status ? "ACTIVE" : "DISABLED"); /* Failure to configure MIB change or init DCB is not @@ -1242,7 +1242,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) */ status = ice_init_pf_dcb(pf, true); if (status) - dev_dbg(&pf->pdev->dev, "Fail to init DCB\n"); + dev_dbg(dev, "Fail to init DCB\n"); /* Remove rule to direct LLDP packets to default VSI. * The FW LLDP engine will now be consuming them. @@ -1252,10 +1252,15 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) /* Register for MIB change events */ status = ice_cfg_lldp_mib_change(&pf->hw, true); if (status) - dev_dbg(&pf->pdev->dev, + dev_dbg(dev, "Fail to enable MIB change events\n"); } } + if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) { + /* down and up VSI so that changes of Rx cfg are reflected. 
*/ + ice_down(vsi); + ice_up(vsi); + } clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags); return ret; } @@ -2140,7 +2145,7 @@ ice_get_link_ksettings(struct net_device *netdev, /* flow control is symmetric and always supported */ ethtool_link_ksettings_add_link_mode(ks, supported, Pause); - caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + caps = kzalloc(sizeof(*caps), GFP_KERNEL); if (!caps) return -ENOMEM; @@ -2198,7 +2203,7 @@ ice_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); done: - devm_kfree(&vsi->back->pdev->dev, caps); + kfree(caps); return err; } @@ -2427,8 +2432,7 @@ ice_set_link_ksettings(struct net_device *netdev, usleep_range(TEST_SET_BITS_SLEEP_MIN, TEST_SET_BITS_SLEEP_MAX); } - abilities = devm_kzalloc(&pf->pdev->dev, sizeof(*abilities), - GFP_KERNEL); + abilities = kzalloc(sizeof(*abilities), GFP_KERNEL); if (!abilities) return -ENOMEM; @@ -2520,7 +2524,7 @@ ice_set_link_ksettings(struct net_device *netdev, } done: - devm_kfree(&pf->pdev->dev, abilities); + kfree(abilities); clear_bit(__ICE_CFG_BUSY, pf->state); return err; @@ -2577,6 +2581,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) { struct ice_ring *tx_rings = NULL, *rx_rings = NULL; struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_ring *xdp_rings = NULL; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; int i, timeout = 50, err = 0; @@ -2611,6 +2616,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) return 0; } + /* If there is a AF_XDP UMEM attached to any of Rx rings, + * disallow changing the number of descriptors -- regardless + * if the netdev is running or not. + */ + if (ice_xsk_any_rx_ring_ena(vsi)) + return -EBUSY; + while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { timeout--; if (!timeout) @@ -2624,6 +2636,11 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) vsi->tx_rings[i]->count = new_tx_cnt; for (i = 0; i < vsi->alloc_rxq; i++) vsi->rx_rings[i]->count = new_rx_cnt; + if (ice_is_xdp_ena_vsi(vsi)) + for (i = 0; i < vsi->num_xdp_txq; i++) + vsi->xdp_rings[i]->count = new_tx_cnt; + vsi->num_tx_desc = new_tx_cnt; + vsi->num_rx_desc = new_rx_cnt; netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n"); goto done; } @@ -2635,14 +2652,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n", vsi->tx_rings[0]->count, new_tx_cnt); - tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq, - sizeof(*tx_rings), GFP_KERNEL); + tx_rings = kcalloc(vsi->num_txq, sizeof(*tx_rings), GFP_KERNEL); if (!tx_rings) { err = -ENOMEM; goto done; } - for (i = 0; i < vsi->alloc_txq; i++) { + ice_for_each_txq(vsi, i) { /* clone ring and setup updated count */ tx_rings[i] = *vsi->tx_rings[i]; tx_rings[i].count = new_tx_cnt; @@ -2650,15 +2666,42 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) tx_rings[i].tx_buf = NULL; err = ice_setup_tx_ring(&tx_rings[i]); if (err) { - while (i) { - i--; + while (i--) ice_clean_tx_ring(&tx_rings[i]); - } - devm_kfree(&pf->pdev->dev, tx_rings); + kfree(tx_rings); goto done; } } + if (!ice_is_xdp_ena_vsi(vsi)) + goto process_rx; + + /* alloc updated XDP resources */ + netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n", + vsi->xdp_rings[0]->count, new_tx_cnt); + + xdp_rings = kcalloc(vsi->num_xdp_txq, sizeof(*xdp_rings), 
GFP_KERNEL); + if (!xdp_rings) { + err = -ENOMEM; + goto free_tx; + } + + for (i = 0; i < vsi->num_xdp_txq; i++) { + /* clone ring and setup updated count */ + xdp_rings[i] = *vsi->xdp_rings[i]; + xdp_rings[i].count = new_tx_cnt; + xdp_rings[i].desc = NULL; + xdp_rings[i].tx_buf = NULL; + err = ice_setup_tx_ring(&xdp_rings[i]); + if (err) { + while (i--) + ice_clean_tx_ring(&xdp_rings[i]); + kfree(xdp_rings); + goto free_tx; + } + ice_set_ring_xdp(&xdp_rings[i]); + } + process_rx: if (new_rx_cnt == vsi->rx_rings[0]->count) goto process_link; @@ -2667,14 +2710,13 @@ process_rx: netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n", vsi->rx_rings[0]->count, new_rx_cnt); - rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq, - sizeof(*rx_rings), GFP_KERNEL); + rx_rings = kcalloc(vsi->num_rxq, sizeof(*rx_rings), GFP_KERNEL); if (!rx_rings) { err = -ENOMEM; goto done; } - for (i = 0; i < vsi->alloc_rxq; i++) { + ice_for_each_rxq(vsi, i) { /* clone ring and setup updated count */ rx_rings[i] = *vsi->rx_rings[i]; rx_rings[i].count = new_rx_cnt; @@ -2698,7 +2740,7 @@ rx_unwind: i--; ice_free_rx_ring(&rx_rings[i]); } - devm_kfree(&pf->pdev->dev, rx_rings); + kfree(rx_rings); err = -ENOMEM; goto free_tx; } @@ -2712,15 +2754,15 @@ process_link: ice_down(vsi); if (tx_rings) { - for (i = 0; i < vsi->alloc_txq; i++) { + ice_for_each_txq(vsi, i) { ice_free_tx_ring(vsi->tx_rings[i]); *vsi->tx_rings[i] = tx_rings[i]; } - devm_kfree(&pf->pdev->dev, tx_rings); + kfree(tx_rings); } if (rx_rings) { - for (i = 0; i < vsi->alloc_rxq; i++) { + ice_for_each_rxq(vsi, i) { ice_free_rx_ring(vsi->rx_rings[i]); /* copy the real tail offset */ rx_rings[i].tail = vsi->rx_rings[i]->tail; @@ -2734,9 +2776,19 @@ process_link: rx_rings[i].next_to_alloc = 0; *vsi->rx_rings[i] = rx_rings[i]; } - devm_kfree(&pf->pdev->dev, rx_rings); + kfree(rx_rings); + } + + if (xdp_rings) { + for (i = 0; i < vsi->num_xdp_txq; i++) { + ice_free_tx_ring(vsi->xdp_rings[i]); + *vsi->xdp_rings[i] = xdp_rings[i]; + } + kfree(xdp_rings); } + vsi->num_tx_desc = new_tx_cnt; + vsi->num_rx_desc = new_rx_cnt; ice_up(vsi); } goto done; @@ -2744,9 +2796,9 @@ process_link: free_tx: /* error cleanup if the Rx allocations failed after getting Tx */ if (tx_rings) { - for (i = 0; i < vsi->alloc_txq; i++) + ice_for_each_txq(vsi, i) ice_free_tx_ring(&tx_rings[i]); - devm_kfree(&pf->pdev->dev, tx_rings); + kfree(tx_rings); } done: @@ -2794,7 +2846,6 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_port_info *pi = np->vsi->port_info; struct ice_aqc_get_phy_caps_data *pcaps; - struct ice_vsi *vsi = np->vsi; struct ice_dcbx_cfg *dcbx_cfg; enum ice_status status; @@ -2804,8 +2855,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) dcbx_cfg = &pi->local_dcbx_cfg; - pcaps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*pcaps), - GFP_KERNEL); + pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) return; @@ -2828,7 +2878,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) pause->rx_pause = 1; out: - devm_kfree(&vsi->back->pdev->dev, pcaps); + kfree(pcaps); } /** @@ -3009,7 +3059,7 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) return -EIO; } - lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL); + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) return -ENOMEM; @@ -3022,7 +3072,7 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, 
u8 *hfunc) indir[i] = (u32)(lut[i]); out: - devm_kfree(&pf->pdev->dev, lut); + kfree(lut); return ret; } @@ -3043,8 +3093,10 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + struct device *dev; u8 *seed = NULL; + dev = ice_pf_to_dev(pf); if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; @@ -3057,8 +3109,7 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, if (key) { if (!vsi->rss_hkey_user) { vsi->rss_hkey_user = - devm_kzalloc(&pf->pdev->dev, - ICE_VSIQF_HKEY_ARRAY_SIZE, + devm_kzalloc(dev, ICE_VSIQF_HKEY_ARRAY_SIZE, GFP_KERNEL); if (!vsi->rss_hkey_user) return -ENOMEM; @@ -3068,8 +3119,7 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, } if (!vsi->rss_lut_user) { - vsi->rss_lut_user = devm_kzalloc(&pf->pdev->dev, - vsi->rss_table_size, + vsi->rss_lut_user = devm_kzalloc(dev, vsi->rss_table_size, GFP_KERNEL); if (!vsi->rss_lut_user) return -ENOMEM; @@ -3092,6 +3142,188 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, return 0; } +/** + * ice_get_max_txq - return the maximum number of Tx queues for in a PF + * @pf: PF structure + */ +static int ice_get_max_txq(struct ice_pf *pf) +{ + return min_t(int, num_online_cpus(), + pf->hw.func_caps.common_cap.num_txq); +} + +/** + * ice_get_max_rxq - return the maximum number of Rx queues for in a PF + * @pf: PF structure + */ +static int ice_get_max_rxq(struct ice_pf *pf) +{ + return min_t(int, num_online_cpus(), + pf->hw.func_caps.common_cap.num_rxq); +} + +/** + * ice_get_combined_cnt - return the current number of combined channels + * @vsi: PF VSI pointer + * + * Go through all queue vectors and count ones that have both Rx and Tx ring + * attached + */ +static u32 ice_get_combined_cnt(struct ice_vsi *vsi) +{ + u32 combined = 0; + int q_idx; + + ice_for_each_q_vector(vsi, q_idx) { + struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring && q_vector->tx.ring) + combined++; + } + + return combined; +} + +/** + * ice_get_channels - get the current and max supported channels + * @dev: network interface device structure + * @ch: ethtool channel data structure + */ +static void +ice_get_channels(struct net_device *dev, struct ethtool_channels *ch) +{ + struct ice_netdev_priv *np = netdev_priv(dev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + + /* check to see if VSI is active */ + if (test_bit(__ICE_DOWN, vsi->state)) + return; + + /* report maximum channels */ + ch->max_rx = ice_get_max_rxq(pf); + ch->max_tx = ice_get_max_txq(pf); + ch->max_combined = min_t(int, ch->max_rx, ch->max_tx); + + /* report current channels */ + ch->combined_count = ice_get_combined_cnt(vsi); + ch->rx_count = vsi->num_rxq - ch->combined_count; + ch->tx_count = vsi->num_txq - ch->combined_count; +} + +/** + * ice_vsi_set_dflt_rss_lut - set default RSS LUT with requested RSS size + * @vsi: VSI to reconfigure RSS LUT on + * @req_rss_size: requested range of queue numbers for hashing + * + * Set the VSI's RSS parameters, configure the RSS LUT based on these. 
+ */ +static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + struct ice_hw *hw; + int err = 0; + u8 *lut; + + dev = ice_pf_to_dev(pf); + hw = &pf->hw; + + if (!req_rss_size) + return -EINVAL; + + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* set RSS LUT parameters */ + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + vsi->rss_size = 1; + } else { + struct ice_hw_common_caps *caps = &hw->func_caps.common_cap; + + vsi->rss_size = min_t(int, req_rss_size, + BIT(caps->rss_table_entry_width)); + } + + /* create/set RSS LUT */ + ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); + status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, + vsi->rss_table_size); + if (status) { + dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n", + status, hw->adminq.rq_last_status); + err = -EIO; + } + + kfree(lut); + return err; +} + +/** + * ice_set_channels - set the number channels + * @dev: network interface device structure + * @ch: ethtool channel data structure + */ +static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch) +{ + struct ice_netdev_priv *np = netdev_priv(dev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + int new_rx = 0, new_tx = 0; + u32 curr_combined; + + /* do not support changing channels in Safe Mode */ + if (ice_is_safe_mode(pf)) { + netdev_err(dev, "Changing channel in Safe Mode is not supported\n"); + return -EOPNOTSUPP; + } + /* do not support changing other_count */ + if (ch->other_count) + return -EINVAL; + + curr_combined = ice_get_combined_cnt(vsi); + + /* these checks are for cases where user didn't specify a particular + * value on cmd line but we get non-zero value anyway via + * get_channels(); look at ethtool.c in ethtool repository (the user + * space part), particularly, do_schannels() routine + */ + if (ch->rx_count == vsi->num_rxq - curr_combined) + ch->rx_count = 0; + if (ch->tx_count == vsi->num_txq - curr_combined) + ch->tx_count = 0; + if (ch->combined_count == curr_combined) + ch->combined_count = 0; + + if (!(ch->combined_count || (ch->rx_count && ch->tx_count))) { + netdev_err(dev, "Please specify at least 1 Rx and 1 Tx channel\n"); + return -EINVAL; + } + + new_rx = ch->combined_count + ch->rx_count; + new_tx = ch->combined_count + ch->tx_count; + + if (new_rx > ice_get_max_rxq(pf)) { + netdev_err(dev, "Maximum allowed Rx channels is %d\n", + ice_get_max_rxq(pf)); + return -EINVAL; + } + if (new_tx > ice_get_max_txq(pf)) { + netdev_err(dev, "Maximum allowed Tx channels is %d\n", + ice_get_max_txq(pf)); + return -EINVAL; + } + + ice_vsi_recfg_qs(vsi, new_rx, new_tx); + + if (new_rx && !netif_is_rxfh_configured(dev)) + return ice_vsi_set_dflt_rss_lut(vsi, new_rx); + + return 0; +} + enum ice_container_type { ICE_RX_CONTAINER, ICE_TX_CONTAINER, @@ -3131,7 +3363,7 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC; break; default: - dev_dbg(&pf->pdev->dev, "Invalid c_type %d\n", c_type); + dev_dbg(ice_pf_to_dev(pf), "Invalid c_type %d\n", c_type); return -EINVAL; } @@ -3271,7 +3503,8 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, break; default: - dev_dbg(&pf->pdev->dev, "Invalid container type %d\n", c_type); + dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n", + c_type); return -EINVAL; } @@ -3368,10 +3601,17 @@ 
__ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, struct ice_vsi *vsi = np->vsi; if (q_num < 0) { - int i; + int v_idx; + + ice_for_each_q_vector(vsi, v_idx) { + /* In some cases if DCB is configured the num_[rx|tx]q + * can be less than vsi->num_q_vectors. This check + * accounts for that so we don't report a false failure + */ + if (v_idx >= vsi->num_rxq && v_idx >= vsi->num_txq) + goto set_complete; - ice_for_each_q_vector(vsi, i) { - if (ice_set_q_coalesce(vsi, ec, i)) + if (ice_set_q_coalesce(vsi, ec, v_idx)) return -EINVAL; } goto set_complete; @@ -3398,6 +3638,151 @@ ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num, return __ice_set_coalesce(netdev, ec, q_num); } +#define ICE_I2C_EEPROM_DEV_ADDR 0xA0 +#define ICE_I2C_EEPROM_DEV_ADDR2 0xA2 +#define ICE_MODULE_TYPE_SFP 0x03 +#define ICE_MODULE_TYPE_QSFP_PLUS 0x0D +#define ICE_MODULE_TYPE_QSFP28 0x11 +#define ICE_MODULE_SFF_ADDR_MODE 0x04 +#define ICE_MODULE_SFF_DIAG_CAPAB 0x40 +#define ICE_MODULE_REVISION_ADDR 0x01 +#define ICE_MODULE_SFF_8472_COMP 0x5E +#define ICE_MODULE_SFF_8472_SWAP 0x5C +#define ICE_MODULE_QSFP_MAX_LEN 640 + +/** + * ice_get_module_info - get SFF module type and revision information + * @netdev: network interface device structure + * @modinfo: module EEPROM size and layout information structure + */ +static int +ice_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + u8 sff8472_comp = 0; + u8 sff8472_swap = 0; + u8 sff8636_rev = 0; + u8 value = 0; + + status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, 0x00, 0x00, + 0, &value, 1, 0, NULL); + if (status) + return -EIO; + + switch (value) { + case ICE_MODULE_TYPE_SFP: + status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, + ICE_MODULE_SFF_8472_COMP, 0x00, 0, + &sff8472_comp, 1, 0, NULL); + if (status) + return -EIO; + status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, + ICE_MODULE_SFF_8472_SWAP, 0x00, 0, + &sff8472_swap, 1, 0, NULL); + if (status) + return -EIO; + + if (sff8472_swap & ICE_MODULE_SFF_ADDR_MODE) { + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else if (sff8472_comp && + (sff8472_swap & ICE_MODULE_SFF_DIAG_CAPAB)) { + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } + break; + case ICE_MODULE_TYPE_QSFP_PLUS: + case ICE_MODULE_TYPE_QSFP28: + status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, + ICE_MODULE_REVISION_ADDR, 0x00, 0, + &sff8636_rev, 1, 0, NULL); + if (status) + return -EIO; + /* Check revision compliance */ + if (sff8636_rev > 0x02) { + /* Module is SFF-8636 compliant */ + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN; + } + break; + default: + netdev_warn(netdev, + "SFF Module Type not recognized.\n"); + return -EINVAL; + } + return 0; +} + +/** + * ice_get_module_eeprom - fill buffer with SFF EEPROM contents + * @netdev: network interface device structure + * @ee: EEPROM dump request structure + * @data: buffer to be filled with EEPROM contents + */ +static int +ice_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, u8 *data) +{ + 
struct ice_netdev_priv *np = netdev_priv(netdev); + u8 addr = ICE_I2C_EEPROM_DEV_ADDR; + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + bool is_sfp = false; + u16 offset = 0; + u8 value = 0; + u8 page = 0; + int i; + + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, + &value, 1, 0, NULL); + if (status) + return -EIO; + + if (!ee || !ee->len || !data) + return -EINVAL; + + if (value == ICE_MODULE_TYPE_SFP) + is_sfp = true; + + for (i = 0; i < ee->len; i++) { + offset = i + ee->offset; + + /* Check if we need to access the other memory page */ + if (is_sfp) { + if (offset >= ETH_MODULE_SFF_8079_LEN) { + offset -= ETH_MODULE_SFF_8079_LEN; + addr = ICE_I2C_EEPROM_DEV_ADDR2; + } + } else { + while (offset >= ETH_MODULE_SFF_8436_LEN) { + /* Compute memory page number and offset. */ + offset -= ETH_MODULE_SFF_8436_LEN / 2; + page++; + } + } + + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, !is_sfp, + &value, 1, 0, NULL); + if (status) + value = 0; + data[i] = value; + } + return 0; +} + static const struct ethtool_ops ice_ethtool_ops = { .get_link_ksettings = ice_get_link_ksettings, .set_link_ksettings = ice_set_link_ksettings, @@ -3428,11 +3813,15 @@ static const struct ethtool_ops ice_ethtool_ops = { .get_rxfh_indir_size = ice_get_rxfh_indir_size, .get_rxfh = ice_get_rxfh, .set_rxfh = ice_set_rxfh, + .get_channels = ice_get_channels, + .set_channels = ice_set_channels, .get_ts_info = ethtool_op_get_ts_info, .get_per_queue_coalesce = ice_get_per_q_coalesce, .set_per_queue_coalesce = ice_set_per_q_coalesce, .get_fecparam = ice_get_fecparam, .set_fecparam = ice_set_fecparam, + .get_module_info = ice_get_module_info, + .get_module_eeprom = ice_get_module_eeprom, }; static const struct ethtool_ops ice_ethtool_safe_mode_ops = { @@ -3451,6 +3840,7 @@ static const struct ethtool_ops ice_ethtool_safe_mode_ops = { .get_ringparam = ice_get_ringparam, .set_ringparam = ice_set_ringparam, .nway_reset = ice_nway_reset, + .get_channels = ice_get_channels, }; /** diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 152fbd556e9b..e8f32350fed2 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -52,6 +52,9 @@ #define PF_MBX_ATQLEN_ATQLEN_M ICE_M(0x3FF, 0) #define PF_MBX_ATQLEN_ATQENABLE_M BIT(31) #define PF_MBX_ATQT 0x0022E300 +#define PRTDCB_GENC 0x00083000 +#define PRTDCB_GENC_PFCLDA_S 16 +#define PRTDCB_GENC_PFCLDA_M ICE_M(0xFFFF, 16) #define PRTDCB_GENS 0x00083020 #define PRTDCB_GENS_DCBX_STATUS_S 0 #define PRTDCB_GENS_DCBX_STATUS_M ICE_M(0x7, 0) diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 2aac8f13daeb..ad34f22d44ef 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -211,7 +211,7 @@ enum ice_flex_rx_mdid { /* Rx/Tx Flag64 packet flag bits */ enum ice_flg64_bits { ICE_FLG_PKT_DSI = 0, - ICE_FLG_EVLAN_x8100 = 15, + ICE_FLG_EVLAN_x8100 = 14, ICE_FLG_EVLAN_x9100, ICE_FLG_VLAN_x8100, ICE_FLG_TNL_MAC = 22, diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index cc755382df25..e7449248fab4 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2,232 +2,26 @@ /* Copyright (c) 2018, Intel Corporation. 
*/ #include "ice.h" +#include "ice_base.h" #include "ice_lib.h" #include "ice_dcb_lib.h" /** - * ice_setup_rx_ctx - Configure a receive ring context - * @ring: The Rx ring to configure - * - * Configure the Rx descriptor ring in RLAN context. + * ice_vsi_type_str - maps VSI type enum to string equivalents + * @type: VSI type enum */ -static int ice_setup_rx_ctx(struct ice_ring *ring) +const char *ice_vsi_type_str(enum ice_vsi_type type) { - struct ice_vsi *vsi = ring->vsi; - struct ice_hw *hw = &vsi->back->hw; - u32 rxdid = ICE_RXDID_FLEX_NIC; - struct ice_rlan_ctx rlan_ctx; - u32 regval; - u16 pf_q; - int err; - - /* what is Rx queue number in global space of 2K Rx queues */ - pf_q = vsi->rxq_map[ring->q_index]; - - /* clear the context structure first */ - memset(&rlan_ctx, 0, sizeof(rlan_ctx)); - - rlan_ctx.base = ring->dma >> 7; - - rlan_ctx.qlen = ring->count; - - /* Receive Packet Data Buffer Size. - * The Packet Data Buffer Size is defined in 128 byte units. - */ - rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S; - - /* use 32 byte descriptors */ - rlan_ctx.dsize = 1; - - /* Strip the Ethernet CRC bytes before the packet is posted to host - * memory. - */ - rlan_ctx.crcstrip = 1; - - /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */ - rlan_ctx.l2tsel = 1; - - rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; - rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; - rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT; - - /* This controls whether VLAN is stripped from inner headers - * The VLAN in the inner L2 header is stripped to the receive - * descriptor if enabled by this flag. - */ - rlan_ctx.showiv = 0; - - /* Max packet size for this queue - must not be set to a larger value - * than 5 x DBUF - */ - rlan_ctx.rxmax = min_t(u16, vsi->max_frame, - ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len); - - /* Rx queue threshold in units of 64 */ - rlan_ctx.lrxqthresh = 1; - - /* Enable Flexible Descriptors in the queue context which - * allows this driver to select a specific receive descriptor format - */ - if (vsi->type != ICE_VSI_VF) { - regval = rd32(hw, QRXFLXP_CNTXT(pf_q)); - regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) & - QRXFLXP_CNTXT_RXDID_IDX_M; - - /* increasing context priority to pick up profile ID; - * default is 0x01; setting to 0x03 to ensure profile - * is programming if prev context is of same priority - */ - regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) & - QRXFLXP_CNTXT_RXDID_PRIO_M; - - wr32(hw, QRXFLXP_CNTXT(pf_q), regval); - } - - /* Absolute queue number out of 2K needs to be passed */ - err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); - if (err) { - dev_err(&vsi->back->pdev->dev, - "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", - pf_q, err); - return -EIO; - } - - if (vsi->type == ICE_VSI_VF) - return 0; - - /* init queue specific tail register */ - ring->tail = hw->hw_addr + QRX_TAIL(pf_q); - writel(0, ring->tail); - ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring)); - - return 0; -} - -/** - * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance - * @ring: The Tx ring to configure - * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized - * @pf_q: queue index in the PF space - * - * Configure the Tx descriptor ring in TLAN context. 
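For illustration only (not part of the patch): returning briefly to ice_get_module_eeprom() in the ethtool changes above, the QSFP branch folds ethtool's flat EEPROM offset into a (page, in-page offset) pair by repeatedly subtracting half of ETH_MODULE_SFF_8436_LEN, because each upper page is 128 bytes long and is mapped at device offsets 128-255 (the SFP branch instead switches to the 0xA2 device address once the offset passes ETH_MODULE_SFF_8079_LEN). Below is a standalone model of that translation, assuming the usual 256-byte ETH_MODULE_SFF_8436_LEN and using hypothetical names.

#include <stdio.h>

#define EXAMPLE_SFF_8436_LEN	256	/* assumed value of ETH_MODULE_SFF_8436_LEN */

/* Hypothetical helper mirroring the QSFP while loop above: translate a flat
 * ethtool offset into the page number and in-page offset handed to the
 * EEPROM read, peeling off one 128-byte upper page per iteration.
 */
static void qsfp_page_offset(unsigned int flat, unsigned int *page,
			     unsigned int *offset)
{
	*page = 0;
	*offset = flat;
	while (*offset >= EXAMPLE_SFF_8436_LEN) {
		*offset -= EXAMPLE_SFF_8436_LEN / 2;
		(*page)++;
	}
}

int main(void)
{
	unsigned int page, off;

	qsfp_page_offset(300, &page, &off);
	printf("flat 300 -> page %u, offset %u\n", page, off);	/* page 1, offset 172 */
	qsfp_page_offset(384, &page, &off);
	printf("flat 384 -> page %u, offset %u\n", page, off);	/* page 2, offset 128 */
	return 0;
}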
- */ -static void -ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) -{ - struct ice_vsi *vsi = ring->vsi; - struct ice_hw *hw = &vsi->back->hw; - - tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S; - - tlan_ctx->port_num = vsi->port_info->lport; - - /* Transmit Queue Length */ - tlan_ctx->qlen = ring->count; - - ice_set_cgd_num(tlan_ctx, ring); - - /* PF number */ - tlan_ctx->pf_num = hw->pf_id; - - /* queue belongs to a specific VSI type - * VF / VM index should be programmed per vmvf_type setting: - * for vmvf_type = VF, it is VF number between 0-256 - * for vmvf_type = VM, it is VM number between 0-767 - * for PF or EMP this field should be set to zero - */ - switch (vsi->type) { - case ICE_VSI_LB: - /* fall through */ + switch (type) { case ICE_VSI_PF: - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; - break; + return "ICE_VSI_PF"; case ICE_VSI_VF: - /* Firmware expects vmvf_num to be absolute VF ID */ - tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id; - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF; - break; + return "ICE_VSI_VF"; + case ICE_VSI_LB: + return "ICE_VSI_LB"; default: - return; - } - - /* make sure the context is associated with the right VSI */ - tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); - - tlan_ctx->tso_ena = ICE_TX_LEGACY; - tlan_ctx->tso_qnum = pf_q; - - /* Legacy or Advanced Host Interface: - * 0: Advanced Host Interface - * 1: Legacy Host Interface - */ - tlan_ctx->legacy_int = ICE_TX_LEGACY; -} - -/** - * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled - * @pf: the PF being configured - * @pf_q: the PF queue - * @ena: enable or disable state of the queue - * - * This routine will wait for the given Rx queue of the PF to reach the - * enabled or disabled state. - * Returns -ETIMEDOUT in case of failing to reach the requested state after - * multiple retries; else will return 0 in case of success. - */ -static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena) -{ - int i; - - for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) { - if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) & - QRX_CTRL_QENA_STAT_M)) - return 0; - - usleep_range(20, 40); + return "unknown"; } - - return -ETIMEDOUT; -} - -/** - * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring - * @vsi: the VSI being configured - * @ena: start or stop the Rx rings - * @rxq_idx: Rx queue index - */ -#ifndef CONFIG_PCI_IOV -static -#endif /* !CONFIG_PCI_IOV */ -int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx) -{ - int pf_q = vsi->rxq_map[rxq_idx]; - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; - int ret = 0; - u32 rx_reg; - - rx_reg = rd32(hw, QRX_CTRL(pf_q)); - - /* Skip if the queue is already in the requested state */ - if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M)) - return 0; - - /* turn on/off the queue */ - if (ena) - rx_reg |= QRX_CTRL_QENA_REQ_M; - else - rx_reg &= ~QRX_CTRL_QENA_REQ_M; - wr32(hw, QRX_CTRL(pf_q), rx_reg); - - /* wait for the change to finish */ - ret = ice_pf_rxq_wait(pf, pf_q, ena); - if (ret) - dev_err(&pf->pdev->dev, - "VSI idx %d Rx ring %d %sable timeout\n", - vsi->idx, pf_q, (ena ? 
"en" : "dis")); - - return ret; } /** @@ -258,36 +52,39 @@ static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena) static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; + struct device *dev; + + dev = ice_pf_to_dev(pf); /* allocate memory for both Tx and Rx ring pointers */ - vsi->tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq, + vsi->tx_rings = devm_kcalloc(dev, vsi->alloc_txq, sizeof(*vsi->tx_rings), GFP_KERNEL); if (!vsi->tx_rings) return -ENOMEM; - vsi->rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq, + vsi->rx_rings = devm_kcalloc(dev, vsi->alloc_rxq, sizeof(*vsi->rx_rings), GFP_KERNEL); if (!vsi->rx_rings) goto err_rings; - vsi->txq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq, + /* XDP will have vsi->alloc_txq Tx queues as well, so double the size */ + vsi->txq_map = devm_kcalloc(dev, (2 * vsi->alloc_txq), sizeof(*vsi->txq_map), GFP_KERNEL); if (!vsi->txq_map) goto err_txq_map; - vsi->rxq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq, + vsi->rxq_map = devm_kcalloc(dev, vsi->alloc_rxq, sizeof(*vsi->rxq_map), GFP_KERNEL); if (!vsi->rxq_map) goto err_rxq_map; - /* There is no need to allocate q_vectors for a loopback VSI. */ if (vsi->type == ICE_VSI_LB) return 0; /* allocate memory for q_vector pointers */ - vsi->q_vectors = devm_kcalloc(&pf->pdev->dev, vsi->num_q_vectors, + vsi->q_vectors = devm_kcalloc(dev, vsi->num_q_vectors, sizeof(*vsi->q_vectors), GFP_KERNEL); if (!vsi->q_vectors) goto err_vectors; @@ -295,13 +92,13 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) return 0; err_vectors: - devm_kfree(&pf->pdev->dev, vsi->rxq_map); + devm_kfree(dev, vsi->rxq_map); err_rxq_map: - devm_kfree(&pf->pdev->dev, vsi->txq_map); + devm_kfree(dev, vsi->txq_map); err_txq_map: - devm_kfree(&pf->pdev->dev, vsi->rx_rings); + devm_kfree(dev, vsi->rx_rings); err_rings: - devm_kfree(&pf->pdev->dev, vsi->tx_rings); + devm_kfree(dev, vsi->tx_rings); return -ENOMEM; } @@ -345,15 +142,24 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) case ICE_VSI_PF: vsi->alloc_txq = min_t(int, ice_get_avail_txq_count(pf), num_online_cpus()); + if (vsi->req_txq) { + vsi->alloc_txq = vsi->req_txq; + vsi->num_txq = vsi->req_txq; + } pf->num_lan_tx = vsi->alloc_txq; /* only 1 Rx queue unless RSS is enabled */ - if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { vsi->alloc_rxq = 1; - else + } else { vsi->alloc_rxq = min_t(int, ice_get_avail_rxq_count(pf), num_online_cpus()); + if (vsi->req_rxq) { + vsi->alloc_rxq = vsi->req_rxq; + vsi->num_rxq = vsi->req_rxq; + } + } pf->num_lan_rx = vsi->alloc_rxq; @@ -375,7 +181,7 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) vsi->alloc_rxq = 1; break; default: - dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); + dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi->type); break; } @@ -421,7 +227,7 @@ void ice_vsi_delete(struct ice_vsi *vsi) struct ice_vsi_ctx *ctxt; enum ice_status status; - ctxt = devm_kzalloc(&pf->pdev->dev, sizeof(*ctxt), GFP_KERNEL); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return; @@ -433,10 +239,10 @@ void ice_vsi_delete(struct ice_vsi *vsi) status = ice_free_vsi(&pf->hw, vsi->idx, ctxt, false, NULL); if (status) - dev_err(&pf->pdev->dev, "Failed to delete VSI %i in FW\n", - vsi->vsi_num); + dev_err(ice_pf_to_dev(pf), "Failed to delete VSI %i in FW - error: %d\n", + vsi->vsi_num, status); - devm_kfree(&pf->pdev->dev, ctxt); + kfree(ctxt); } /** @@ -446,26 +252,29 @@ void 
ice_vsi_delete(struct ice_vsi *vsi) static void ice_vsi_free_arrays(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; + struct device *dev; + + dev = ice_pf_to_dev(pf); /* free the ring and vector containers */ if (vsi->q_vectors) { - devm_kfree(&pf->pdev->dev, vsi->q_vectors); + devm_kfree(dev, vsi->q_vectors); vsi->q_vectors = NULL; } if (vsi->tx_rings) { - devm_kfree(&pf->pdev->dev, vsi->tx_rings); + devm_kfree(dev, vsi->tx_rings); vsi->tx_rings = NULL; } if (vsi->rx_rings) { - devm_kfree(&pf->pdev->dev, vsi->rx_rings); + devm_kfree(dev, vsi->rx_rings); vsi->rx_rings = NULL; } if (vsi->txq_map) { - devm_kfree(&pf->pdev->dev, vsi->txq_map); + devm_kfree(dev, vsi->txq_map); vsi->txq_map = NULL; } if (vsi->rxq_map) { - devm_kfree(&pf->pdev->dev, vsi->rxq_map); + devm_kfree(dev, vsi->rxq_map); vsi->rxq_map = NULL; } } @@ -482,6 +291,7 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi) int ice_vsi_clear(struct ice_vsi *vsi) { struct ice_pf *pf = NULL; + struct device *dev; if (!vsi) return 0; @@ -490,10 +300,10 @@ int ice_vsi_clear(struct ice_vsi *vsi) return -EINVAL; pf = vsi->back; + dev = ice_pf_to_dev(pf); if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) { - dev_dbg(&pf->pdev->dev, "vsi does not exist at pf->vsi[%d]\n", - vsi->idx); + dev_dbg(dev, "vsi does not exist at pf->vsi[%d]\n", vsi->idx); return -EINVAL; } @@ -506,7 +316,7 @@ int ice_vsi_clear(struct ice_vsi *vsi) ice_vsi_free_arrays(vsi); mutex_unlock(&pf->sw_mutex); - devm_kfree(&pf->pdev->dev, vsi); + devm_kfree(dev, vsi); return 0; } @@ -539,6 +349,7 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) static struct ice_vsi * ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id) { + struct device *dev = ice_pf_to_dev(pf); struct ice_vsi *vsi = NULL; /* Need to protect the allocation of the VSIs at the PF level */ @@ -549,11 +360,11 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id) * is available to be populated */ if (pf->next_vsi == ICE_NO_VSI) { - dev_dbg(&pf->pdev->dev, "out of VSI slots!\n"); + dev_dbg(dev, "out of VSI slots!\n"); goto unlock_pf; } - vsi = devm_kzalloc(&pf->pdev->dev, sizeof(*vsi), GFP_KERNEL); + vsi = devm_kzalloc(dev, sizeof(*vsi), GFP_KERNEL); if (!vsi) goto unlock_pf; @@ -585,7 +396,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id) goto err_rings; break; default: - dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); + dev_warn(dev, "Unknown VSI type %d\n", vsi->type); goto unlock_pf; } @@ -598,7 +409,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id) goto unlock_pf; err_rings: - devm_kfree(&pf->pdev->dev, vsi); + devm_kfree(dev, vsi); vsi = NULL; unlock_pf: mutex_unlock(&pf->sw_mutex); @@ -606,88 +417,6 @@ unlock_pf: } /** - * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI - * @qs_cfg: gathered variables needed for PF->VSI queues assignment - * - * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap - */ -static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg) -{ - int offset, i; - - mutex_lock(qs_cfg->qs_mutex); - offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size, - 0, qs_cfg->q_count, 0); - if (offset >= qs_cfg->pf_map_size) { - mutex_unlock(qs_cfg->qs_mutex); - return -ENOMEM; - } - - bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count); - for (i = 0; i < qs_cfg->q_count; i++) - qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset; - mutex_unlock(qs_cfg->qs_mutex); - - return 0; -} - -/** - * 
__ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI - * @qs_cfg: gathered variables needed for pf->vsi queues assignment - * - * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap - */ -static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg) -{ - int i, index = 0; - - mutex_lock(qs_cfg->qs_mutex); - for (i = 0; i < qs_cfg->q_count; i++) { - index = find_next_zero_bit(qs_cfg->pf_map, - qs_cfg->pf_map_size, index); - if (index >= qs_cfg->pf_map_size) - goto err_scatter; - set_bit(index, qs_cfg->pf_map); - qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index; - } - mutex_unlock(qs_cfg->qs_mutex); - - return 0; -err_scatter: - for (index = 0; index < i; index++) { - clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map); - qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0; - } - mutex_unlock(qs_cfg->qs_mutex); - - return -ENOMEM; -} - -/** - * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI - * @qs_cfg: gathered variables needed for pf->vsi queues assignment - * - * This function first tries to find contiguous space. If it is not successful, - * it tries with the scatter approach. - * - * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap - */ -static int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg) -{ - int ret = 0; - - ret = __ice_vsi_get_qs_contig(qs_cfg); - if (ret) { - /* contig failed, so try with scatter approach */ - qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER; - qs_cfg->q_count = min_t(u16, qs_cfg->q_count, - qs_cfg->scatter_count); - ret = __ice_vsi_get_qs_sc(qs_cfg); - } - return ret; -} - -/** * ice_vsi_get_qs - Assign queues from PF to VSI * @vsi: the VSI to assign queues to * @@ -769,14 +498,15 @@ bool ice_is_safe_mode(struct ice_pf *pf) */ static void ice_rss_clean(struct ice_vsi *vsi) { - struct ice_pf *pf; + struct ice_pf *pf = vsi->back; + struct device *dev; - pf = vsi->back; + dev = ice_pf_to_dev(pf); if (vsi->rss_hkey_user) - devm_kfree(&pf->pdev->dev, vsi->rss_hkey_user); + devm_kfree(dev, vsi->rss_hkey_user); if (vsi->rss_lut_user) - devm_kfree(&pf->pdev->dev, vsi->rss_lut_user); + devm_kfree(dev, vsi->rss_lut_user); } /** @@ -814,7 +544,7 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) case ICE_VSI_LB: break; default: - dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", + dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi->type); break; } @@ -918,7 +648,9 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) else max_rss = ICE_MAX_SMALL_RSS_QS; qcount_rx = min_t(int, rx_numq_tc, max_rss); - qcount_rx = min_t(int, qcount_rx, vsi->rss_size); + if (!vsi->req_rxq) + qcount_rx = min_t(int, qcount_rx, + vsi->rss_size); } } @@ -990,9 +722,11 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) { u8 lut_type, hash_type; + struct device *dev; struct ice_pf *pf; pf = vsi->back; + dev = ice_pf_to_dev(pf); switch (vsi->type) { case ICE_VSI_PF: @@ -1006,10 +740,11 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; break; case ICE_VSI_LB: - dev_dbg(&pf->pdev->dev, "Unsupported VSI type %d\n", vsi->type); + dev_dbg(dev, "Unsupported VSI type %s\n", + ice_vsi_type_str(vsi->type)); return; default: - dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); + dev_warn(dev, "Unknown VSI type %d\n", vsi->type); return; } @@ -1022,18 +757,21 @@ static void ice_set_rss_vsi_ctx(struct 
ice_vsi_ctx *ctxt, struct ice_vsi *vsi) /** * ice_vsi_init - Create and initialize a VSI * @vsi: the VSI being configured + * @init_vsi: is this call creating a VSI * * This initializes a VSI context depending on the VSI type to be added and * passes it down to the add_vsi aq command to create a new VSI. */ -static int ice_vsi_init(struct ice_vsi *vsi) +static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) { struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; struct ice_vsi_ctx *ctxt; + struct device *dev; int ret = 0; - ctxt = devm_kzalloc(&pf->pdev->dev, sizeof(*ctxt), GFP_KERNEL); + dev = ice_pf_to_dev(pf); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return -ENOMEM; @@ -1050,7 +788,8 @@ static int ice_vsi_init(struct ice_vsi *vsi) ctxt->vf_num = vsi->vf_id + hw->func_caps.vf_base_id; break; default: - return -ENODEV; + ret = -ENODEV; + goto out; } ice_set_dflt_vsi_ctx(ctxt); @@ -1059,11 +798,24 @@ static int ice_vsi_init(struct ice_vsi *vsi) ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB; /* Set LUT type and HASH type if RSS is enabled */ - if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { ice_set_rss_vsi_ctx(ctxt, vsi); + /* if updating VSI context, make sure to set valid_section: + * to indicate which section of VSI context being updated + */ + if (!init_vsi) + ctxt->info.valid_sections |= + cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); + } ctxt->info.sw_id = vsi->port_info->sw_id; ice_vsi_setup_q_map(vsi, ctxt); + if (!init_vsi) /* means VSI being updated */ + /* must to indicate which section of VSI context are + * being modified + */ + ctxt->info.valid_sections |= + cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); /* Enable MAC Antispoof with new VSI being initialized or updated */ if (vsi->type == ICE_VSI_VF && pf->vf[vsi->vf_id].spoofchk) { @@ -1080,11 +832,20 @@ static int ice_vsi_init(struct ice_vsi *vsi) cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); } - ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL); - if (ret) { - dev_err(&pf->pdev->dev, - "Add VSI failed, err %d\n", ret); - return -EIO; + if (init_vsi) { + ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL); + if (ret) { + dev_err(dev, "Add VSI failed, err %d\n", ret); + ret = -EIO; + goto out; + } + } else { + ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL); + if (ret) { + dev_err(dev, "Update VSI failed, err %d\n", ret); + ret = -EIO; + goto out; + } } /* keep context for update VSI operations */ @@ -1093,131 +854,9 @@ static int ice_vsi_init(struct ice_vsi *vsi) /* record VSI number returned */ vsi->vsi_num = ctxt->vsi_num; - devm_kfree(&pf->pdev->dev, ctxt); - return ret; -} - -/** - * ice_free_q_vector - Free memory allocated for a specific interrupt vector - * @vsi: VSI having the memory freed - * @v_idx: index of the vector to be freed - */ -static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) -{ - struct ice_q_vector *q_vector; - struct ice_pf *pf = vsi->back; - struct ice_ring *ring; - - if (!vsi->q_vectors[v_idx]) { - dev_dbg(&pf->pdev->dev, "Queue vector at index %d not found\n", - v_idx); - return; - } - q_vector = vsi->q_vectors[v_idx]; - - ice_for_each_ring(ring, q_vector->tx) - ring->q_vector = NULL; - ice_for_each_ring(ring, q_vector->rx) - ring->q_vector = NULL; - - /* only VSI with an associated netdev is set up with NAPI */ - if (vsi->netdev) - netif_napi_del(&q_vector->napi); - - devm_kfree(&pf->pdev->dev, q_vector); - vsi->q_vectors[v_idx] = NULL; -} - -/** - * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors - * @vsi: the VSI 
having memory freed - */ -void ice_vsi_free_q_vectors(struct ice_vsi *vsi) -{ - int v_idx; - - ice_for_each_q_vector(vsi, v_idx) - ice_free_q_vector(vsi, v_idx); -} - -/** - * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector - * @vsi: the VSI being configured - * @v_idx: index of the vector in the VSI struct - * - * We allocate one q_vector. If allocation fails we return -ENOMEM. - */ -static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx) -{ - struct ice_pf *pf = vsi->back; - struct ice_q_vector *q_vector; - - /* allocate q_vector */ - q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL); - if (!q_vector) - return -ENOMEM; - - q_vector->vsi = vsi; - q_vector->v_idx = v_idx; - if (vsi->type == ICE_VSI_VF) - goto out; - /* only set affinity_mask if the CPU is online */ - if (cpu_online(v_idx)) - cpumask_set_cpu(v_idx, &q_vector->affinity_mask); - - /* This will not be called in the driver load path because the netdev - * will not be created yet. All other cases with register the NAPI - * handler here (i.e. resume, reset/rebuild, etc.) - */ - if (vsi->netdev) - netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll, - NAPI_POLL_WEIGHT); - out: - /* tie q_vector and VSI together */ - vsi->q_vectors[v_idx] = q_vector; - - return 0; -} - -/** - * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors - * @vsi: the VSI being configured - * - * We allocate one q_vector per queue interrupt. If allocation fails we - * return -ENOMEM. - */ -static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - int v_idx = 0, num_q_vectors; - int err; - - if (vsi->q_vectors[0]) { - dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n", - vsi->vsi_num); - return -EEXIST; - } - - num_q_vectors = vsi->num_q_vectors; - - for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { - err = ice_vsi_alloc_q_vector(vsi, v_idx); - if (err) - goto err_out; - } - - return 0; - -err_out: - while (v_idx--) - ice_free_q_vector(vsi, v_idx); - - dev_err(&pf->pdev->dev, - "Failed to allocate %d q_vector for VSI %d, ret=%d\n", - vsi->num_q_vectors, vsi->vsi_num, err); - vsi->num_q_vectors = 0; - return err; + kfree(ctxt); + return ret; } /** @@ -1233,14 +872,16 @@ err_out: static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; + struct device *dev; u16 num_q_vectors; + dev = ice_pf_to_dev(pf); /* SRIOV doesn't grab irq_tracker entries for each VSI */ if (vsi->type == ICE_VSI_VF) return 0; if (vsi->base_vector) { - dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n", + dev_dbg(dev, "VSI %d has non-zero base vector %d\n", vsi->vsi_num, vsi->base_vector); return -EEXIST; } @@ -1250,7 +891,7 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) vsi->base_vector = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx); if (vsi->base_vector < 0) { - dev_err(&pf->pdev->dev, + dev_err(dev, "Failed to get tracking for %d vectors for VSI %d, err=%d\n", num_q_vectors, vsi->vsi_num, vsi->base_vector); return -ENOENT; @@ -1293,8 +934,10 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi) static int ice_vsi_alloc_rings(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; + struct device *dev; int i; + dev = ice_pf_to_dev(pf); /* Allocate Tx rings */ for (i = 0; i < vsi->alloc_txq; i++) { struct ice_ring *ring; @@ -1309,7 +952,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->reg_idx = vsi->txq_map[i]; ring->ring_active = false; ring->vsi = vsi; - ring->dev = &pf->pdev->dev; + 
ring->dev = dev; ring->count = vsi->num_tx_desc; vsi->tx_rings[i] = ring; } @@ -1328,7 +971,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->ring_active = false; ring->vsi = vsi; ring->netdev = vsi->netdev; - ring->dev = &pf->pdev->dev; + ring->dev = dev; ring->count = vsi->num_rx_desc; vsi->rx_rings[i] = ring; } @@ -1341,66 +984,6 @@ err_out: } /** - * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors - * @vsi: the VSI being configured - * - * This function maps descriptor rings to the queue-specific vectors allotted - * through the MSI-X enabling code. On a constrained vector budget, we map Tx - * and Rx rings to the vector as "efficiently" as possible. - */ -#ifdef CONFIG_DCB -void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) -#else -static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) -#endif /* CONFIG_DCB */ -{ - int q_vectors = vsi->num_q_vectors; - int tx_rings_rem, rx_rings_rem; - int v_id; - - /* initially assigning remaining rings count to VSIs num queue value */ - tx_rings_rem = vsi->num_txq; - rx_rings_rem = vsi->num_rxq; - - for (v_id = 0; v_id < q_vectors; v_id++) { - struct ice_q_vector *q_vector = vsi->q_vectors[v_id]; - int tx_rings_per_v, rx_rings_per_v, q_id, q_base; - - /* Tx rings mapping to vector */ - tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id); - q_vector->num_ring_tx = tx_rings_per_v; - q_vector->tx.ring = NULL; - q_vector->tx.itr_idx = ICE_TX_ITR; - q_base = vsi->num_txq - tx_rings_rem; - - for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) { - struct ice_ring *tx_ring = vsi->tx_rings[q_id]; - - tx_ring->q_vector = q_vector; - tx_ring->next = q_vector->tx.ring; - q_vector->tx.ring = tx_ring; - } - tx_rings_rem -= tx_rings_per_v; - - /* Rx rings mapping to vector */ - rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id); - q_vector->num_ring_rx = rx_rings_per_v; - q_vector->rx.ring = NULL; - q_vector->rx.itr_idx = ICE_RX_ITR; - q_base = vsi->num_rxq - rx_rings_rem; - - for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) { - struct ice_ring *rx_ring = vsi->rx_rings[q_id]; - - rx_ring->q_vector = q_vector; - rx_ring->next = q_vector->rx.ring; - q_vector->rx.ring = rx_ring; - } - rx_rings_rem -= rx_rings_per_v; - } -} - -/** * ice_vsi_manage_rss_lut - disable/enable RSS * @vsi: the VSI being changed * @ena: boolean value indicating if this is an enable or disable request @@ -1414,8 +997,7 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) int err = 0; u8 *lut; - lut = devm_kzalloc(&vsi->back->pdev->dev, vsi->rss_table_size, - GFP_KERNEL); + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) return -ENOMEM; @@ -1428,7 +1010,7 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) } err = ice_set_rss(vsi, NULL, lut, vsi->rss_table_size); - devm_kfree(&vsi->back->pdev->dev, lut); + kfree(lut); return err; } @@ -1441,12 +1023,14 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) struct ice_aqc_get_set_rss_keys *key; struct ice_pf *pf = vsi->back; enum ice_status status; + struct device *dev; int err = 0; u8 *lut; + dev = ice_pf_to_dev(pf); vsi->rss_size = min_t(int, vsi->rss_size, vsi->num_rxq); - lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL); + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) return -ENOMEM; @@ -1459,13 +1043,12 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) vsi->rss_table_size); if (status) { - dev_err(&pf->pdev->dev, - "set_rss_lut failed, error %d\n", status); + dev_err(dev, "set_rss_lut 
failed, error %d\n", status); err = -EIO; goto ice_vsi_cfg_rss_exit; } - key = devm_kzalloc(&pf->pdev->dev, sizeof(*key), GFP_KERNEL); + key = kzalloc(sizeof(*key), GFP_KERNEL); if (!key) { err = -ENOMEM; goto ice_vsi_cfg_rss_exit; @@ -1482,14 +1065,13 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key); if (status) { - dev_err(&pf->pdev->dev, "set_rss_key failed, error %d\n", - status); + dev_err(dev, "set_rss_key failed, error %d\n", status); err = -EIO; } - devm_kfree(&pf->pdev->dev, key); + kfree(key); ice_vsi_cfg_rss_exit: - devm_kfree(&pf->pdev->dev, lut); + kfree(lut); return err; } @@ -1509,7 +1091,7 @@ int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, struct ice_fltr_list_entry *tmp; struct ice_pf *pf = vsi->back; - tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_ATOMIC); + tmp = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*tmp), GFP_ATOMIC); if (!tmp) return -ENOMEM; @@ -1601,9 +1183,11 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid) struct ice_pf *pf = vsi->back; LIST_HEAD(tmp_add_list); enum ice_status status; + struct device *dev; int err = 0; - tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_KERNEL); + dev = ice_pf_to_dev(pf); + tmp = devm_kzalloc(dev, sizeof(*tmp), GFP_KERNEL); if (!tmp) return -ENOMEM; @@ -1620,11 +1204,11 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid) status = ice_add_vlan(&pf->hw, &tmp_add_list); if (status) { err = -ENODEV; - dev_err(&pf->pdev->dev, "Failure Adding VLAN %d on VSI %i\n", - vid, vsi->vsi_num); + dev_err(dev, "Failure Adding VLAN %d on VSI %i\n", vid, + vsi->vsi_num); } - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + ice_free_fltr_list(dev, &tmp_add_list); return err; } @@ -1641,9 +1225,11 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid) struct ice_pf *pf = vsi->back; LIST_HEAD(tmp_add_list); enum ice_status status; + struct device *dev; int err = 0; - list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL); + dev = ice_pf_to_dev(pf); + list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL); if (!list) return -ENOMEM; @@ -1659,21 +1245,46 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid) status = ice_remove_vlan(&pf->hw, &tmp_add_list); if (status == ICE_ERR_DOES_NOT_EXIST) { - dev_dbg(&pf->pdev->dev, + dev_dbg(dev, "Failed to remove VLAN %d on VSI %i, it does not exist, status: %d\n", vid, vsi->vsi_num, status); } else if (status) { - dev_err(&pf->pdev->dev, + dev_err(dev, "Error removing VLAN %d on vsi %i error: %d\n", vid, vsi->vsi_num, status); err = -EIO; } - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + ice_free_fltr_list(dev, &tmp_add_list); return err; } /** + * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length + * @vsi: VSI + */ +void ice_vsi_cfg_frame_size(struct ice_vsi *vsi) +{ + if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) { + vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX; + vsi->rx_buf_len = ICE_RXBUF_2048; +#if (PAGE_SIZE < 8192) + } else if (!ICE_2K_TOO_SMALL_WITH_PADDING && + (vsi->netdev->mtu <= ETH_DATA_LEN)) { + vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN; + vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN; +#endif + } else { + vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX; +#if (PAGE_SIZE < 8192) + vsi->rx_buf_len = ICE_RXBUF_3072; +#else + vsi->rx_buf_len = ICE_RXBUF_2048; +#endif + } +} + +/** * ice_vsi_cfg_rxqs - Configure the VSI for Rx * @vsi: the VSI being configured * @@ -1687,13 +1298,7 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi) if 
(vsi->type == ICE_VSI_VF) goto setup_rings; - if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN) - vsi->max_frame = vsi->netdev->mtu + - ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; - else - vsi->max_frame = ICE_RXBUF_2048; - - vsi->rx_buf_len = ICE_RXBUF_2048; + ice_vsi_cfg_frame_size(vsi); setup_rings: /* set up individual rings */ for (i = 0; i < vsi->num_rxq; i++) { @@ -1712,101 +1317,34 @@ setup_rings: } /** - * ice_vsi_cfg_txq - Configure single Tx queue - * @vsi: the VSI that queue belongs to - * @ring: Tx ring to be configured - * @tc_q_idx: queue index within given TC - * @qg_buf: queue group buffer - * @tc: TC that Tx ring belongs to - */ -static int -ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, u16 tc_q_idx, - struct ice_aqc_add_tx_qgrp *qg_buf, u8 tc) -{ - struct ice_tlan_ctx tlan_ctx = { 0 }; - struct ice_aqc_add_txqs_perq *txq; - struct ice_pf *pf = vsi->back; - u8 buf_len = sizeof(*qg_buf); - enum ice_status status; - u16 pf_q; - - pf_q = ring->reg_idx; - ice_setup_tx_ctx(ring, &tlan_ctx, pf_q); - /* copy context contents into the qg_buf */ - qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); - ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx, - ice_tlan_ctx_info); - - /* init queue specific tail reg. It is referred as - * transmit comm scheduler queue doorbell. - */ - ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q); - - /* Add unique software queue handle of the Tx queue per - * TC into the VSI Tx ring - */ - ring->q_handle = tc_q_idx; - - status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle, - 1, qg_buf, buf_len, NULL); - if (status) { - dev_err(&pf->pdev->dev, - "Failed to set LAN Tx queue context, error: %d\n", - status); - return -ENODEV; - } - - /* Add Tx Queue TEID into the VSI Tx ring from the - * response. This will complete configuring and - * enabling the queue. - */ - txq = &qg_buf->txqs[0]; - if (pf_q == le16_to_cpu(txq->txq_id)) - ring->txq_teid = le32_to_cpu(txq->q_teid); - - return 0; -} - -/** * ice_vsi_cfg_txqs - Configure the VSI for Tx * @vsi: the VSI being configured * @rings: Tx ring array to be configured - * @offset: offset within vsi->txq_map * * Return 0 on success and a negative value on error * Configure the Tx VSI for operation. 
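For illustration only (not part of the patch): ice_vsi_cfg_frame_size(), added above in this file, chooses the Rx buffer length from three cases: legacy-rx (or no netdev) keeps 2 KB buffers, a standard MTU on a small-page system gets a 1536-byte buffer minus NET_IP_ALIGN, and everything else gets 3 KB buffers (2 KB when PAGE_SIZE >= 8192). The following standalone model covers the small-page case, with the ICE_RXBUF_* macros assumed to equal their literal byte values, NET_IP_ALIGN assumed to be 2, and ICE_2K_TOO_SMALL_WITH_PADDING assumed false.

#include <stdbool.h>
#include <stdio.h>

#define EXAMPLE_ETH_DATA_LEN	1500	/* standard Ethernet MTU */
#define EXAMPLE_NET_IP_ALIGN	2	/* assumed; 0 on some architectures */
#define EXAMPLE_RXBUF_1536	1536
#define EXAMPLE_RXBUF_2048	2048
#define EXAMPLE_RXBUF_3072	3072

/* Hypothetical model of the rx_buf_len choice in ice_vsi_cfg_frame_size()
 * for a system with PAGE_SIZE < 8192 where a 2 KB buffer is large enough
 * for the required padding.
 */
static int pick_rx_buf_len(bool legacy_rx, unsigned int mtu)
{
	if (legacy_rx)
		return EXAMPLE_RXBUF_2048;
	if (mtu <= EXAMPLE_ETH_DATA_LEN)
		return EXAMPLE_RXBUF_1536 - EXAMPLE_NET_IP_ALIGN;
	return EXAMPLE_RXBUF_3072;	/* larger MTUs on small-page systems */
}

int main(void)
{
	printf("mtu 1500 -> %d bytes\n", pick_rx_buf_len(false, 1500));
	printf("mtu 9000 -> %d bytes\n", pick_rx_buf_len(false, 9000));
	printf("legacy   -> %d bytes\n", pick_rx_buf_len(true, 1500));
	return 0;
}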
*/ static int -ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset) +ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings) { struct ice_aqc_add_tx_qgrp *qg_buf; - struct ice_pf *pf = vsi->back; - u16 q_idx = 0, i; + u16 q_idx = 0; int err = 0; - u8 tc; - qg_buf = devm_kzalloc(&pf->pdev->dev, sizeof(*qg_buf), GFP_KERNEL); + qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL); if (!qg_buf) return -ENOMEM; qg_buf->num_txqs = 1; - /* set up and configure the Tx queues for each enabled TC */ - ice_for_each_traffic_class(tc) { - if (!(vsi->tc_cfg.ena_tc & BIT(tc))) - break; - - for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) { - err = ice_vsi_cfg_txq(vsi, rings[q_idx], i + offset, - qg_buf, tc); - if (err) - goto err_cfg_txqs; - - q_idx++; - } + for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) { + err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf); + if (err) + goto err_cfg_txqs; } + err_cfg_txqs: - devm_kfree(&pf->pdev->dev, qg_buf); + kfree(qg_buf); return err; } @@ -1819,159 +1357,46 @@ err_cfg_txqs: */ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi) { - return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, 0); -} - -/** - * ice_intrl_usec_to_reg - convert interrupt rate limit to register value - * @intrl: interrupt rate limit in usecs - * @gran: interrupt rate limit granularity in usecs - * - * This function converts a decimal interrupt rate limit in usecs to the format - * expected by firmware. - */ -u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) -{ - u32 val = intrl / gran; - - if (val) - return val | GLINT_RATE_INTRL_ENA_M; - return 0; -} - -/** - * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set - * @hw: board specific structure - */ -static void ice_cfg_itr_gran(struct ice_hw *hw) -{ - u32 regval = rd32(hw, GLINT_CTL); - - /* no need to update global register if ITR gran is already set */ - if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) && - (((regval & GLINT_CTL_ITR_GRAN_200_M) >> - GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) && - (((regval & GLINT_CTL_ITR_GRAN_100_M) >> - GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) && - (((regval & GLINT_CTL_ITR_GRAN_50_M) >> - GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) && - (((regval & GLINT_CTL_ITR_GRAN_25_M) >> - GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US)) - return; - - regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) & - GLINT_CTL_ITR_GRAN_200_M) | - ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) & - GLINT_CTL_ITR_GRAN_100_M) | - ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) & - GLINT_CTL_ITR_GRAN_50_M) | - ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) & - GLINT_CTL_ITR_GRAN_25_M); - wr32(hw, GLINT_CTL, regval); -} - -/** - * ice_cfg_itr - configure the initial interrupt throttle values - * @hw: pointer to the HW structure - * @q_vector: interrupt vector that's being configured - * - * Configure interrupt throttling values for the ring containers that are - * associated with the interrupt vector passed in. 
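For illustration only (not part of the patch): the removed ice_cfg_itr() body that follows seeds a default ITR only when no value has been configured yet ("if this value is set then don't overwrite with default"), so a previously chosen interrupt throttling setting is not clobbered when the vector is reprogrammed. A minimal model of that check, with the default value assumed:

#include <stdio.h>

#define EXAMPLE_DFLT_ITR	50	/* assumed default interval, in usecs */

/* Model of the check in ice_cfg_itr(): keep a non-zero, already-configured
 * ITR setting; only fall back to the default when nothing was set.
 */
static unsigned int pick_itr(unsigned int itr_setting)
{
	return itr_setting ? itr_setting : EXAMPLE_DFLT_ITR;
}

int main(void)
{
	printf("unset   -> %u usecs\n", pick_itr(0));
	printf("user 84 -> %u usecs\n", pick_itr(84));
	return 0;
}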
- */ -static void -ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector) -{ - ice_cfg_itr_gran(hw); - - if (q_vector->num_ring_rx) { - struct ice_ring_container *rc = &q_vector->rx; - - /* if this value is set then don't overwrite with default */ - if (!rc->itr_setting) - rc->itr_setting = ICE_DFLT_RX_ITR; - - rc->target_itr = ITR_TO_REG(rc->itr_setting); - rc->next_update = jiffies + 1; - rc->current_itr = rc->target_itr; - wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); - } - - if (q_vector->num_ring_tx) { - struct ice_ring_container *rc = &q_vector->tx; - - /* if this value is set then don't overwrite with default */ - if (!rc->itr_setting) - rc->itr_setting = ICE_DFLT_TX_ITR; - - rc->target_itr = ITR_TO_REG(rc->itr_setting); - rc->next_update = jiffies + 1; - rc->current_itr = rc->target_itr; - wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); - } + return ice_vsi_cfg_txqs(vsi, vsi->tx_rings); } /** - * ice_cfg_txq_interrupt - configure interrupt on Tx queue + * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI * @vsi: the VSI being configured - * @txq: Tx queue being mapped to MSI-X vector - * @msix_idx: MSI-X vector index within the function - * @itr_idx: ITR index of the interrupt cause * - * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector - * within the function space. + * Return 0 on success and a negative value on error + * Configure the Tx queues dedicated for XDP in given VSI for operation. */ -#ifdef CONFIG_PCI_IOV -void -ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx) -#else -static void -ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx) -#endif /* CONFIG_PCI_IOV */ +int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; - u32 val; + int ret; + int i; - itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M; + ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings); + if (ret) + return ret; - val = QINT_TQCTL_CAUSE_ENA_M | itr_idx | - ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M); + for (i = 0; i < vsi->num_xdp_txq; i++) + vsi->xdp_rings[i]->xsk_umem = ice_xsk_umem(vsi->xdp_rings[i]); - wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val); + return ret; } /** - * ice_cfg_rxq_interrupt - configure interrupt on Rx queue - * @vsi: the VSI being configured - * @rxq: Rx queue being mapped to MSI-X vector - * @msix_idx: MSI-X vector index within the function - * @itr_idx: ITR index of the interrupt cause + * ice_intrl_usec_to_reg - convert interrupt rate limit to register value + * @intrl: interrupt rate limit in usecs + * @gran: interrupt rate limit granularity in usecs * - * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector - * within the function space. + * This function converts a decimal interrupt rate limit in usecs to the format + * expected by firmware. 
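For illustration only (not part of the patch): as the kernel-doc above says, ice_intrl_usec_to_reg() divides the requested limit by the hardware granularity and sets an enable flag when the result is non-zero, so a limit smaller than the granularity leaves rate limiting off. A worked example with a 2 usec granularity follows; EXAMPLE_INTRL_ENA_M is a placeholder, not the real GLINT_RATE_INTRL_ENA_M value.

#include <stdio.h>

/* Placeholder only: the real GLINT_RATE_INTRL_ENA_M mask is defined in
 * ice_hw_autogen.h and its bit position is not assumed here.
 */
#define EXAMPLE_INTRL_ENA_M	(1u << 31)

/* Mirror of ice_intrl_usec_to_reg(): scale to hardware units, and set the
 * enable flag only when the scaled value is non-zero.
 */
static unsigned int example_intrl_to_reg(unsigned int intrl, unsigned int gran)
{
	unsigned int val = intrl / gran;

	return val ? (val | EXAMPLE_INTRL_ENA_M) : 0;
}

int main(void)
{
	/* 50 usecs at 2 usec granularity -> 25, with the enable flag set */
	printf("0x%08x\n", example_intrl_to_reg(50, 2));
	/* a limit below the granularity truncates to 0: rate limiting stays off */
	printf("0x%08x\n", example_intrl_to_reg(1, 2));
	return 0;
}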
*/ -#ifdef CONFIG_PCI_IOV -void -ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx) -#else -static void -ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx) -#endif /* CONFIG_PCI_IOV */ +u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; - u32 val; - - itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M; - - val = QINT_RQCTL_CAUSE_ENA_M | itr_idx | - ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M); - - wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val); + u32 val = intrl / gran; - ice_flush(hw); + if (val) + return val | GLINT_RATE_INTRL_ENA_M; + return 0; } /** @@ -2028,13 +1453,12 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi) */ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) { - struct device *dev = &vsi->back->pdev->dev; struct ice_hw *hw = &vsi->back->hw; struct ice_vsi_ctx *ctxt; enum ice_status status; int ret = 0; - ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return -ENOMEM; @@ -2052,7 +1476,7 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(dev, "update VSI for VLAN insert failed, err %d aq_err %d\n", + dev_err(&vsi->back->pdev->dev, "update VSI for VLAN insert failed, err %d aq_err %d\n", status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -2060,7 +1484,7 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) vsi->info.vlan_flags = ctxt->info.vlan_flags; out: - devm_kfree(dev, ctxt); + kfree(ctxt); return ret; } @@ -2071,13 +1495,12 @@ out: */ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) { - struct device *dev = &vsi->back->pdev->dev; struct ice_hw *hw = &vsi->back->hw; struct ice_vsi_ctx *ctxt; enum ice_status status; int ret = 0; - ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return -ENOMEM; @@ -2099,7 +1522,7 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(dev, "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n", + dev_err(&vsi->back->pdev->dev, "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n", ena, status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -2107,7 +1530,7 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) vsi->info.vlan_flags = ctxt->info.vlan_flags; out: - devm_kfree(dev, ctxt); + kfree(ctxt); return ret; } @@ -2134,109 +1557,6 @@ int ice_vsi_stop_rx_rings(struct ice_vsi *vsi) } /** - * ice_trigger_sw_intr - trigger a software interrupt - * @hw: pointer to the HW structure - * @q_vector: interrupt vector to trigger the software interrupt for - */ -void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector) -{ - wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), - (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) | - GLINT_DYN_CTL_SWINT_TRIG_M | - GLINT_DYN_CTL_INTENA_M); -} - -/** - * ice_vsi_stop_tx_ring - Disable single Tx ring - * @vsi: the VSI being configured - * @rst_src: reset source - * @rel_vmvf_num: Relative ID of VF/VM - * @ring: Tx ring to be stopped - * @txq_meta: Meta data of Tx ring to be stopped - */ -#ifndef CONFIG_PCI_IOV -static -#endif /* !CONFIG_PCI_IOV */ -int -ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, - u16 rel_vmvf_num, struct ice_ring *ring, - struct ice_txq_meta *txq_meta) -{ - 
struct ice_pf *pf = vsi->back; - struct ice_q_vector *q_vector; - struct ice_hw *hw = &pf->hw; - enum ice_status status; - u32 val; - - /* clear cause_ena bit for disabled queues */ - val = rd32(hw, QINT_TQCTL(ring->reg_idx)); - val &= ~QINT_TQCTL_CAUSE_ENA_M; - wr32(hw, QINT_TQCTL(ring->reg_idx), val); - - /* software is expected to wait for 100 ns */ - ndelay(100); - - /* trigger a software interrupt for the vector - * associated to the queue to schedule NAPI handler - */ - q_vector = ring->q_vector; - if (q_vector) - ice_trigger_sw_intr(hw, q_vector); - - status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx, - txq_meta->tc, 1, &txq_meta->q_handle, - &txq_meta->q_id, &txq_meta->q_teid, rst_src, - rel_vmvf_num, NULL); - - /* if the disable queue command was exercised during an - * active reset flow, ICE_ERR_RESET_ONGOING is returned. - * This is not an error as the reset operation disables - * queues at the hardware level anyway. - */ - if (status == ICE_ERR_RESET_ONGOING) { - dev_dbg(&vsi->back->pdev->dev, - "Reset in progress. LAN Tx queues already disabled\n"); - } else if (status == ICE_ERR_DOES_NOT_EXIST) { - dev_dbg(&vsi->back->pdev->dev, - "LAN Tx queues do not exist, nothing to disable\n"); - } else if (status) { - dev_err(&vsi->back->pdev->dev, - "Failed to disable LAN Tx queues, error: %d\n", status); - return -ENODEV; - } - - return 0; -} - -/** - * ice_fill_txq_meta - Prepare the Tx queue's meta data - * @vsi: VSI that ring belongs to - * @ring: ring that txq_meta will be based on - * @txq_meta: a helper struct that wraps Tx queue's information - * - * Set up a helper struct that will contain all the necessary fields that - * are needed for stopping Tx queue - */ -#ifndef CONFIG_PCI_IOV -static -#endif /* !CONFIG_PCI_IOV */ -void -ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring, - struct ice_txq_meta *txq_meta) -{ - u8 tc = 0; - -#ifdef CONFIG_DCB - tc = ring->dcb_tc; -#endif /* CONFIG_DCB */ - txq_meta->q_id = ring->reg_idx; - txq_meta->q_teid = ring->txq_teid; - txq_meta->q_handle = ring->q_handle; - txq_meta->vsi_idx = vsi->idx; - txq_meta->tc = tc; -} - -/** * ice_vsi_stop_tx_rings - Disable Tx rings * @vsi: the VSI being configured * @rst_src: reset source @@ -2247,34 +1567,24 @@ static int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, u16 rel_vmvf_num, struct ice_ring **rings) { - u16 i, q_idx = 0; - int status; - u8 tc; + u16 q_idx; if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS) return -EINVAL; - /* set up the Tx queue list to be disabled for each enabled TC */ - ice_for_each_traffic_class(tc) { - if (!(vsi->tc_cfg.ena_tc & BIT(tc))) - break; - - for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) { - struct ice_txq_meta txq_meta = { }; - - if (!rings || !rings[q_idx]) - return -EINVAL; + for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) { + struct ice_txq_meta txq_meta = { }; + int status; - ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta); - status = ice_vsi_stop_tx_ring(vsi, rst_src, - rel_vmvf_num, - rings[q_idx], &txq_meta); + if (!rings || !rings[q_idx]) + return -EINVAL; - if (status) - return status; + ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta); + status = ice_vsi_stop_tx_ring(vsi, rst_src, rel_vmvf_num, + rings[q_idx], &txq_meta); - q_idx++; - } + if (status) + return status; } return 0; @@ -2294,6 +1604,15 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, } /** + * ice_vsi_stop_xdp_tx_rings - Disable XDP Tx rings + * @vsi: the VSI being configured + */ +int 
ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi) +{ + return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings); +} + +/** * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI * @vsi: VSI to enable or disable VLAN pruning on * @ena: set to true to enable VLAN pruning and false to disable it @@ -2304,7 +1623,6 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) { struct ice_vsi_ctx *ctxt; - struct device *dev; struct ice_pf *pf; int status; @@ -2312,8 +1630,7 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) return -EINVAL; pf = vsi->back; - dev = &pf->pdev->dev; - ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return -ENOMEM; @@ -2347,11 +1664,11 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) vsi->info.sec_flags = ctxt->info.sec_flags; vsi->info.sw_flags2 = ctxt->info.sw_flags2; - devm_kfree(dev, ctxt); + kfree(ctxt); return 0; err_out: - devm_kfree(dev, ctxt); + kfree(ctxt); return -EIO; } @@ -2420,8 +1737,10 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule) struct ice_pf *pf = vsi->back; LIST_HEAD(tmp_add_list); enum ice_status status; + struct device *dev; - list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL); + dev = ice_pf_to_dev(pf); + list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL); if (!list) return; @@ -2441,11 +1760,11 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule) status = ice_remove_eth_mac(&pf->hw, &tmp_add_list); if (status) - dev_err(&pf->pdev->dev, + dev_err(dev, "Failure Adding or Removing Ethertype on VSI %i error: %d\n", vsi->vsi_num, status); - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + ice_free_fltr_list(dev, &tmp_add_list); } /** @@ -2460,8 +1779,10 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create) struct ice_pf *pf = vsi->back; LIST_HEAD(tmp_add_list); enum ice_status status; + struct device *dev; - list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL); + dev = ice_pf_to_dev(pf); + list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL); if (!list) return; @@ -2488,12 +1809,11 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create) status = ice_remove_eth_mac(&pf->hw, &tmp_add_list); if (status) - dev_err(&pf->pdev->dev, - "Fail %s %s LLDP rule on VSI %i error: %d\n", + dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %d\n", create ? "adding" : "removing", tx ? 
"TX" : "RX", vsi->vsi_num, status); - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + ice_free_fltr_list(dev, &tmp_add_list); } /** @@ -2515,7 +1835,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, enum ice_vsi_type type, u16 vf_id) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; - struct device *dev = &pf->pdev->dev; + struct device *dev = ice_pf_to_dev(pf); enum ice_status status; struct ice_vsi *vsi; int ret, i; @@ -2551,7 +1871,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, ice_vsi_set_tc_cfg(vsi); /* create the VSI */ - ret = ice_vsi_init(vsi); + ret = ice_vsi_init(vsi, true); if (ret) goto unroll_get_qs; @@ -2624,8 +1944,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, max_txqs); if (status) { - dev_err(&pf->pdev->dev, - "VSI %d failed lan queue config, error %d\n", + dev_err(dev, "VSI %d failed lan queue config, error %d\n", vsi->vsi_num, status); goto unroll_vector_base; } @@ -2635,23 +1954,17 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, * out PAUSE or PFC frames. If enabled, FW can still send FC frames. * The rule is added once for PF VSI in order to create appropriate * recipe, since VSI/VSI list is ignored with drop action... - * Also add rules to handle LLDP Tx and Rx packets. Tx LLDP packets - * need to be dropped so that VFs cannot send LLDP packets to reconfig - * DCB settings in the HW. Also, if the FW DCBX engine is not running - * then Rx LLDP packets need to be redirected up the stack. + * Also add rules to handle LLDP Tx packets. Tx LLDP packets need to + * be dropped so that VFs cannot send LLDP packets to reconfig DCB + * settings in the HW. */ - if (!ice_is_safe_mode(pf)) { + if (!ice_is_safe_mode(pf)) if (vsi->type == ICE_VSI_PF) { ice_vsi_add_rem_eth_mac(vsi, true); /* Tx LLDP packets */ ice_cfg_sw_lldp(vsi, true, true); - - /* Rx LLDP packets */ - if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) - ice_cfg_sw_lldp(vsi, false, true); } - } return vsi; @@ -2690,6 +2003,11 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0); for (q = 0; q < q_vector->num_ring_tx; q++) { wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0); + if (ice_is_xdp_ena_vsi(vsi)) { + u32 xdp_txq = txq + vsi->num_xdp_txq; + + wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0); + } txq++; } @@ -2738,8 +2056,7 @@ void ice_vsi_free_irq(struct ice_vsi *vsi) /* clear the affinity_mask in the IRQ descriptor */ irq_set_affinity_hint(irq_num, NULL); synchronize_irq(irq_num); - devm_free_irq(&pf->pdev->dev, irq_num, - vsi->q_vectors[i]); + devm_free_irq(ice_pf_to_dev(pf), irq_num, vsi->q_vectors[i]); } } @@ -2790,6 +2107,62 @@ void ice_vsi_close(struct ice_vsi *vsi) } /** + * ice_ena_vsi - resume a VSI + * @vsi: the VSI being resume + * @locked: is the rtnl_lock already held + */ +int ice_ena_vsi(struct ice_vsi *vsi, bool locked) +{ + int err = 0; + + if (!test_bit(__ICE_NEEDS_RESTART, vsi->state)) + return 0; + + clear_bit(__ICE_NEEDS_RESTART, vsi->state); + + if (vsi->netdev && vsi->type == ICE_VSI_PF) { + if (netif_running(vsi->netdev)) { + if (!locked) + rtnl_lock(); + + err = ice_open(vsi->netdev); + + if (!locked) + rtnl_unlock(); + } + } + + return err; +} + +/** + * ice_dis_vsi - pause a VSI + * @vsi: the VSI being paused + * @locked: is the rtnl_lock already held + */ +void ice_dis_vsi(struct ice_vsi *vsi, bool locked) +{ + if (test_bit(__ICE_DOWN, vsi->state)) + return; + + set_bit(__ICE_NEEDS_RESTART, vsi->state); + + 
if (vsi->type == ICE_VSI_PF && vsi->netdev) { + if (netif_running(vsi->netdev)) { + if (!locked) + rtnl_lock(); + + ice_stop(vsi->netdev); + + if (!locked) + rtnl_unlock(); + } else { + ice_vsi_close(vsi); + } + } +} + +/** * ice_free_res - free a block of resources * @res: pointer to the resource * @index: starting index previously returned by ice_get_res @@ -2869,7 +2242,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) return -EINVAL; if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) { - dev_err(&pf->pdev->dev, + dev_err(ice_pf_to_dev(pf), "param err: needed=%d, num_entries = %d id=0x%04x\n", needed, res->num_entries, id); return -EINVAL; @@ -3031,10 +2404,11 @@ int ice_vsi_release(struct ice_vsi *vsi) /** * ice_vsi_rebuild - Rebuild VSI after reset * @vsi: VSI to be rebuild + * @init_vsi: is this an initialization or a reconfigure of the VSI * * Returns 0 on success and negative value on failure */ -int ice_vsi_rebuild(struct ice_vsi *vsi) +int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_vf *vf = NULL; @@ -3064,6 +2438,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) vsi->base_vector = 0; } + if (ice_is_xdp_ena_vsi(vsi)) + /* return value check can be skipped here, it always returns + * 0 if reset is in progress + */ + ice_destroy_xdp_rings(vsi); ice_vsi_put_qs(vsi); ice_vsi_clear_rings(vsi); ice_vsi_free_arrays(vsi); @@ -3081,11 +2460,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) ice_vsi_set_tc_cfg(vsi); /* Initialize VSI struct elements and create VSI in FW */ - ret = ice_vsi_init(vsi); + ret = ice_vsi_init(vsi, init_vsi); if (ret < 0) goto err_vsi; - switch (vsi->type) { case ICE_VSI_PF: ret = ice_vsi_alloc_q_vectors(vsi); @@ -3105,6 +2483,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) goto err_vectors; ice_vsi_map_rings_to_vectors(vsi); + if (ice_is_xdp_ena_vsi(vsi)) { + vsi->num_xdp_txq = vsi->alloc_txq; + ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog); + if (ret) + goto err_vectors; + } /* Do not exit if configuring RSS had an issue, at least * receive traffic on first queue. 
Hence no need to capture * return value @@ -3131,16 +2515,25 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) } /* configure VSI nodes based on number of queues and TC's */ - for (i = 0; i < vsi->tc_cfg.numtc; i++) + for (i = 0; i < vsi->tc_cfg.numtc; i++) { max_txqs[i] = vsi->alloc_txq; + if (ice_is_xdp_ena_vsi(vsi)) + max_txqs[i] += vsi->num_xdp_txq; + } + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, max_txqs); if (status) { - dev_err(&pf->pdev->dev, + dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %d\n", vsi->vsi_num, status); - goto err_vectors; + if (init_vsi) { + ret = -EIO; + goto err_vectors; + } else { + return ice_schedule_reset(pf, ICE_RESET_PFR); + } } return 0; @@ -3166,6 +2559,7 @@ err_vsi: bool ice_is_reset_in_progress(unsigned long *state) { return test_bit(__ICE_RESET_OICR_RECV, state) || + test_bit(__ICE_DCBNL_DEVRESET, state) || test_bit(__ICE_PFR_REQ, state) || test_bit(__ICE_CORER_REQ, state) || test_bit(__ICE_GLOBR_REQ, state); @@ -3199,9 +2593,12 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) struct ice_vsi_ctx *ctx; struct ice_pf *pf = vsi->back; enum ice_status status; + struct device *dev; int i, ret = 0; u8 num_tc = 0; + dev = ice_pf_to_dev(pf); + ice_for_each_traffic_class(i) { /* build bitmap of enabled TCs */ if (ena_tc & BIT(i)) @@ -3213,7 +2610,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) vsi->tc_cfg.ena_tc = ena_tc; vsi->tc_cfg.numtc = num_tc; - ctx = devm_kzalloc(&pf->pdev->dev, sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; @@ -3226,7 +2623,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL); if (status) { - dev_info(&pf->pdev->dev, "Failed VSI Update\n"); + dev_info(dev, "Failed VSI Update\n"); ret = -EIO; goto out; } @@ -3235,8 +2632,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) max_txqs); if (status) { - dev_err(&pf->pdev->dev, - "VSI %d failed TC config, error %d\n", + dev_err(dev, "VSI %d failed TC config, error %d\n", vsi->vsi_num, status); ret = -EIO; goto out; @@ -3246,7 +2642,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) ice_vsi_cfg_netdev_tc(vsi, ena_tc); out: - devm_kfree(&pf->pdev->dev, ctx); + kfree(ctx); return ret; } #endif /* CONFIG_DCB */ @@ -3271,6 +2667,51 @@ char *ice_nvm_version_str(struct ice_hw *hw) } /** + * ice_update_ring_stats - Update ring statistics + * @ring: ring to update + * @cont: used to increment per-vector counters + * @pkts: number of processed packets + * @bytes: number of processed bytes + * + * This function assumes that caller has acquired a u64_stats_sync lock. 
+ */ +static void +ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont, + u64 pkts, u64 bytes) +{ + ring->stats.bytes += bytes; + ring->stats.pkts += pkts; + cont->total_bytes += bytes; + cont->total_pkts += pkts; +} + +/** + * ice_update_tx_ring_stats - Update Tx ring specific counters + * @tx_ring: ring to update + * @pkts: number of processed packets + * @bytes: number of processed bytes + */ +void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes) +{ + u64_stats_update_begin(&tx_ring->syncp); + ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes); + u64_stats_update_end(&tx_ring->syncp); +} + +/** + * ice_update_rx_ring_stats - Update Rx ring specific counters + * @rx_ring: ring to update + * @pkts: number of processed packets + * @bytes: number of processed bytes + */ +void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes) +{ + u64_stats_update_begin(&rx_ring->syncp); + ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes); + u64_stats_update_end(&rx_ring->syncp); +} + +/** * ice_vsi_cfg_mac_fltr - Add or remove a MAC address filter for a VSI * @vsi: the VSI being configured MAC filter * @macaddr: the MAC address to be added. diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 47bc033fff20..6e31e30aba39 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -6,18 +6,7 @@ #include "ice.h" -struct ice_txq_meta { - /* Tx-scheduler element identifier */ - u32 q_teid; - /* Entry in VSI's txq_map bitmap */ - u16 q_id; - /* Relative index of Tx queue within TC */ - u16 q_handle; - /* VSI index that Tx queue belongs to */ - u16 vsi_idx; - /* TC number that Tx queue belongs to */ - u8 tc; -}; +const char *ice_vsi_type_str(enum ice_vsi_type type); int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, @@ -33,24 +22,6 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi); void ice_vsi_cfg_msix(struct ice_vsi *vsi); -#ifdef CONFIG_PCI_IOV -void -ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx); - -void -ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx); - -int -ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, - u16 rel_vmvf_num, struct ice_ring *ring, - struct ice_txq_meta *txq_meta); - -void ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring, - struct ice_txq_meta *txq_meta); - -int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx); -#endif /* CONFIG_PCI_IOV */ - int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid); int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid); @@ -67,6 +38,10 @@ int ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, u16 rel_vmvf_num); +int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi); + +int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi); + int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc); void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); @@ -89,25 +64,21 @@ int ice_vsi_release(struct ice_vsi *vsi); void ice_vsi_close(struct ice_vsi *vsi); +int ice_ena_vsi(struct ice_vsi *vsi, bool locked); + +void ice_dis_vsi(struct ice_vsi *vsi, bool locked); + int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id); int ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id); -int ice_vsi_rebuild(struct ice_vsi *vsi); +int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi); 
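/* Editor's sketch, not part of the patch: the writer-side
 * u64_stats_update_begin()/end() pairs in the new ring-stats helpers above
 * only pay off if readers use the matching fetch/retry loop. A minimal
 * reader could look like the function below; the name ice_fetch_ring_stats()
 * is illustrative and is not something this series adds.
 */
static void
ice_fetch_ring_stats(struct ice_ring *ring, u64 *pkts, u64 *bytes)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&ring->syncp);
		*pkts = ring->stats.pkts;
		*bytes = ring->stats.bytes;
	} while (u64_stats_fetch_retry(&ring->syncp, start));
}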
bool ice_is_reset_in_progress(unsigned long *state); -void ice_vsi_free_q_vectors(struct ice_vsi *vsi); - -void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector); - void ice_vsi_put_qs(struct ice_vsi *vsi); -#ifdef CONFIG_DCB -void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi); -#endif /* CONFIG_DCB */ - void ice_vsi_dis_irq(struct ice_vsi *vsi); void ice_vsi_free_irq(struct ice_vsi *vsi); @@ -118,6 +89,12 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi); int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); +void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes); + +void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes); + +void ice_vsi_cfg_frame_size(struct ice_vsi *vsi); + u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran); char *ice_nvm_version_str(struct ice_hw *hw); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 214cd6eca405..69bff085acf7 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6,8 +6,10 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "ice.h" +#include "ice_base.h" #include "ice_lib.h" #include "ice_dcb_lib.h" +#include "ice_dcb_nl.h" #define DRV_VERSION_MAJOR 0 #define DRV_VERSION_MINOR 8 @@ -42,6 +44,7 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); static struct workqueue_struct *ice_wq; static const struct net_device_ops ice_netdev_safe_mode_ops; static const struct net_device_ops ice_netdev_ops; +static int ice_vsi_open(struct ice_vsi *vsi); static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type); @@ -159,7 +162,7 @@ unregister: * had an error */ if (status && vsi->netdev->reg_state == NETREG_REGISTERED) { - dev_err(&pf->pdev->dev, + dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %d. 
Unregistering device\n", status); unregister_netdev(vsi->netdev); @@ -435,42 +438,11 @@ static void ice_sync_fltr_subtask(struct ice_pf *pf) } /** - * ice_dis_vsi - pause a VSI - * @vsi: the VSI being paused - * @locked: is the rtnl_lock already held - */ -static void ice_dis_vsi(struct ice_vsi *vsi, bool locked) -{ - if (test_bit(__ICE_DOWN, vsi->state)) - return; - - set_bit(__ICE_NEEDS_RESTART, vsi->state); - - if (vsi->type == ICE_VSI_PF && vsi->netdev) { - if (netif_running(vsi->netdev)) { - if (!locked) - rtnl_lock(); - - ice_stop(vsi->netdev); - - if (!locked) - rtnl_unlock(); - } else { - ice_vsi_close(vsi); - } - } -} - -/** * ice_pf_dis_all_vsi - Pause all VSIs on a PF * @pf: the PF * @locked: is the rtnl_lock already held */ -#ifdef CONFIG_DCB -void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked) -#else static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked) -#endif /* CONFIG_DCB */ { int v; @@ -524,7 +496,7 @@ ice_prepare_for_reset(struct ice_pf *pf) */ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) { - struct device *dev = &pf->pdev->dev; + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; dev_dbg(dev, "reset_type 0x%x requested\n", reset_type); @@ -636,8 +608,14 @@ static void ice_print_topo_conflict(struct ice_vsi *vsi) switch (vsi->port_info->phy.link_info.topo_media_conflict) { case ICE_AQ_LINK_TOPO_CONFLICT: case ICE_AQ_LINK_MEDIA_CONFLICT: + case ICE_AQ_LINK_TOPO_UNREACH_PRT: + case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT: + case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA: netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n"); break; + case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA: + netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. 
Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); + break; default: break; } @@ -747,7 +725,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) an = "False"; /* Get FEC mode requested based on PHY caps last SW configuration */ - caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + caps = kzalloc(sizeof(*caps), GFP_KERNEL); if (!caps) { fec_req = "Unknown"; goto done; @@ -767,7 +745,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) else fec_req = "NONE"; - devm_kfree(&vsi->back->pdev->dev, caps); + kfree(caps); done: netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Autoneg: %s, Flow Control: %s\n", @@ -815,6 +793,7 @@ static int ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, u16 link_speed) { + struct device *dev = ice_pf_to_dev(pf); struct ice_phy_info *phy_info; struct ice_vsi *vsi; u16 old_link_speed; @@ -832,7 +811,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, */ result = ice_update_link_info(pi); if (result) - dev_dbg(&pf->pdev->dev, + dev_dbg(dev, "Failed to update link status and re-enable link events for port %d\n", pi->lport); @@ -851,7 +830,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, result = ice_aq_set_link_restart_an(pi, false, NULL); if (result) { - dev_dbg(&pf->pdev->dev, + dev_dbg(dev, "Failed to set link down, VSI %d error %d\n", vsi->vsi_num, result); return result; @@ -947,7 +926,7 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event) !!(link_data->link_info & ICE_AQ_LINK_UP), le16_to_cpu(link_data->link_speed)); if (status) - dev_dbg(&pf->pdev->dev, + dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n", status); return status; @@ -960,6 +939,7 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event) */ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) { + struct device *dev = ice_pf_to_dev(pf); struct ice_rq_event_info event; struct ice_hw *hw = &pf->hw; struct ice_ctl_q_info *cq; @@ -981,8 +961,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) qtype = "Mailbox"; break; default: - dev_warn(&pf->pdev->dev, "Unknown control queue type 0x%x\n", - q_type); + dev_warn(dev, "Unknown control queue type 0x%x\n", q_type); return 0; } @@ -994,15 +973,15 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) PF_FW_ARQLEN_ARQCRIT_M)) { oldval = val; if (val & PF_FW_ARQLEN_ARQVFE_M) - dev_dbg(&pf->pdev->dev, - "%s Receive Queue VF Error detected\n", qtype); + dev_dbg(dev, "%s Receive Queue VF Error detected\n", + qtype); if (val & PF_FW_ARQLEN_ARQOVFL_M) { - dev_dbg(&pf->pdev->dev, + dev_dbg(dev, "%s Receive Queue Overflow Error detected\n", qtype); } if (val & PF_FW_ARQLEN_ARQCRIT_M) - dev_dbg(&pf->pdev->dev, + dev_dbg(dev, "%s Receive Queue Critical Error detected\n", qtype); val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M | @@ -1016,16 +995,14 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) PF_FW_ATQLEN_ATQCRIT_M)) { oldval = val; if (val & PF_FW_ATQLEN_ATQVFE_M) - dev_dbg(&pf->pdev->dev, + dev_dbg(dev, "%s Send Queue VF Error detected\n", qtype); if (val & PF_FW_ATQLEN_ATQOVFL_M) { - dev_dbg(&pf->pdev->dev, - "%s Send Queue Overflow Error detected\n", + dev_dbg(dev, "%s Send Queue Overflow Error detected\n", qtype); } if (val & PF_FW_ATQLEN_ATQCRIT_M) - dev_dbg(&pf->pdev->dev, - "%s Send Queue Critical Error 
detected\n", + dev_dbg(dev, "%s Send Queue Critical Error detected\n", qtype); val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M | PF_FW_ATQLEN_ATQCRIT_M); @@ -1034,8 +1011,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) } event.buf_len = cq->rq_buf_size; - event.msg_buf = devm_kzalloc(&pf->pdev->dev, event.buf_len, - GFP_KERNEL); + event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); if (!event.msg_buf) return 0; @@ -1047,8 +1023,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) if (ret == ICE_ERR_AQ_NO_WORK) break; if (ret) { - dev_err(&pf->pdev->dev, - "%s Receive Queue event error %d\n", qtype, + dev_err(dev, "%s Receive Queue event error %d\n", qtype, ret); break; } @@ -1058,8 +1033,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) switch (opcode) { case ice_aqc_opc_get_link_status: if (ice_handle_link_event(pf, &event)) - dev_err(&pf->pdev->dev, - "Could not handle link event\n"); + dev_err(dev, "Could not handle link event\n"); break; case ice_mbx_opc_send_msg_to_pf: ice_vc_process_vf_msg(pf, &event); @@ -1071,14 +1045,14 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) ice_dcb_process_lldp_set_mib_change(pf, &event); break; default: - dev_dbg(&pf->pdev->dev, + dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n", qtype, opcode); break; } } while (pending && (i++ < ICE_DFLT_IRQ_WORK)); - devm_kfree(&pf->pdev->dev, event.msg_buf); + kfree(event.msg_buf); return pending && (i == ICE_DFLT_IRQ_WORK); } @@ -1222,6 +1196,7 @@ static void ice_service_timer(struct timer_list *t) */ static void ice_handle_mdd_event(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; bool mdd_detected = false; u32 reg; @@ -1243,7 +1218,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) GL_MDET_TX_PQM_QNUM_S); if (netif_msg_tx_err(pf)) - dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", + dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); wr32(hw, GL_MDET_TX_PQM, 0xffffffff); mdd_detected = true; @@ -1261,7 +1236,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) GL_MDET_TX_TCLAN_QNUM_S); if (netif_msg_rx_err(pf)) - dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", + dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff); mdd_detected = true; @@ -1279,7 +1254,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) GL_MDET_RX_QNUM_S); if (netif_msg_rx_err(pf)) - dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n", + dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); wr32(hw, GL_MDET_RX, 0xffffffff); mdd_detected = true; @@ -1291,21 +1266,21 @@ static void ice_handle_mdd_event(struct ice_pf *pf) reg = rd32(hw, PF_MDET_TX_PQM); if (reg & PF_MDET_TX_PQM_VALID_M) { wr32(hw, PF_MDET_TX_PQM, 0xFFFF); - dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n"); + dev_info(dev, "TX driver issue detected, PF reset issued\n"); pf_mdd_detected = true; } reg = rd32(hw, PF_MDET_TX_TCLAN); if (reg & PF_MDET_TX_TCLAN_VALID_M) { wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF); - dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n"); + dev_info(dev, "TX driver issue detected, PF reset 
issued\n"); pf_mdd_detected = true; } reg = rd32(hw, PF_MDET_RX); if (reg & PF_MDET_RX_VALID_M) { wr32(hw, PF_MDET_RX, 0xFFFF); - dev_info(&pf->pdev->dev, "RX driver issue detected, PF reset issued\n"); + dev_info(dev, "RX driver issue detected, PF reset issued\n"); pf_mdd_detected = true; } /* Queue belongs to the PF initiate a reset */ @@ -1325,7 +1300,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_PQM_VALID_M) { wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF); vf_mdd_detected = true; - dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", + dev_info(dev, "TX driver issue detected on VF %d\n", i); } @@ -1333,7 +1308,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_TCLAN_VALID_M) { wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF); vf_mdd_detected = true; - dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", + dev_info(dev, "TX driver issue detected on VF %d\n", i); } @@ -1341,7 +1316,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_TDPU_VALID_M) { wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF); vf_mdd_detected = true; - dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", + dev_info(dev, "TX driver issue detected on VF %d\n", i); } @@ -1349,7 +1324,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_RX_VALID_M) { wr32(hw, VP_MDET_RX(i), 0xFFFF); vf_mdd_detected = true; - dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n", + dev_info(dev, "RX driver issue detected on VF %d\n", i); } @@ -1357,7 +1332,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) vf->num_mdd_events++; if (vf->num_mdd_events && vf->num_mdd_events <= ICE_MDD_EVENTS_THRESHOLD) - dev_info(&pf->pdev->dev, + dev_info(dev, "VF %d has had %llu MDD events since last boot, Admin might need to reload AVF driver with this number of events\n", i, vf->num_mdd_events); } @@ -1393,7 +1368,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up) pi = vsi->port_info; - pcaps = devm_kzalloc(dev, sizeof(*pcaps), GFP_KERNEL); + pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) return -ENOMEM; @@ -1412,7 +1387,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up) link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) goto out; - cfg = devm_kzalloc(dev, sizeof(*cfg), GFP_KERNEL); + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); if (!cfg) { retcode = -ENOMEM; goto out; @@ -1437,9 +1412,9 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up) retcode = -EIO; } - devm_kfree(dev, cfg); + kfree(cfg); out: - devm_kfree(dev, pcaps); + kfree(pcaps); return retcode; } @@ -1551,6 +1526,44 @@ static void ice_set_ctrlq_len(struct ice_hw *hw) } /** + * ice_schedule_reset - schedule a reset + * @pf: board private structure + * @reset: reset being requested + */ +int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset) +{ + struct device *dev = ice_pf_to_dev(pf); + + /* bail out if earlier reset has failed */ + if (test_bit(__ICE_RESET_FAILED, pf->state)) { + dev_dbg(dev, "earlier reset has failed\n"); + return -EIO; + } + /* bail if reset/recovery already in progress */ + if (ice_is_reset_in_progress(pf->state)) { + dev_dbg(dev, "Reset already in progress\n"); + return -EBUSY; + } + + switch (reset) { + case ICE_RESET_PFR: + set_bit(__ICE_PFR_REQ, pf->state); + break; + case ICE_RESET_CORER: + set_bit(__ICE_CORER_REQ, pf->state); + break; + case ICE_RESET_GLOBR: + set_bit(__ICE_GLOBR_REQ, pf->state); + break; + default: + return -EINVAL; + } + + 
ice_service_task_schedule(pf); + return 0; +} + +/** * ice_irq_affinity_notify - Callback for affinity changes * @notify: context as to what irq was changed * @mask: the new affinity mask @@ -1604,11 +1617,13 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) int q_vectors = vsi->num_q_vectors; struct ice_pf *pf = vsi->back; int base = vsi->base_vector; + struct device *dev; int rx_int_idx = 0; int tx_int_idx = 0; int vector, err; int irq_num; + dev = ice_pf_to_dev(pf); for (vector = 0; vector < q_vectors; vector++) { struct ice_q_vector *q_vector = vsi->q_vectors[vector]; @@ -1628,8 +1643,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) /* skip this unused q_vector */ continue; } - err = devm_request_irq(&pf->pdev->dev, irq_num, - vsi->irq_handler, 0, + err = devm_request_irq(dev, irq_num, vsi->irq_handler, 0, q_vector->name, q_vector); if (err) { netdev_err(vsi->netdev, @@ -1655,12 +1669,331 @@ free_q_irqs: irq_num = pf->msix_entries[base + vector].vector, irq_set_affinity_notifier(irq_num, NULL); irq_set_affinity_hint(irq_num, NULL); - devm_free_irq(&pf->pdev->dev, irq_num, &vsi->q_vectors[vector]); + devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]); } return err; } /** + * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP + * @vsi: VSI to setup Tx rings used by XDP + * + * Return 0 on success and negative value on error + */ +static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) +{ + struct device *dev = &vsi->back->pdev->dev; + int i; + + for (i = 0; i < vsi->num_xdp_txq; i++) { + u16 xdp_q_idx = vsi->alloc_txq + i; + struct ice_ring *xdp_ring; + + xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL); + + if (!xdp_ring) + goto free_xdp_rings; + + xdp_ring->q_index = xdp_q_idx; + xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; + xdp_ring->ring_active = false; + xdp_ring->vsi = vsi; + xdp_ring->netdev = NULL; + xdp_ring->dev = dev; + xdp_ring->count = vsi->num_tx_desc; + vsi->xdp_rings[i] = xdp_ring; + if (ice_setup_tx_ring(xdp_ring)) + goto free_xdp_rings; + ice_set_ring_xdp(xdp_ring); + xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring); + } + + return 0; + +free_xdp_rings: + for (; i >= 0; i--) + if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) + ice_free_tx_ring(vsi->xdp_rings[i]); + return -ENOMEM; +} + +/** + * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI + * @vsi: VSI to set the bpf prog on + * @prog: the bpf prog pointer + */ +static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog) +{ + struct bpf_prog *old_prog; + int i; + + old_prog = xchg(&vsi->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + ice_for_each_rxq(vsi, i) + WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); +} + +/** + * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP + * @vsi: VSI to bring up Tx rings used by XDP + * @prog: bpf program that will be assigned to VSI + * + * Return 0 on success and negative value on error + */ +int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) +{ + u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + int xdp_rings_rem = vsi->num_xdp_txq; + struct ice_pf *pf = vsi->back; + struct ice_qs_cfg xdp_qs_cfg = { + .qs_mutex = &pf->avail_q_mutex, + .pf_map = pf->avail_txqs, + .pf_map_size = pf->max_pf_txqs, + .q_count = vsi->num_xdp_txq, + .scatter_count = ICE_MAX_SCATTER_TXQS, + .vsi_map = vsi->txq_map, + .vsi_map_offset = vsi->alloc_txq, + .mapping_mode = ICE_VSI_MAP_CONTIG + }; + enum ice_status status; + struct device *dev; + int i, 
v_idx; + + dev = ice_pf_to_dev(pf); + vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq, + sizeof(*vsi->xdp_rings), GFP_KERNEL); + if (!vsi->xdp_rings) + return -ENOMEM; + + vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode; + if (__ice_vsi_get_qs(&xdp_qs_cfg)) + goto err_map_xdp; + + if (ice_xdp_alloc_setup_rings(vsi)) + goto clear_xdp_rings; + + /* follow the logic from ice_vsi_map_rings_to_vectors */ + ice_for_each_q_vector(vsi, v_idx) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; + int xdp_rings_per_v, q_id, q_base; + + xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem, + vsi->num_q_vectors - v_idx); + q_base = vsi->num_xdp_txq - xdp_rings_rem; + + for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) { + struct ice_ring *xdp_ring = vsi->xdp_rings[q_id]; + + xdp_ring->q_vector = q_vector; + xdp_ring->next = q_vector->tx.ring; + q_vector->tx.ring = xdp_ring; + } + xdp_rings_rem -= xdp_rings_per_v; + } + + /* omit the scheduler update if in reset path; XDP queues will be + * taken into account at the end of ice_vsi_rebuild, where + * ice_cfg_vsi_lan is being called + */ + if (ice_is_reset_in_progress(pf->state)) + return 0; + + /* tell the Tx scheduler that right now we have + * additional queues + */ + for (i = 0; i < vsi->tc_cfg.numtc; i++) + max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq; + + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, + max_txqs); + if (status) { + dev_err(dev, "Failed VSI LAN queue config for XDP, error:%d\n", + status); + goto clear_xdp_rings; + } + ice_vsi_assign_bpf_prog(vsi, prog); + + return 0; +clear_xdp_rings: + for (i = 0; i < vsi->num_xdp_txq; i++) + if (vsi->xdp_rings[i]) { + kfree_rcu(vsi->xdp_rings[i], rcu); + vsi->xdp_rings[i] = NULL; + } + +err_map_xdp: + mutex_lock(&pf->avail_q_mutex); + for (i = 0; i < vsi->num_xdp_txq; i++) { + clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs); + vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX; + } + mutex_unlock(&pf->avail_q_mutex); + + devm_kfree(dev, vsi->xdp_rings); + return -ENOMEM; +} + +/** + * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings + * @vsi: VSI to remove XDP rings + * + * Detach XDP rings from irq vectors, clean up the PF bitmap and free + * resources + */ +int ice_destroy_xdp_rings(struct ice_vsi *vsi) +{ + u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + struct ice_pf *pf = vsi->back; + int i, v_idx; + + /* q_vectors are freed in reset path so there's no point in detaching + * rings; in case of rebuild being triggered not from reset reset bits + * in pf->state won't be set, so additionally check first q_vector + * against NULL + */ + if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) + goto free_qmap; + + ice_for_each_q_vector(vsi, v_idx) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; + struct ice_ring *ring; + + ice_for_each_ring(ring, q_vector->tx) + if (!ring->tx_buf || !ice_ring_is_xdp(ring)) + break; + + /* restore the value of last node prior to XDP setup */ + q_vector->tx.ring = ring; + } + +free_qmap: + mutex_lock(&pf->avail_q_mutex); + for (i = 0; i < vsi->num_xdp_txq; i++) { + clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs); + vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX; + } + mutex_unlock(&pf->avail_q_mutex); + + for (i = 0; i < vsi->num_xdp_txq; i++) + if (vsi->xdp_rings[i]) { + if (vsi->xdp_rings[i]->desc) + ice_free_tx_ring(vsi->xdp_rings[i]); + kfree_rcu(vsi->xdp_rings[i], rcu); + vsi->xdp_rings[i] = NULL; + } + + devm_kfree(ice_pf_to_dev(pf), 
vsi->xdp_rings); + vsi->xdp_rings = NULL; + + if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) + return 0; + + ice_vsi_assign_bpf_prog(vsi, NULL); + + /* notify Tx scheduler that we destroyed XDP queues and bring + * back the old number of child nodes + */ + for (i = 0; i < vsi->tc_cfg.numtc; i++) + max_txqs[i] = vsi->num_txq; + + return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, + max_txqs); +} + +/** + * ice_xdp_setup_prog - Add or remove XDP eBPF program + * @vsi: VSI to setup XDP for + * @prog: XDP program + * @extack: netlink extended ack + */ +static int +ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD; + bool if_running = netif_running(vsi->netdev); + int ret = 0, xdp_ring_err = 0; + + if (frame_size > vsi->rx_buf_len) { + NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP"); + return -EOPNOTSUPP; + } + + /* need to stop netdev while setting up the program for Rx rings */ + if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) { + ret = ice_down(vsi); + if (ret) { + NL_SET_ERR_MSG_MOD(extack, + "Preparing device for XDP attach failed"); + return ret; + } + } + + if (!ice_is_xdp_ena_vsi(vsi) && prog) { + vsi->num_xdp_txq = vsi->alloc_txq; + xdp_ring_err = ice_prepare_xdp_rings(vsi, prog); + if (xdp_ring_err) + NL_SET_ERR_MSG_MOD(extack, + "Setting up XDP Tx resources failed"); + } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { + xdp_ring_err = ice_destroy_xdp_rings(vsi); + if (xdp_ring_err) + NL_SET_ERR_MSG_MOD(extack, + "Freeing XDP Tx resources failed"); + } else { + ice_vsi_assign_bpf_prog(vsi, prog); + } + + if (if_running) + ret = ice_up(vsi); + + if (!ret && prog && vsi->xsk_umems) { + int i; + + ice_for_each_rxq(vsi, i) { + struct ice_ring *rx_ring = vsi->rx_rings[i]; + + if (rx_ring->xsk_umem) + napi_schedule(&rx_ring->q_vector->napi); + } + } + + return (ret || xdp_ring_err) ? -ENOMEM : 0; +} + +/** + * ice_xdp - implements XDP handler + * @dev: netdevice + * @xdp: XDP command + */ +static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct ice_netdev_priv *np = netdev_priv(dev); + struct ice_vsi *vsi = np->vsi; + + if (vsi->type != ICE_VSI_PF) { + NL_SET_ERR_MSG_MOD(xdp->extack, + "XDP can be loaded only on PF VSI"); + return -EINVAL; + } + + switch (xdp->command) { + case XDP_SETUP_PROG: + return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); + case XDP_QUERY_PROG: + xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0; + return 0; + case XDP_SETUP_XSK_UMEM: + return ice_xsk_umem_setup(vsi, xdp->xsk.umem, + xdp->xsk.queue_id); + default: + return -EINVAL; + } +} + +/** * ice_ena_misc_vector - enable the non-queue interrupts * @pf: board private structure */ @@ -1698,8 +2031,10 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) struct ice_pf *pf = (struct ice_pf *)data; struct ice_hw *hw = &pf->hw; irqreturn_t ret = IRQ_NONE; + struct device *dev; u32 oicr, ena_mask; + dev = ice_pf_to_dev(pf); set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); set_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state); @@ -1735,8 +2070,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) else if (reset == ICE_RESET_EMPR) pf->empr_count++; else - dev_dbg(&pf->pdev->dev, "Invalid reset type %d\n", - reset); + dev_dbg(dev, "Invalid reset type %d\n", reset); /* If a reset cycle isn't already in progress, we set a bit in * pf->state so that the service task can start a reset/rebuild. 
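/* Editor's sketch, not part of the patch: the ring-to-vector mapping in
 * ice_prepare_xdp_rings() above spreads the XDP Tx rings as evenly as the
 * remaining vectors allow. Worked example for num_xdp_txq = 8 and
 * num_q_vectors = 3:
 *
 *   v_idx 0: DIV_ROUND_UP(8, 3) = 3 -> rings 0-2, 5 left
 *   v_idx 1: DIV_ROUND_UP(5, 2) = 3 -> rings 3-5, 2 left
 *   v_idx 2: DIV_ROUND_UP(2, 1) = 2 -> rings 6-7, 0 left
 *
 * The standalone helper below reproduces only that arithmetic; it assumes
 * just DIV_ROUND_UP() and pr_info() from the kernel headers and is not part
 * of the driver.
 */
#include <linux/kernel.h>

static void ice_sketch_xdp_ring_split(u16 num_xdp_txq, u16 num_q_vectors)
{
	u16 rem = num_xdp_txq;
	u16 v_idx;

	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
		u16 per_v = DIV_ROUND_UP(rem, num_q_vectors - v_idx);
		u16 q_base = num_xdp_txq - rem;

		if (!per_v)
			continue;

		pr_info("vector %u gets XDP rings %u-%u\n",
			v_idx, q_base, q_base + per_v - 1);
		rem -= per_v;
	}
}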
@@ -1770,8 +2104,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_HMC_ERR_M) { ena_mask &= ~PFINT_OICR_HMC_ERR_M; - dev_dbg(&pf->pdev->dev, - "HMC Error interrupt - info 0x%x, data 0x%x\n", + dev_dbg(dev, "HMC Error interrupt - info 0x%x, data 0x%x\n", rd32(hw, PFHMC_ERRORINFO), rd32(hw, PFHMC_ERRORDATA)); } @@ -1779,8 +2112,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) /* Report any remaining unexpected interrupts */ oicr &= ena_mask; if (oicr) { - dev_dbg(&pf->pdev->dev, "unhandled interrupt oicr=0x%08x\n", - oicr); + dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr); /* If a critical error is pending there is no choice but to * reset the device. */ @@ -1838,7 +2170,7 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf) if (pf->msix_entries) { synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); - devm_free_irq(&pf->pdev->dev, + devm_free_irq(ice_pf_to_dev(pf), pf->msix_entries[pf->oicr_idx].vector, pf); } @@ -1882,13 +2214,13 @@ static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx) */ static int ice_req_irq_msix_misc(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; int oicr_idx, err = 0; if (!pf->int_name[0]) snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc", - dev_driver_string(&pf->pdev->dev), - dev_name(&pf->pdev->dev)); + dev_driver_string(dev), dev_name(dev)); /* Do not request IRQ but do enable OICR interrupt since settings are * lost during reset. Note that this function is called only during @@ -1905,12 +2237,10 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) pf->num_avail_sw_msix -= 1; pf->oicr_idx = oicr_idx; - err = devm_request_irq(&pf->pdev->dev, - pf->msix_entries[pf->oicr_idx].vector, + err = devm_request_irq(dev, pf->msix_entries[pf->oicr_idx].vector, ice_misc_intr, 0, pf->int_name, pf); if (err) { - dev_err(&pf->pdev->dev, - "devm_request_irq for %s failed: %d\n", + dev_err(dev, "devm_request_irq for %s failed: %d\n", pf->int_name, err); ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); pf->num_avail_sw_msix += 1; @@ -2043,7 +2373,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) ice_set_ops(netdev); if (vsi->type == ICE_VSI_PF) { - SET_NETDEV_DEV(netdev, &pf->pdev->dev); + SET_NETDEV_DEV(netdev, ice_pf_to_dev(pf)); ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr); ether_addr_copy(netdev->dev_addr, mac_addr); ether_addr_copy(netdev->perm_addr, mac_addr); @@ -2219,6 +2549,11 @@ static int ice_setup_pf_sw(struct ice_pf *pf) status = -ENODEV; goto unroll_vsi_setup; } + /* netdev has to be configured before setting frame size */ + ice_vsi_cfg_frame_size(vsi); + + /* Setup DCB netlink interface */ + ice_dcbnl_setup(vsi); /* registering the NAPI handler requires both the queues and * netdev to be created, which are done in ice_pf_vsi_setup() @@ -2300,6 +2635,7 @@ static void ice_deinit_pf(struct ice_pf *pf) { ice_service_task_stop(pf); mutex_destroy(&pf->sw_mutex); + mutex_destroy(&pf->tc_mutex); mutex_destroy(&pf->avail_q_mutex); if (pf->avail_txqs) { @@ -2349,6 +2685,7 @@ static int ice_init_pf(struct ice_pf *pf) ice_set_pf_caps(pf); mutex_init(&pf->sw_mutex); + mutex_init(&pf->tc_mutex); /* setup service timer and periodic service task */ timer_setup(&pf->serv_tmr, ice_service_timer, 0); @@ -2363,7 +2700,7 @@ static int ice_init_pf(struct ice_pf *pf) pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL); if (!pf->avail_rxqs) { - devm_kfree(&pf->pdev->dev, pf->avail_txqs); + 
devm_kfree(ice_pf_to_dev(pf), pf->avail_txqs); pf->avail_txqs = NULL; return -ENOMEM; } @@ -2380,6 +2717,7 @@ static int ice_init_pf(struct ice_pf *pf) */ static int ice_ena_msix_range(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); int v_left, v_actual, v_budget = 0; int needed, err, i; @@ -2400,7 +2738,7 @@ static int ice_ena_msix_range(struct ice_pf *pf) v_budget += needed; v_left -= needed; - pf->msix_entries = devm_kcalloc(&pf->pdev->dev, v_budget, + pf->msix_entries = devm_kcalloc(dev, v_budget, sizeof(*pf->msix_entries), GFP_KERNEL); if (!pf->msix_entries) { @@ -2416,13 +2754,13 @@ static int ice_ena_msix_range(struct ice_pf *pf) ICE_MIN_MSIX, v_budget); if (v_actual < 0) { - dev_err(&pf->pdev->dev, "unable to reserve MSI-X vectors\n"); + dev_err(dev, "unable to reserve MSI-X vectors\n"); err = v_actual; goto msix_err; } if (v_actual < v_budget) { - dev_warn(&pf->pdev->dev, + dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n", v_budget, v_actual); /* 2 vectors for LAN (traffic + OICR) */ @@ -2441,11 +2779,11 @@ static int ice_ena_msix_range(struct ice_pf *pf) return v_actual; msix_err: - devm_kfree(&pf->pdev->dev, pf->msix_entries); + devm_kfree(dev, pf->msix_entries); goto exit_err; no_hw_vecs_left_err: - dev_err(&pf->pdev->dev, + dev_err(dev, "not enough device MSI-X vectors. requested = %d, available = %d\n", needed, v_left); err = -ERANGE; @@ -2461,7 +2799,7 @@ exit_err: static void ice_dis_msix(struct ice_pf *pf) { pci_disable_msix(pf->pdev); - devm_kfree(&pf->pdev->dev, pf->msix_entries); + devm_kfree(ice_pf_to_dev(pf), pf->msix_entries); pf->msix_entries = NULL; } @@ -2474,7 +2812,7 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf) ice_dis_msix(pf); if (pf->irq_tracker) { - devm_kfree(&pf->pdev->dev, pf->irq_tracker); + devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker); pf->irq_tracker = NULL; } } @@ -2494,7 +2832,7 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) /* set up vector assignment tracking */ pf->irq_tracker = - devm_kzalloc(&pf->pdev->dev, sizeof(*pf->irq_tracker) + + devm_kzalloc(ice_pf_to_dev(pf), sizeof(*pf->irq_tracker) + (sizeof(u16) * vectors), GFP_KERNEL); if (!pf->irq_tracker) { ice_dis_msix(pf); @@ -2510,6 +2848,52 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) } /** + * ice_vsi_recfg_qs - Change the number of queues on a VSI + * @vsi: VSI being changed + * @new_rx: new number of Rx queues + * @new_tx: new number of Tx queues + * + * Only change the number of queues if new_tx, or new_rx is non-0. + * + * Returns 0 on success. 
+ */ +int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx) +{ + struct ice_pf *pf = vsi->back; + int err = 0, timeout = 50; + + if (!new_rx && !new_tx) + return -EINVAL; + + while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { + timeout--; + if (!timeout) + return -EBUSY; + usleep_range(1000, 2000); + } + + if (new_tx) + vsi->req_txq = new_tx; + if (new_rx) + vsi->req_rxq = new_rx; + + /* set for the next time the netdev is started */ + if (!netif_running(vsi->netdev)) { + ice_vsi_rebuild(vsi, false); + dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n"); + goto done; + } + + ice_vsi_close(vsi); + ice_vsi_rebuild(vsi, false); + ice_pf_dcb_recfg(pf); + ice_vsi_open(vsi); +done: + clear_bit(__ICE_CFG_BUSY, pf->state); + return err; +} + +/** * ice_log_pkg_init - log result of DDP package load * @hw: pointer to hardware info * @status: status of package load @@ -2518,7 +2902,7 @@ static void ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status) { struct ice_pf *pf = (struct ice_pf *)hw->back; - struct device *dev = &pf->pdev->dev; + struct device *dev = ice_pf_to_dev(pf); switch (*status) { case ICE_SUCCESS: @@ -2598,7 +2982,7 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status) ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); break; case ICE_ERR_AQ_ERROR: - switch (hw->adminq.sq_last_status) { + switch (hw->pkg_dwnld_status) { case ICE_AQ_RC_ENOSEC: case ICE_AQ_RC_EBADSIG: dev_err(dev, @@ -2637,7 +3021,7 @@ static void ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf) { enum ice_status status = ICE_ERR_PARAM; - struct device *dev = &pf->pdev->dev; + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; /* Load DDP Package */ @@ -2677,7 +3061,7 @@ ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf) static void ice_verify_cacheline_size(struct ice_pf *pf) { if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M) - dev_warn(&pf->pdev->dev, + dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n", ICE_CACHE_LINE_BYTES); } @@ -2747,7 +3131,7 @@ static void ice_request_fw(struct ice_pf *pf) { char *opt_fw_filename = ice_get_opt_fw_name(pf); const struct firmware *firmware = NULL; - struct device *dev = &pf->pdev->dev; + struct device *dev = ice_pf_to_dev(pf); int err = 0; /* optional device-specific DDP (if present) overrides the default DDP @@ -2831,6 +3215,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) hw = &pf->hw; hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0]; + pci_save_state(pdev); + hw->back = pf; hw->vendor_id = pdev->vendor; hw->device_id = pdev->device; @@ -2936,7 +3322,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) err = ice_setup_pf_sw(pf); if (err) { - dev_err(dev, "probe failed due to setup PF switch:%d\n", err); + dev_err(dev, "probe failed due to setup PF switch: %d\n", err); goto err_alloc_sw_unroll; } @@ -2976,12 +3362,15 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ice_cfg_lldp_mib_change(&pf->hw, true); } + /* print PCI link speed and width */ + pcie_print_link_status(pf->pdev); + return 0; err_alloc_sw_unroll: set_bit(__ICE_SERVICE_DIS, pf->state); set_bit(__ICE_DOWN, pf->state); - devm_kfree(&pf->pdev->dev, pf->first_sw); + devm_kfree(dev, pf->first_sw); err_msix_misc_unroll: ice_free_irq_msix_misc(pf); err_init_interrupt_unroll: @@ -3027,12 +3416,13 @@ static void ice_remove(struct pci_dev 
*pdev) } ice_deinit_pf(pf); ice_deinit_hw(&pf->hw); - ice_clear_interrupt_scheme(pf); /* Issue a PFR as part of the prescribed driver unload flow. Do not * do it via ice_schedule_reset() since there is no need to rebuild * and the service task is already stopped. */ ice_reset(&pf->hw, ICE_RESET_PFR); + pci_wait_for_pending_transaction(pdev); + ice_clear_interrupt_scheme(pf); pci_disable_pcie_error_reporting(pdev); } @@ -3347,6 +3737,48 @@ static void ice_set_rx_mode(struct net_device *netdev) } /** + * ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate + * @netdev: network interface device structure + * @queue_index: Queue ID + * @maxrate: maximum bandwidth in Mbps + */ +static int +ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + enum ice_status status; + u16 q_handle; + u8 tc; + + /* Validate maxrate requested is within permitted range */ + if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) { + netdev_err(netdev, + "Invalid max rate %d specified for the queue %d\n", + maxrate, queue_index); + return -EINVAL; + } + + q_handle = vsi->tx_rings[queue_index]->q_handle; + tc = ice_dcb_get_tc(vsi, queue_index); + + /* Set BW back to default, when user set maxrate to 0 */ + if (!maxrate) + status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc, + q_handle, ICE_MAX_BW); + else + status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc, + q_handle, ICE_MAX_BW, maxrate * 1000); + if (status) { + netdev_err(netdev, + "Unable to set Tx max rate, error %d\n", status); + return -EIO; + } + + return 0; +} + +/** * ice_fdb_add - add an entry to the hardware database * @ndm: the input from the stack * @tb: pointer to array of nladdr (unused) @@ -3426,6 +3858,7 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; int ret = 0; /* Don't set any netdev advanced features with device in Safe Mode */ @@ -3435,6 +3868,13 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) return ret; } + /* Do not change setting during reset */ + if (ice_is_reset_in_progress(pf->state)) { + dev_err(&vsi->back->pdev->dev, + "Device is resetting, changing advanced netdev features temporarily unavailable.\n"); + return -EBUSY; + } + /* Multiple features can be changed in one call so keep features in * separate if/else statements to guarantee each feature is checked */ @@ -3505,6 +3945,8 @@ int ice_vsi_cfg(struct ice_vsi *vsi) ice_vsi_cfg_dcb_rings(vsi); err = ice_vsi_cfg_lan_txqs(vsi); + if (!err && ice_is_xdp_ena_vsi(vsi)) + err = ice_vsi_cfg_xdp_txqs(vsi); if (!err) err = ice_vsi_cfg_rxqs(vsi); @@ -3920,6 +4362,13 @@ int ice_down(struct ice_vsi *vsi) netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n", vsi->vsi_num, tx_err); + if (!tx_err && ice_is_xdp_ena_vsi(vsi)) { + tx_err = ice_vsi_stop_xdp_tx_rings(vsi); + if (tx_err) + netdev_err(vsi->netdev, + "Failed stop XDP rings, VSI %d error %d\n", + vsi->vsi_num, tx_err); + } rx_err = ice_vsi_stop_rx_rings(vsi); if (rx_err) @@ -3970,8 +4419,13 @@ int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) } ice_for_each_txq(vsi, i) { - vsi->tx_rings[i]->netdev = vsi->netdev; - err = ice_setup_tx_ring(vsi->tx_rings[i]); + struct ice_ring *ring = vsi->tx_rings[i]; + + if (!ring) + return -EINVAL; + + ring->netdev = vsi->netdev; + err = ice_setup_tx_ring(ring); if (err) break; } @@ 
-3996,8 +4450,13 @@ int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) } ice_for_each_rxq(vsi, i) { - vsi->rx_rings[i]->netdev = vsi->netdev; - err = ice_setup_rx_ring(vsi->rx_rings[i]); + struct ice_ring *ring = vsi->rx_rings[i]; + + if (!ring) + return -EINVAL; + + ring->netdev = vsi->netdev; + err = ice_setup_rx_ring(ring); if (err) break; } @@ -4033,7 +4492,7 @@ static int ice_vsi_open(struct ice_vsi *vsi) goto err_setup_rx; snprintf(int_name, sizeof(int_name) - 1, "%s-%s", - dev_driver_string(&pf->pdev->dev), vsi->netdev->name); + dev_driver_string(ice_pf_to_dev(pf)), vsi->netdev->name); err = ice_vsi_req_irq_msix(vsi, int_name); if (err) goto err_setup_rx; @@ -4082,61 +4541,13 @@ static void ice_vsi_release_all(struct ice_pf *pf) err = ice_vsi_release(pf->vsi[i]); if (err) - dev_dbg(&pf->pdev->dev, + dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n", i, err, pf->vsi[i]->vsi_num); } } /** - * ice_ena_vsi - resume a VSI - * @vsi: the VSI being resume - * @locked: is the rtnl_lock already held - */ -static int ice_ena_vsi(struct ice_vsi *vsi, bool locked) -{ - int err = 0; - - if (!test_bit(__ICE_NEEDS_RESTART, vsi->state)) - return 0; - - clear_bit(__ICE_NEEDS_RESTART, vsi->state); - - if (vsi->netdev && vsi->type == ICE_VSI_PF) { - if (netif_running(vsi->netdev)) { - if (!locked) - rtnl_lock(); - - err = ice_open(vsi->netdev); - - if (!locked) - rtnl_unlock(); - } - } - - return err; -} - -/** - * ice_pf_ena_all_vsi - Resume all VSIs on a PF - * @pf: the PF - * @locked: is the rtnl_lock already held - */ -#ifdef CONFIG_DCB -int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked) -{ - int v; - - ice_for_each_vsi(pf, v) - if (pf->vsi[v]) - if (ice_ena_vsi(pf->vsi[v], locked)) - return -EIO; - - return 0; -} -#endif /* CONFIG_DCB */ - -/** * ice_vsi_rebuild_by_type - Rebuild VSI of a given type * @pf: pointer to the PF instance * @type: VSI type to rebuild @@ -4145,6 +4556,7 @@ int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked) */ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type) { + struct device *dev = ice_pf_to_dev(pf); enum ice_status status; int i, err; @@ -4155,20 +4567,20 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type) continue; /* rebuild the VSI */ - err = ice_vsi_rebuild(vsi); + err = ice_vsi_rebuild(vsi, true); if (err) { - dev_err(&pf->pdev->dev, - "rebuild VSI failed, err %d, VSI index %d, type %d\n", - err, vsi->idx, type); + dev_err(dev, + "rebuild VSI failed, err %d, VSI index %d, type %s\n", + err, vsi->idx, ice_vsi_type_str(type)); return err; } /* replay filters for the VSI */ status = ice_replay_vsi(&pf->hw, vsi->idx); if (status) { - dev_err(&pf->pdev->dev, - "replay VSI failed, status %d, VSI index %d, type %d\n", - status, vsi->idx, type); + dev_err(dev, + "replay VSI failed, status %d, VSI index %d, type %s\n", + status, vsi->idx, ice_vsi_type_str(type)); return -EIO; } @@ -4180,14 +4592,14 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type) /* enable the VSI */ err = ice_ena_vsi(vsi, false); if (err) { - dev_err(&pf->pdev->dev, - "enable VSI failed, err %d, VSI index %d, type %d\n", - err, vsi->idx, type); + dev_err(dev, + "enable VSI failed, err %d, VSI index %d, type %s\n", + err, vsi->idx, ice_vsi_type_str(type)); return err; } - dev_info(&pf->pdev->dev, "VSI rebuilt. VSI index %d, type %d\n", - vsi->idx, type); + dev_info(dev, "VSI rebuilt. 
VSI index %d, type %s\n", vsi->idx, + ice_vsi_type_str(type)); } return 0; @@ -4226,7 +4638,7 @@ static void ice_update_pf_netdev_link(struct ice_pf *pf) */ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) { - struct device *dev = &pf->pdev->dev; + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; enum ice_status ret; int err; @@ -4272,7 +4684,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) err = ice_update_link_info(hw->port_info); if (err) - dev_err(&pf->pdev->dev, "Get link status error %d\n", err); + dev_err(dev, "Get link status error %d\n", err); /* start misc vector */ err = ice_req_irq_msix_misc(pf); @@ -4329,6 +4741,18 @@ clear_recovery: } /** + * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP + * @vsi: Pointer to VSI structure + */ +static int ice_max_xdp_frame_size(struct ice_vsi *vsi) +{ + if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) + return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM; + else + return ICE_RXBUF_3072; +} + +/** * ice_change_mtu - NDO callback to change the MTU * @netdev: network interface device structure * @new_mtu: new value for maximum frame size @@ -4347,6 +4771,16 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) return 0; } + if (ice_is_xdp_ena_vsi(vsi)) { + int frame_size = ice_max_xdp_frame_size(vsi); + + if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) { + netdev_err(netdev, "max MTU for XDP usage is %d\n", + frame_size - ICE_ETH_PKT_HDR_PAD); + return -EINVAL; + } + } + if (new_mtu < netdev->min_mtu) { netdev_err(netdev, "new MTU invalid. min_mtu is %d\n", netdev->min_mtu); @@ -4409,7 +4843,9 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; enum ice_status status; + struct device *dev; + dev = ice_pf_to_dev(pf); if (seed) { struct ice_aqc_get_set_rss_keys *buf = (struct ice_aqc_get_set_rss_keys *)seed; @@ -4417,8 +4853,7 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) status = ice_aq_set_rss_key(hw, vsi->idx, buf); if (status) { - dev_err(&pf->pdev->dev, - "Cannot set RSS key, err %d aq_err %d\n", + dev_err(dev, "Cannot set RSS key, err %d aq_err %d\n", status, hw->adminq.rq_last_status); return -EIO; } @@ -4428,8 +4863,7 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, lut_size); if (status) { - dev_err(&pf->pdev->dev, - "Cannot set RSS lut, err %d aq_err %d\n", + dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n", status, hw->adminq.rq_last_status); return -EIO; } @@ -4452,15 +4886,16 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; enum ice_status status; + struct device *dev; + dev = ice_pf_to_dev(pf); if (seed) { struct ice_aqc_get_set_rss_keys *buf = (struct ice_aqc_get_set_rss_keys *)seed; status = ice_aq_get_rss_key(hw, vsi->idx, buf); if (status) { - dev_err(&pf->pdev->dev, - "Cannot get RSS key, err %d aq_err %d\n", + dev_err(dev, "Cannot get RSS key, err %d aq_err %d\n", status, hw->adminq.rq_last_status); return -EIO; } @@ -4470,8 +4905,7 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, lut_size); if (status) { - dev_err(&pf->pdev->dev, - "Cannot get RSS lut, err %d aq_err %d\n", + dev_err(dev, "Cannot get RSS lut, err %d aq_err 
%d\n", status, hw->adminq.rq_last_status); return -EIO; } @@ -4515,7 +4949,6 @@ ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, */ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) { - struct device *dev = &vsi->back->pdev->dev; struct ice_aqc_vsi_props *vsi_props; struct ice_hw *hw = &vsi->back->hw; struct ice_vsi_ctx *ctxt; @@ -4524,7 +4957,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) vsi_props = &vsi->info; - ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return -ENOMEM; @@ -4540,7 +4973,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(dev, "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n", + dev_err(&vsi->back->pdev->dev, "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n", bmode, status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -4549,7 +4982,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) vsi_props->sw_flags = ctxt->info.sw_flags; out: - devm_kfree(dev, ctxt); + kfree(ctxt); return ret; } @@ -4864,12 +5297,14 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, + .ndo_set_tx_maxrate = ice_set_tx_maxrate, .ndo_set_vf_spoofchk = ice_set_vf_spoofchk, .ndo_set_vf_mac = ice_set_vf_mac, .ndo_get_vf_config = ice_get_vf_cfg, .ndo_set_vf_trust = ice_set_vf_trust, .ndo_set_vf_vlan = ice_set_vf_port_vlan, .ndo_set_vf_link_state = ice_set_vf_link_state, + .ndo_get_vf_stats = ice_get_vf_stats, .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, .ndo_set_features = ice_set_features, @@ -4878,4 +5313,7 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_fdb_add = ice_fdb_add, .ndo_fdb_del = ice_fdb_del, .ndo_tx_timeout = ice_tx_timeout, + .ndo_bpf = ice_xdp, + .ndo_xdp_xmit = ice_xdp_xmit, + .ndo_xsk_wakeup = ice_xsk_wakeup, }; diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index bcb431f1bd92..57c73f613f32 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -219,8 +219,7 @@ static void ice_release_nvm(struct ice_hw *hw) * * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_word_aq. 
*/ -static enum ice_status -ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data) +enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data) { enum ice_status status; @@ -242,9 +241,10 @@ ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data) */ enum ice_status ice_init_nvm(struct ice_hw *hw) { + u16 oem_hi, oem_lo, boot_cfg_tlv, boot_cfg_tlv_len; struct ice_nvm_info *nvm = &hw->nvm; u16 eetrack_lo, eetrack_hi; - enum ice_status status = 0; + enum ice_status status; u32 fla, gens_stat; u8 sr_size; @@ -261,15 +261,15 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) fla = rd32(hw, GLNVM_FLA); if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */ nvm->blank_nvm_mode = false; - } else { /* Blank programming mode */ + } else { + /* Blank programming mode */ nvm->blank_nvm_mode = true; - status = ICE_ERR_NVM_BLANK_MODE; ice_debug(hw, ICE_DBG_NVM, "NVM init error: unsupported blank mode.\n"); - return status; + return ICE_ERR_NVM_BLANK_MODE; } - status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &hw->nvm.ver); + status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &nvm->ver); if (status) { ice_debug(hw, ICE_DBG_INIT, "Failed to read DEV starter version.\n"); @@ -287,9 +287,42 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) return status; } - hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo; + nvm->eetrack = (eetrack_hi << 16) | eetrack_lo; - return status; + status = ice_get_pfa_module_tlv(hw, &boot_cfg_tlv, &boot_cfg_tlv_len, + ICE_SR_BOOT_CFG_PTR); + if (status) { + ice_debug(hw, ICE_DBG_INIT, + "Failed to read Boot Configuration Block TLV.\n"); + return status; + } + + /* Boot Configuration Block must have length at least 2 words + * (Combo Image Version High and Combo Image Version Low) + */ + if (boot_cfg_tlv_len < 2) { + ice_debug(hw, ICE_DBG_INIT, + "Invalid Boot Configuration Block TLV size.\n"); + return ICE_ERR_INVAL_SIZE; + } + + status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OEM_VER_OFF), + &oem_hi); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read OEM_VER hi.\n"); + return status; + } + + status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OEM_VER_OFF + 1), + &oem_lo); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read OEM_VER lo.\n"); + return status; + } + + nvm->oem_ver = ((u32)oem_hi << 16) | oem_lo; + + return 0; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h new file mode 100644 index 000000000000..a9fa011c22c6 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_nvm.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. 
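As a side note on the OEM version handling added to ice_init_nvm() above: the two 16-bit Shadow RAM words read from the Boot Configuration Block TLV are packed into one 32-bit value. The short userspace sketch below models only that packing step; the mock reader and the word values are illustrative stand-ins, not driver code.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for reading one 16-bit Shadow RAM word. */
static int mock_read_sr_word(uint16_t offset, uint16_t *data)
{
	/* Pretend offset 0 holds OEM_VER high and offset 1 holds OEM_VER low. */
	*data = offset ? 0x1234 : 0x0007;
	return 0;
}

int main(void)
{
	uint16_t oem_hi, oem_lo;
	uint32_t oem_ver;

	if (mock_read_sr_word(0, &oem_hi) || mock_read_sr_word(1, &oem_lo))
		return 1;

	/* Same packing as the driver: high word in bits 31:16, low word in bits 15:0. */
	oem_ver = ((uint32_t)oem_hi << 16) | oem_lo;
	printf("oem_ver = 0x%08x\n", oem_ver);
	return 0;
}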
*/ + +#ifndef _ICE_NVM_H_ +#define _ICE_NVM_H_ + +enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data); +#endif /* _ICE_NVM_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 2fde9653a608..eae707ddf8e8 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -411,6 +411,27 @@ ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req, } /** + * ice_aq_cfg_sched_elems - configures scheduler elements + * @hw: pointer to the HW struct + * @elems_req: number of elements to configure + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @elems_cfgd: returns total number of elements configured + * @cd: pointer to command details structure or NULL + * + * Configure scheduling elements (0x0403) + */ +static enum ice_status +ice_aq_cfg_sched_elems(struct ice_hw *hw, u16 elems_req, + struct ice_aqc_conf_elem *buf, u16 buf_size, + u16 *elems_cfgd, struct ice_sq_cd *cd) +{ + return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_cfg_sched_elems, + elems_req, (void *)buf, buf_size, + elems_cfgd, cd); +} + +/** * ice_aq_suspend_sched_elems - suspend scheduler elements * @hw: pointer to the HW struct * @elems_req: number of elements to suspend @@ -557,6 +578,149 @@ ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs) } /** + * ice_aq_rl_profile - performs a rate limiting task + * @hw: pointer to the HW struct + * @opcode:opcode for add, query, or remove profile(s) + * @num_profiles: the number of profiles + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @num_processed: number of processed add or remove profile(s) to return + * @cd: pointer to command details structure + * + * RL profile function to add, query, or remove profile(s) + */ +static enum ice_status +ice_aq_rl_profile(struct ice_hw *hw, enum ice_adminq_opc opcode, + u16 num_profiles, struct ice_aqc_rl_profile_generic_elem *buf, + u16 buf_size, u16 *num_processed, struct ice_sq_cd *cd) +{ + struct ice_aqc_rl_profile *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.rl_profile; + + ice_fill_dflt_direct_cmd_desc(&desc, opcode); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + cmd->num_profiles = cpu_to_le16(num_profiles); + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status && num_processed) + *num_processed = le16_to_cpu(cmd->num_processed); + return status; +} + +/** + * ice_aq_add_rl_profile - adds rate limiting profile(s) + * @hw: pointer to the HW struct + * @num_profiles: the number of profile(s) to be add + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @num_profiles_added: total number of profiles added to return + * @cd: pointer to command details structure + * + * Add RL profile (0x0410) + */ +static enum ice_status +ice_aq_add_rl_profile(struct ice_hw *hw, u16 num_profiles, + struct ice_aqc_rl_profile_generic_elem *buf, + u16 buf_size, u16 *num_profiles_added, + struct ice_sq_cd *cd) +{ + return ice_aq_rl_profile(hw, ice_aqc_opc_add_rl_profiles, + num_profiles, buf, + buf_size, num_profiles_added, cd); +} + +/** + * ice_aq_remove_rl_profile - removes RL profile(s) + * @hw: pointer to the HW struct + * @num_profiles: the number of profile(s) to remove + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @num_profiles_removed: total number of profiles removed to return + * @cd: pointer to command details structure or NULL + * + * Remove RL profile (0x0415) + */ +static enum 
ice_status +ice_aq_remove_rl_profile(struct ice_hw *hw, u16 num_profiles, + struct ice_aqc_rl_profile_generic_elem *buf, + u16 buf_size, u16 *num_profiles_removed, + struct ice_sq_cd *cd) +{ + return ice_aq_rl_profile(hw, ice_aqc_opc_remove_rl_profiles, + num_profiles, buf, + buf_size, num_profiles_removed, cd); +} + +/** + * ice_sched_del_rl_profile - remove RL profile + * @hw: pointer to the HW struct + * @rl_info: rate limit profile information + * + * If the profile ID is not referenced anymore, it removes profile ID with + * its associated parameters from HW DB,and locally. The caller needs to + * hold scheduler lock. + */ +static enum ice_status +ice_sched_del_rl_profile(struct ice_hw *hw, + struct ice_aqc_rl_profile_info *rl_info) +{ + struct ice_aqc_rl_profile_generic_elem *buf; + u16 num_profiles_removed; + enum ice_status status; + u16 num_profiles = 1; + + if (rl_info->prof_id_ref != 0) + return ICE_ERR_IN_USE; + + /* Safe to remove profile ID */ + buf = (struct ice_aqc_rl_profile_generic_elem *) + &rl_info->profile; + status = ice_aq_remove_rl_profile(hw, num_profiles, buf, sizeof(*buf), + &num_profiles_removed, NULL); + if (status || num_profiles_removed != num_profiles) + return ICE_ERR_CFG; + + /* Delete stale entry now */ + list_del(&rl_info->list_entry); + devm_kfree(ice_hw_to_dev(hw), rl_info); + return status; +} + +/** + * ice_sched_clear_rl_prof - clears RL prof entries + * @pi: port information structure + * + * This function removes all RL profile from HW as well as from SW DB. + */ +static void ice_sched_clear_rl_prof(struct ice_port_info *pi) +{ + u16 ln; + + for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) { + struct ice_aqc_rl_profile_info *rl_prof_elem; + struct ice_aqc_rl_profile_info *rl_prof_tmp; + + list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp, + &pi->rl_prof_list[ln], list_entry) { + struct ice_hw *hw = pi->hw; + enum ice_status status; + + rl_prof_elem->prof_id_ref = 0; + status = ice_sched_del_rl_profile(hw, rl_prof_elem); + if (status) { + ice_debug(hw, ICE_DBG_SCHED, + "Remove rl profile failed\n"); + /* On error, free mem required */ + list_del(&rl_prof_elem->list_entry); + devm_kfree(ice_hw_to_dev(hw), rl_prof_elem); + } + } + } +} + +/** * ice_sched_clear_agg - clears the aggregator related information * @hw: pointer to the hardware structure * @@ -592,6 +756,8 @@ static void ice_sched_clear_tx_topo(struct ice_port_info *pi) { if (!pi) return; + /* remove RL profiles related lists */ + ice_sched_clear_rl_prof(pi); if (pi->root) { ice_free_sched_node(pi, pi->root); pi->root = NULL; @@ -632,8 +798,7 @@ void ice_sched_cleanup_all(struct ice_hw *hw) hw->layer_info = NULL; } - if (hw->port_info) - ice_sched_clear_port(hw->port_info); + ice_sched_clear_port(hw->port_info); hw->num_tx_sched_layers = 0; hw->num_tx_sched_phys_layers = 0; @@ -1014,6 +1179,8 @@ enum ice_status ice_sched_init_port(struct ice_port_info *pi) /* initialize the port for handling the scheduler tree */ pi->port_state = ICE_SCHED_PORT_STATE_READY; mutex_init(&pi->sched_lock); + for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++) + INIT_LIST_HEAD(&pi->rl_prof_list[i]); err_init_port: if (status && pi->root) { @@ -1062,8 +1229,8 @@ enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw) * and so on. This array will be populated from root (index 0) to * qgroup layer 7. Leaf node has no children. 
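The profile teardown added above is a refcount-gated removal pattern: ice_sched_del_rl_profile() refuses to free a profile that still has node references, and its callers walk the per-layer lists with list_for_each_entry_safe() because entries are unlinked mid-walk. A minimal standalone sketch of the same pattern over a hand-rolled list follows; the structure and helper names are invented for illustration.

#include <stdio.h>
#include <stdlib.h>

struct prof {
	int id;
	unsigned int ref;	/* profile-to-node reference count */
	struct prof *next;
};

/* Free the entry only when nothing references it anymore. */
static int del_profile(struct prof **head, struct prof *p)
{
	struct prof **pp;

	if (p->ref)
		return -1;	/* still in use, like ICE_ERR_IN_USE */

	for (pp = head; *pp; pp = &(*pp)->next)
		if (*pp == p) {
			*pp = p->next;	/* unlink before freeing */
			free(p);
			return 0;
		}
	return -1;
}

int main(void)
{
	struct prof *head = NULL, *p, *next;
	int i;

	for (i = 0; i < 3; i++) {
		p = calloc(1, sizeof(*p));
		p->id = i;
		p->ref = (i == 1);	/* profile 1 is still referenced */
		p->next = head;
		head = p;
	}

	/* Safe walk: remember the next pointer before a possible unlink. */
	for (p = head; p; p = next) {
		next = p->next;
		if (del_profile(&head, p))
			printf("profile %d kept (ref=%u)\n", p->id, p->ref);
	}
	return 0;
}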
*/ - for (i = 0; i < hw->num_tx_sched_layers; i++) { - max_sibl = buf->layer_props[i].max_sibl_grp_sz; + for (i = 0; i < hw->num_tx_sched_layers - 1; i++) { + max_sibl = buf->layer_props[i + 1].max_sibl_grp_sz; hw->max_children[i] = le16_to_cpu(max_sibl); } @@ -1670,3 +1837,1095 @@ enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle) { return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN); } + +/** + * ice_sched_rm_unused_rl_prof - remove unused RL profile + * @pi: port information structure + * + * This function removes unused rate limit profiles from the HW and + * SW DB. The caller needs to hold scheduler lock. + */ +static void ice_sched_rm_unused_rl_prof(struct ice_port_info *pi) +{ + u16 ln; + + for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) { + struct ice_aqc_rl_profile_info *rl_prof_elem; + struct ice_aqc_rl_profile_info *rl_prof_tmp; + + list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp, + &pi->rl_prof_list[ln], list_entry) { + if (!ice_sched_del_rl_profile(pi->hw, rl_prof_elem)) + ice_debug(pi->hw, ICE_DBG_SCHED, + "Removed rl profile\n"); + } + } +} + +/** + * ice_sched_update_elem - update element + * @hw: pointer to the HW struct + * @node: pointer to node + * @info: node info to update + * + * It updates the HW DB, and local SW DB of node. It updates the scheduling + * parameters of node from argument info data buffer (Info->data buf) and + * returns success or error on config sched element failure. The caller + * needs to hold scheduler lock. + */ +static enum ice_status +ice_sched_update_elem(struct ice_hw *hw, struct ice_sched_node *node, + struct ice_aqc_txsched_elem_data *info) +{ + struct ice_aqc_conf_elem buf; + enum ice_status status; + u16 elem_cfgd = 0; + u16 num_elems = 1; + + buf.generic[0] = *info; + /* Parent TEID is reserved field in this aq call */ + buf.generic[0].parent_teid = 0; + /* Element type is reserved field in this aq call */ + buf.generic[0].data.elem_type = 0; + /* Flags is reserved field in this aq call */ + buf.generic[0].data.flags = 0; + + /* Update HW DB */ + /* Configure element node */ + status = ice_aq_cfg_sched_elems(hw, num_elems, &buf, sizeof(buf), + &elem_cfgd, NULL); + if (status || elem_cfgd != num_elems) { + ice_debug(hw, ICE_DBG_SCHED, "Config sched elem error\n"); + return ICE_ERR_CFG; + } + + /* Config success case */ + /* Now update local SW DB */ + /* Only copy the data portion of info buffer */ + node->info.data = info->data; + return status; +} + +/** + * ice_sched_cfg_node_bw_alloc - configure node BW weight/alloc params + * @hw: pointer to the HW struct + * @node: sched node to configure + * @rl_type: rate limit type CIR, EIR, or shared + * @bw_alloc: BW weight/allocation + * + * This function configures node element's BW allocation. 
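ice_sched_update_elem() above follows a commit-after-ack pattern: copy the node's element data into a scratch buffer, zero the fields the command treats as reserved, send it to firmware, and only then mirror the change into the local software copy. The sketch below models that idea with invented types; the hw_cfg_elem() stub stands in for the admin-queue call.

#include <stdbool.h>
#include <stdio.h>

struct elem_data {
	unsigned short parent;	/* reserved in the update command */
	unsigned char flags;	/* reserved in the update command */
	unsigned int bw_idx;	/* the field we actually want to change */
};

/* Hypothetical stand-in for the admin-queue call; always succeeds here. */
static bool hw_cfg_elem(const struct elem_data *buf)
{
	(void)buf;
	return true;
}

static int update_elem(struct elem_data *node, const struct elem_data *info)
{
	struct elem_data buf = *info;

	/* Reserved fields must not carry stale values in the command. */
	buf.parent = 0;
	buf.flags = 0;

	if (!hw_cfg_elem(&buf))
		return -1;

	/* Commit to the local copy only after the HW accepted the change. */
	node->bw_idx = info->bw_idx;
	return 0;
}

int main(void)
{
	struct elem_data node = { .parent = 5, .flags = 1, .bw_idx = 0 };
	struct elem_data want = node;

	want.bw_idx = 42;
	if (!update_elem(&node, &want))
		printf("node bw_idx is now %u\n", node.bw_idx);
	return 0;
}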
+ */ +static enum ice_status +ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node, + enum ice_rl_type rl_type, u8 bw_alloc) +{ + struct ice_aqc_txsched_elem_data buf; + struct ice_aqc_txsched_elem *data; + enum ice_status status; + + buf = node->info; + data = &buf.data; + if (rl_type == ICE_MIN_BW) { + data->valid_sections |= ICE_AQC_ELEM_VALID_CIR; + data->cir_bw.bw_alloc = cpu_to_le16(bw_alloc); + } else if (rl_type == ICE_MAX_BW) { + data->valid_sections |= ICE_AQC_ELEM_VALID_EIR; + data->eir_bw.bw_alloc = cpu_to_le16(bw_alloc); + } else { + return ICE_ERR_PARAM; + } + + /* Configure element */ + status = ice_sched_update_elem(hw, node, &buf); + return status; +} + +/** + * ice_set_clear_cir_bw - set or clear CIR BW + * @bw_t_info: bandwidth type information structure + * @bw: bandwidth in Kbps - Kilo bits per sec + * + * Save or clear CIR bandwidth (BW) in the passed param bw_t_info. + */ +static void +ice_set_clear_cir_bw(struct ice_bw_type_info *bw_t_info, u32 bw) +{ + if (bw == ICE_SCHED_DFLT_BW) { + clear_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap); + bw_t_info->cir_bw.bw = 0; + } else { + /* Save type of BW information */ + set_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap); + bw_t_info->cir_bw.bw = bw; + } +} + +/** + * ice_set_clear_eir_bw - set or clear EIR BW + * @bw_t_info: bandwidth type information structure + * @bw: bandwidth in Kbps - Kilo bits per sec + * + * Save or clear EIR bandwidth (BW) in the passed param bw_t_info. + */ +static void +ice_set_clear_eir_bw(struct ice_bw_type_info *bw_t_info, u32 bw) +{ + if (bw == ICE_SCHED_DFLT_BW) { + clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap); + bw_t_info->eir_bw.bw = 0; + } else { + /* EIR BW and Shared BW profiles are mutually exclusive and + * hence only one of them may be set for any given element. + * First clear earlier saved shared BW information. + */ + clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap); + bw_t_info->shared_bw = 0; + /* save EIR BW information */ + set_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap); + bw_t_info->eir_bw.bw = bw; + } +} + +/** + * ice_set_clear_shared_bw - set or clear shared BW + * @bw_t_info: bandwidth type information structure + * @bw: bandwidth in Kbps - Kilo bits per sec + * + * Save or clear shared bandwidth (BW) in the passed param bw_t_info. + */ +static void +ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw) +{ + if (bw == ICE_SCHED_DFLT_BW) { + clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap); + bw_t_info->shared_bw = 0; + } else { + /* EIR BW and Shared BW profiles are mutually exclusive and + * hence only one of them may be set for any given element. + * First clear earlier saved EIR BW information. + */ + clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap); + bw_t_info->eir_bw.bw = 0; + /* save shared BW information */ + set_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap); + bw_t_info->shared_bw = bw; + } +} + +/** + * ice_sched_calc_wakeup - calculate RL profile wakeup parameter + * @bw: bandwidth in Kbps + * + * This function calculates the wakeup parameter of RL profile. 
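The save/clear helpers above enforce that EIR and shared BW are mutually exclusive in the saved bw_t_info state: setting one clears the other. The small sketch below reproduces that bookkeeping with a plain bit mask; the flag names and values are illustrative only.

#include <stdio.h>

#define BW_CIR    (1u << 0)
#define BW_EIR    (1u << 1)
#define BW_SHARED (1u << 2)

struct bw_info {
	unsigned int types;	/* which BW types are currently saved */
	unsigned int eir_bw;
	unsigned int shared_bw;
};

static void set_eir(struct bw_info *b, unsigned int bw)
{
	/* EIR and shared BW may not be saved at the same time. */
	b->types &= ~BW_SHARED;
	b->shared_bw = 0;
	b->types |= BW_EIR;
	b->eir_bw = bw;
}

static void set_shared(struct bw_info *b, unsigned int bw)
{
	b->types &= ~BW_EIR;
	b->eir_bw = 0;
	b->types |= BW_SHARED;
	b->shared_bw = bw;
}

int main(void)
{
	struct bw_info b = { 0 };

	set_eir(&b, 50000);
	set_shared(&b, 20000);	/* drops the previously saved EIR value */
	printf("types=0x%x eir=%u shared=%u\n", b.types, b.eir_bw, b.shared_bw);
	return 0;
}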
+ */ +static u16 ice_sched_calc_wakeup(s32 bw) +{ + s64 bytes_per_sec, wakeup_int, wakeup_a, wakeup_b, wakeup_f; + s32 wakeup_f_int; + u16 wakeup = 0; + + /* Get the wakeup integer value */ + bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE); + wakeup_int = div64_long(ICE_RL_PROF_FREQUENCY, bytes_per_sec); + if (wakeup_int > 63) { + wakeup = (u16)((1 << 15) | wakeup_int); + } else { + /* Calculate fraction value up to 4 decimals + * Convert Integer value to a constant multiplier + */ + wakeup_b = (s64)ICE_RL_PROF_MULTIPLIER * wakeup_int; + wakeup_a = div64_long((s64)ICE_RL_PROF_MULTIPLIER * + ICE_RL_PROF_FREQUENCY, + bytes_per_sec); + + /* Get Fraction value */ + wakeup_f = wakeup_a - wakeup_b; + + /* Round up the Fractional value via Ceil(Fractional value) */ + if (wakeup_f > div64_long(ICE_RL_PROF_MULTIPLIER, 2)) + wakeup_f += 1; + + wakeup_f_int = (s32)div64_long(wakeup_f * ICE_RL_PROF_FRACTION, + ICE_RL_PROF_MULTIPLIER); + wakeup |= (u16)(wakeup_int << 9); + wakeup |= (u16)(0x1ff & wakeup_f_int); + } + + return wakeup; +} + +/** + * ice_sched_bw_to_rl_profile - convert BW to profile parameters + * @bw: bandwidth in Kbps + * @profile: profile parameters to return + * + * This function converts the BW to profile structure format. + */ +static enum ice_status +ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile) +{ + enum ice_status status = ICE_ERR_PARAM; + s64 bytes_per_sec, ts_rate, mv_tmp; + bool found = false; + s32 encode = 0; + s64 mv = 0; + s32 i; + + /* Bw settings range is from 0.5Mb/sec to 100Gb/sec */ + if (bw < ICE_SCHED_MIN_BW || bw > ICE_SCHED_MAX_BW) + return status; + + /* Bytes per second from Kbps */ + bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE); + + /* encode is 6 bits but really useful are 5 bits */ + for (i = 0; i < 64; i++) { + u64 pow_result = BIT_ULL(i); + + ts_rate = div64_long((s64)ICE_RL_PROF_FREQUENCY, + pow_result * ICE_RL_PROF_TS_MULTIPLIER); + if (ts_rate <= 0) + continue; + + /* Multiplier value */ + mv_tmp = div64_long(bytes_per_sec * ICE_RL_PROF_MULTIPLIER, + ts_rate); + + /* Round to the nearest ICE_RL_PROF_MULTIPLIER */ + mv = round_up_64bit(mv_tmp, ICE_RL_PROF_MULTIPLIER); + + /* First multiplier value greater than the given + * accuracy bytes + */ + if (mv > ICE_RL_PROF_ACCURACY_BYTES) { + encode = i; + found = true; + break; + } + } + if (found) { + u16 wm; + + wm = ice_sched_calc_wakeup(bw); + profile->rl_multiply = cpu_to_le16(mv); + profile->wake_up_calc = cpu_to_le16(wm); + profile->rl_encode = cpu_to_le16(encode); + status = 0; + } else { + status = ICE_ERR_DOES_NOT_EXIST; + } + + return status; +} + +/** + * ice_sched_add_rl_profile - add RL profile + * @pi: port information structure + * @rl_type: type of rate limit BW - min, max, or shared + * @bw: bandwidth in Kbps - Kilo bits per sec + * @layer_num: specifies in which layer to create profile + * + * This function first checks the existing list for corresponding BW + * parameter. If it exists, it returns the associated profile otherwise + * it creates a new rate limit profile for requested BW, and adds it to + * the HW DB and local list. It returns the new profile or null on error. + * The caller needs to hold the scheduler lock. 
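The wakeup arithmetic above is easiest to follow with concrete numbers. The userspace sketch below mirrors ice_sched_calc_wakeup() using the ICE_RL_PROF_* constants defined later in this patch in ice_sched.h (446 MHz profile frequency, multiplier 10000, fraction 512); it is a model of the arithmetic for experimentation, not the driver function itself.

#include <stdint.h>
#include <stdio.h>

#define RL_PROF_FREQUENCY  446000000LL
#define RL_PROF_MULTIPLIER 10000LL
#define RL_PROF_FRACTION   512LL

/* Mirror of the wakeup computation shown above, for bw in Kbps. */
static uint16_t calc_wakeup(int64_t bw_kbps)
{
	int64_t bytes_per_sec = bw_kbps * 1000 / 8;
	int64_t wakeup_int = RL_PROF_FREQUENCY / bytes_per_sec;
	uint16_t wakeup;

	if (wakeup_int > 63) {
		wakeup = (uint16_t)((1 << 15) | wakeup_int);
	} else {
		int64_t wakeup_b = RL_PROF_MULTIPLIER * wakeup_int;
		int64_t wakeup_a = RL_PROF_MULTIPLIER * RL_PROF_FREQUENCY /
				   bytes_per_sec;
		int64_t wakeup_f = wakeup_a - wakeup_b;

		/* Round up the fractional part, as the driver does. */
		if (wakeup_f > RL_PROF_MULTIPLIER / 2)
			wakeup_f += 1;

		wakeup = (uint16_t)(wakeup_int << 9);
		wakeup |= (uint16_t)(0x1ff &
			  (wakeup_f * RL_PROF_FRACTION / RL_PROF_MULTIPLIER));
	}
	return wakeup;
}

int main(void)
{
	/* 10 Gbps: the integer part is 0, so the fractional path is taken. */
	printf("wakeup(10 Gbps) = 0x%04x\n", calc_wakeup(10000000));
	/* 1 Mbps: the integer part exceeds 63, so only the coarse encoding is used. */
	printf("wakeup(1 Mbps)  = 0x%04x\n", calc_wakeup(1000));
	return 0;
}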
+ */ +static struct ice_aqc_rl_profile_info * +ice_sched_add_rl_profile(struct ice_port_info *pi, + enum ice_rl_type rl_type, u32 bw, u8 layer_num) +{ + struct ice_aqc_rl_profile_generic_elem *buf; + struct ice_aqc_rl_profile_info *rl_prof_elem; + u16 profiles_added = 0, num_profiles = 1; + enum ice_status status; + struct ice_hw *hw; + u8 profile_type; + + if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM) + return NULL; + switch (rl_type) { + case ICE_MIN_BW: + profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR; + break; + case ICE_MAX_BW: + profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR; + break; + case ICE_SHARED_BW: + profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL; + break; + default: + return NULL; + } + + if (!pi) + return NULL; + hw = pi->hw; + list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num], + list_entry) + if (rl_prof_elem->profile.flags == profile_type && + rl_prof_elem->bw == bw) + /* Return existing profile ID info */ + return rl_prof_elem; + + /* Create new profile ID */ + rl_prof_elem = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rl_prof_elem), + GFP_KERNEL); + + if (!rl_prof_elem) + return NULL; + + status = ice_sched_bw_to_rl_profile(bw, &rl_prof_elem->profile); + if (status) + goto exit_add_rl_prof; + + rl_prof_elem->bw = bw; + /* layer_num is zero relative, and fw expects level from 1 to 9 */ + rl_prof_elem->profile.level = layer_num + 1; + rl_prof_elem->profile.flags = profile_type; + rl_prof_elem->profile.max_burst_size = cpu_to_le16(hw->max_burst_size); + + /* Create new entry in HW DB */ + buf = (struct ice_aqc_rl_profile_generic_elem *) + &rl_prof_elem->profile; + status = ice_aq_add_rl_profile(hw, num_profiles, buf, sizeof(*buf), + &profiles_added, NULL); + if (status || profiles_added != num_profiles) + goto exit_add_rl_prof; + + /* Good entry - add in the list */ + rl_prof_elem->prof_id_ref = 0; + list_add(&rl_prof_elem->list_entry, &pi->rl_prof_list[layer_num]); + return rl_prof_elem; + +exit_add_rl_prof: + devm_kfree(ice_hw_to_dev(hw), rl_prof_elem); + return NULL; +} + +/** + * ice_sched_cfg_node_bw_lmt - configure node sched params + * @hw: pointer to the HW struct + * @node: sched node to configure + * @rl_type: rate limit type CIR, EIR, or shared + * @rl_prof_id: rate limit profile ID + * + * This function configures node element's BW limit. 
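ice_sched_add_rl_profile() above is a lookup-or-create cache: a profile already on the per-layer list with the same type and bandwidth is reused, otherwise a new one is programmed into the HW and appended. The sketch below shows the same idea over a trivial array-backed cache; all fields and limits here are invented for illustration.

#include <stdio.h>

struct rl_prof {
	int type;		/* CIR/EIR/SRL stand-in */
	unsigned int bw;	/* requested bandwidth */
	unsigned int ref;
};

static struct rl_prof cache[8];
static int cache_len;

static struct rl_prof *add_rl_profile(int type, unsigned int bw)
{
	int i;

	/* Reuse an existing profile for the same (type, bw) pair. */
	for (i = 0; i < cache_len; i++)
		if (cache[i].type == type && cache[i].bw == bw)
			return &cache[i];

	if (cache_len == (int)(sizeof(cache) / sizeof(cache[0])))
		return NULL;	/* table full; the driver would fail the AQ add */

	cache[cache_len].type = type;
	cache[cache_len].bw = bw;
	cache[cache_len].ref = 0;
	return &cache[cache_len++];
}

int main(void)
{
	struct rl_prof *a = add_rl_profile(1, 50000);
	struct rl_prof *b = add_rl_profile(1, 50000);

	printf("same profile reused: %s\n", a == b ? "yes" : "no");
	return 0;
}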
+ */ +static enum ice_status +ice_sched_cfg_node_bw_lmt(struct ice_hw *hw, struct ice_sched_node *node, + enum ice_rl_type rl_type, u16 rl_prof_id) +{ + struct ice_aqc_txsched_elem_data buf; + struct ice_aqc_txsched_elem *data; + + buf = node->info; + data = &buf.data; + switch (rl_type) { + case ICE_MIN_BW: + data->valid_sections |= ICE_AQC_ELEM_VALID_CIR; + data->cir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id); + break; + case ICE_MAX_BW: + /* EIR BW and Shared BW profiles are mutually exclusive and + * hence only one of them may be set for any given element + */ + if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED) + return ICE_ERR_CFG; + data->valid_sections |= ICE_AQC_ELEM_VALID_EIR; + data->eir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id); + break; + case ICE_SHARED_BW: + /* Check for removing shared BW */ + if (rl_prof_id == ICE_SCHED_NO_SHARED_RL_PROF_ID) { + /* remove shared profile */ + data->valid_sections &= ~ICE_AQC_ELEM_VALID_SHARED; + data->srl_id = 0; /* clear SRL field */ + + /* enable back EIR to default profile */ + data->valid_sections |= ICE_AQC_ELEM_VALID_EIR; + data->eir_bw.bw_profile_idx = + cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID); + break; + } + /* EIR BW and Shared BW profiles are mutually exclusive and + * hence only one of them may be set for any given element + */ + if ((data->valid_sections & ICE_AQC_ELEM_VALID_EIR) && + (le16_to_cpu(data->eir_bw.bw_profile_idx) != + ICE_SCHED_DFLT_RL_PROF_ID)) + return ICE_ERR_CFG; + /* EIR BW is set to default, disable it */ + data->valid_sections &= ~ICE_AQC_ELEM_VALID_EIR; + /* Okay to enable shared BW now */ + data->valid_sections |= ICE_AQC_ELEM_VALID_SHARED; + data->srl_id = cpu_to_le16(rl_prof_id); + break; + default: + /* Unknown rate limit type */ + return ICE_ERR_PARAM; + } + + /* Configure element */ + return ice_sched_update_elem(hw, node, &buf); +} + +/** + * ice_sched_get_node_rl_prof_id - get node's rate limit profile ID + * @node: sched node + * @rl_type: rate limit type + * + * If existing profile matches, it returns the corresponding rate + * limit profile ID, otherwise it returns an invalid ID as error. + */ +static u16 +ice_sched_get_node_rl_prof_id(struct ice_sched_node *node, + enum ice_rl_type rl_type) +{ + u16 rl_prof_id = ICE_SCHED_INVAL_PROF_ID; + struct ice_aqc_txsched_elem *data; + + data = &node->info.data; + switch (rl_type) { + case ICE_MIN_BW: + if (data->valid_sections & ICE_AQC_ELEM_VALID_CIR) + rl_prof_id = le16_to_cpu(data->cir_bw.bw_profile_idx); + break; + case ICE_MAX_BW: + if (data->valid_sections & ICE_AQC_ELEM_VALID_EIR) + rl_prof_id = le16_to_cpu(data->eir_bw.bw_profile_idx); + break; + case ICE_SHARED_BW: + if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED) + rl_prof_id = le16_to_cpu(data->srl_id); + break; + default: + break; + } + + return rl_prof_id; +} + +/** + * ice_sched_get_rl_prof_layer - selects rate limit profile creation layer + * @pi: port information structure + * @rl_type: type of rate limit BW - min, max, or shared + * @layer_index: layer index + * + * This function returns requested profile creation layer. 
+ */ +static u8 +ice_sched_get_rl_prof_layer(struct ice_port_info *pi, enum ice_rl_type rl_type, + u8 layer_index) +{ + struct ice_hw *hw = pi->hw; + + if (layer_index >= hw->num_tx_sched_layers) + return ICE_SCHED_INVAL_LAYER_NUM; + switch (rl_type) { + case ICE_MIN_BW: + if (hw->layer_info[layer_index].max_cir_rl_profiles) + return layer_index; + break; + case ICE_MAX_BW: + if (hw->layer_info[layer_index].max_eir_rl_profiles) + return layer_index; + break; + case ICE_SHARED_BW: + /* if current layer doesn't support SRL profile creation + * then try a layer up or down. + */ + if (hw->layer_info[layer_index].max_srl_profiles) + return layer_index; + else if (layer_index < hw->num_tx_sched_layers - 1 && + hw->layer_info[layer_index + 1].max_srl_profiles) + return layer_index + 1; + else if (layer_index > 0 && + hw->layer_info[layer_index - 1].max_srl_profiles) + return layer_index - 1; + break; + default: + break; + } + return ICE_SCHED_INVAL_LAYER_NUM; +} + +/** + * ice_sched_get_srl_node - get shared rate limit node + * @node: tree node + * @srl_layer: shared rate limit layer + * + * This function returns SRL node to be used for shared rate limit purpose. + * The caller needs to hold scheduler lock. + */ +static struct ice_sched_node * +ice_sched_get_srl_node(struct ice_sched_node *node, u8 srl_layer) +{ + if (srl_layer > node->tx_sched_layer) + return node->children[0]; + else if (srl_layer < node->tx_sched_layer) + /* Node can't be created without a parent. It will always + * have a valid parent except root node. + */ + return node->parent; + else + return node; +} + +/** + * ice_sched_rm_rl_profile - remove RL profile ID + * @pi: port information structure + * @layer_num: layer number where profiles are saved + * @profile_type: profile type like EIR, CIR, or SRL + * @profile_id: profile ID to remove + * + * This function removes rate limit profile from layer 'layer_num' of type + * 'profile_type' and profile ID as 'profile_id'. The caller needs to hold + * scheduler lock. + */ +static enum ice_status +ice_sched_rm_rl_profile(struct ice_port_info *pi, u8 layer_num, u8 profile_type, + u16 profile_id) +{ + struct ice_aqc_rl_profile_info *rl_prof_elem; + enum ice_status status = 0; + + if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM) + return ICE_ERR_PARAM; + /* Check the existing list for RL profile */ + list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num], + list_entry) + if (rl_prof_elem->profile.flags == profile_type && + le16_to_cpu(rl_prof_elem->profile.profile_id) == + profile_id) { + if (rl_prof_elem->prof_id_ref) + rl_prof_elem->prof_id_ref--; + + /* Remove old profile ID from database */ + status = ice_sched_del_rl_profile(pi->hw, rl_prof_elem); + if (status && status != ICE_ERR_IN_USE) + ice_debug(pi->hw, ICE_DBG_SCHED, + "Remove rl profile failed\n"); + break; + } + if (status == ICE_ERR_IN_USE) + status = 0; + return status; +} + +/** + * ice_sched_set_node_bw_dflt - set node's bandwidth limit to default + * @pi: port information structure + * @node: pointer to node structure + * @rl_type: rate limit type min, max, or shared + * @layer_num: layer number where RL profiles are saved + * + * This function configures node element's BW rate limit profile ID of + * type CIR, EIR, or SRL to default. This function needs to be called + * with the scheduler lock held. 
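ice_sched_get_rl_prof_layer() above picks the layer where a profile of the requested type can actually live, and for SRL it falls back to the adjacent layer above or below when the current layer advertises no SRL capacity. A compact model of that selection follows; the per-layer capability table is made up for the example.

#include <stdio.h>

#define NUM_LAYERS  9
#define INVAL_LAYER 0xFF

/* Hypothetical per-layer SRL profile capacity (0 = not supported there). */
static const int max_srl[NUM_LAYERS] = { 0, 1, 1, 0, 4, 4, 0, 0, 0 };

static unsigned int srl_layer(unsigned int layer)
{
	if (layer >= NUM_LAYERS)
		return INVAL_LAYER;
	if (max_srl[layer])
		return layer;
	/* Try one layer down the tree, then one layer up, as the SRL case does. */
	if (layer < NUM_LAYERS - 1 && max_srl[layer + 1])
		return layer + 1;
	if (layer > 0 && max_srl[layer - 1])
		return layer - 1;
	return INVAL_LAYER;
}

int main(void)
{
	printf("SRL for layer 3 goes to layer %u\n", srl_layer(3));
	printf("SRL for layer 7 -> %u (0x%X means no suitable layer)\n",
	       srl_layer(7), srl_layer(7));
	return 0;
}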
+ */ +static enum ice_status +ice_sched_set_node_bw_dflt(struct ice_port_info *pi, + struct ice_sched_node *node, + enum ice_rl_type rl_type, u8 layer_num) +{ + enum ice_status status; + struct ice_hw *hw; + u8 profile_type; + u16 rl_prof_id; + u16 old_id; + + hw = pi->hw; + switch (rl_type) { + case ICE_MIN_BW: + profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR; + rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID; + break; + case ICE_MAX_BW: + profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR; + rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID; + break; + case ICE_SHARED_BW: + profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL; + /* No SRL is configured for default case */ + rl_prof_id = ICE_SCHED_NO_SHARED_RL_PROF_ID; + break; + default: + return ICE_ERR_PARAM; + } + /* Save existing RL prof ID for later clean up */ + old_id = ice_sched_get_node_rl_prof_id(node, rl_type); + /* Configure BW scheduling parameters */ + status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id); + if (status) + return status; + + /* Remove stale RL profile ID */ + if (old_id == ICE_SCHED_DFLT_RL_PROF_ID || + old_id == ICE_SCHED_INVAL_PROF_ID) + return 0; + + return ice_sched_rm_rl_profile(pi, layer_num, profile_type, old_id); +} + +/** + * ice_sched_set_eir_srl_excl - set EIR/SRL exclusiveness + * @pi: port information structure + * @node: pointer to node structure + * @layer_num: layer number where rate limit profiles are saved + * @rl_type: rate limit type min, max, or shared + * @bw: bandwidth value + * + * This function prepares node element's bandwidth to SRL or EIR exclusively. + * EIR BW and Shared BW profiles are mutually exclusive and hence only one of + * them may be set for any given element. This function needs to be called + * with the scheduler lock held. + */ +static enum ice_status +ice_sched_set_eir_srl_excl(struct ice_port_info *pi, + struct ice_sched_node *node, + u8 layer_num, enum ice_rl_type rl_type, u32 bw) +{ + if (rl_type == ICE_SHARED_BW) { + /* SRL node passed in this case, it may be different node */ + if (bw == ICE_SCHED_DFLT_BW) + /* SRL being removed, ice_sched_cfg_node_bw_lmt() + * enables EIR to default. EIR is not set in this + * case, so no additional action is required. + */ + return 0; + + /* SRL being configured, set EIR to default here. + * ice_sched_cfg_node_bw_lmt() disables EIR when it + * configures SRL + */ + return ice_sched_set_node_bw_dflt(pi, node, ICE_MAX_BW, + layer_num); + } else if (rl_type == ICE_MAX_BW && + node->info.data.valid_sections & ICE_AQC_ELEM_VALID_SHARED) { + /* Remove Shared profile. Set default shared BW call + * removes shared profile for a node. + */ + return ice_sched_set_node_bw_dflt(pi, node, + ICE_SHARED_BW, + layer_num); + } + return 0; +} + +/** + * ice_sched_set_node_bw - set node's bandwidth + * @pi: port information structure + * @node: tree node + * @rl_type: rate limit type min, max, or shared + * @bw: bandwidth in Kbps - Kilo bits per sec + * @layer_num: layer number + * + * This function adds new profile corresponding to requested BW, configures + * node's RL profile ID of type CIR, EIR, or SRL, and removes old profile + * ID from local database. The caller needs to hold scheduler lock. 
+ */ +static enum ice_status +ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node, + enum ice_rl_type rl_type, u32 bw, u8 layer_num) +{ + struct ice_aqc_rl_profile_info *rl_prof_info; + enum ice_status status = ICE_ERR_PARAM; + struct ice_hw *hw = pi->hw; + u16 old_id, rl_prof_id; + + rl_prof_info = ice_sched_add_rl_profile(pi, rl_type, bw, layer_num); + if (!rl_prof_info) + return status; + + rl_prof_id = le16_to_cpu(rl_prof_info->profile.profile_id); + + /* Save existing RL prof ID for later clean up */ + old_id = ice_sched_get_node_rl_prof_id(node, rl_type); + /* Configure BW scheduling parameters */ + status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id); + if (status) + return status; + + /* New changes has been applied */ + /* Increment the profile ID reference count */ + rl_prof_info->prof_id_ref++; + + /* Check for old ID removal */ + if ((old_id == ICE_SCHED_DFLT_RL_PROF_ID && rl_type != ICE_SHARED_BW) || + old_id == ICE_SCHED_INVAL_PROF_ID || old_id == rl_prof_id) + return 0; + + return ice_sched_rm_rl_profile(pi, layer_num, + rl_prof_info->profile.flags, + old_id); +} + +/** + * ice_sched_set_node_bw_lmt - set node's BW limit + * @pi: port information structure + * @node: tree node + * @rl_type: rate limit type min, max, or shared + * @bw: bandwidth in Kbps - Kilo bits per sec + * + * It updates node's BW limit parameters like BW RL profile ID of type CIR, + * EIR, or SRL. The caller needs to hold scheduler lock. + */ +static enum ice_status +ice_sched_set_node_bw_lmt(struct ice_port_info *pi, struct ice_sched_node *node, + enum ice_rl_type rl_type, u32 bw) +{ + struct ice_sched_node *cfg_node = node; + enum ice_status status; + + struct ice_hw *hw; + u8 layer_num; + + if (!pi) + return ICE_ERR_PARAM; + hw = pi->hw; + /* Remove unused RL profile IDs from HW and SW DB */ + ice_sched_rm_unused_rl_prof(pi); + layer_num = ice_sched_get_rl_prof_layer(pi, rl_type, + node->tx_sched_layer); + if (layer_num >= hw->num_tx_sched_layers) + return ICE_ERR_PARAM; + + if (rl_type == ICE_SHARED_BW) { + /* SRL node may be different */ + cfg_node = ice_sched_get_srl_node(node, layer_num); + if (!cfg_node) + return ICE_ERR_CFG; + } + /* EIR BW and Shared BW profiles are mutually exclusive and + * hence only one of them may be set for any given element + */ + status = ice_sched_set_eir_srl_excl(pi, cfg_node, layer_num, rl_type, + bw); + if (status) + return status; + if (bw == ICE_SCHED_DFLT_BW) + return ice_sched_set_node_bw_dflt(pi, cfg_node, rl_type, + layer_num); + return ice_sched_set_node_bw(pi, cfg_node, rl_type, bw, layer_num); +} + +/** + * ice_sched_set_node_bw_dflt_lmt - set node's BW limit to default + * @pi: port information structure + * @node: pointer to node structure + * @rl_type: rate limit type min, max, or shared + * + * This function configures node element's BW rate limit profile ID of + * type CIR, EIR, or SRL to default. This function needs to be called + * with the scheduler lock held. + */ +static enum ice_status +ice_sched_set_node_bw_dflt_lmt(struct ice_port_info *pi, + struct ice_sched_node *node, + enum ice_rl_type rl_type) +{ + return ice_sched_set_node_bw_lmt(pi, node, rl_type, + ICE_SCHED_DFLT_BW); +} + +/** + * ice_sched_validate_srl_node - Check node for SRL applicability + * @node: sched node to configure + * @sel_layer: selected SRL layer + * + * This function checks if the SRL can be applied to a selected layer node on + * behalf of the requested node (first argument). 
This function needs to be + * called with scheduler lock held. + */ +static enum ice_status +ice_sched_validate_srl_node(struct ice_sched_node *node, u8 sel_layer) +{ + /* SRL profiles are not available on all layers. Check if the + * SRL profile can be applied to a node above or below the + * requested node. SRL configuration is possible only if the + * selected layer's node has single child. + */ + if (sel_layer == node->tx_sched_layer || + ((sel_layer == node->tx_sched_layer + 1) && + node->num_children == 1) || + ((sel_layer == node->tx_sched_layer - 1) && + (node->parent && node->parent->num_children == 1))) + return 0; + + return ICE_ERR_CFG; +} + +/** + * ice_sched_save_q_bw - save queue node's BW information + * @q_ctx: queue context structure + * @rl_type: rate limit type min, max, or shared + * @bw: bandwidth in Kbps - Kilo bits per sec + * + * Save BW information of queue type node for post replay use. + */ +static enum ice_status +ice_sched_save_q_bw(struct ice_q_ctx *q_ctx, enum ice_rl_type rl_type, u32 bw) +{ + switch (rl_type) { + case ICE_MIN_BW: + ice_set_clear_cir_bw(&q_ctx->bw_t_info, bw); + break; + case ICE_MAX_BW: + ice_set_clear_eir_bw(&q_ctx->bw_t_info, bw); + break; + case ICE_SHARED_BW: + ice_set_clear_shared_bw(&q_ctx->bw_t_info, bw); + break; + default: + return ICE_ERR_PARAM; + } + return 0; +} + +/** + * ice_sched_set_q_bw_lmt - sets queue BW limit + * @pi: port information structure + * @vsi_handle: sw VSI handle + * @tc: traffic class + * @q_handle: software queue handle + * @rl_type: min, max, or shared + * @bw: bandwidth in Kbps + * + * This function sets BW limit of queue scheduling node. + */ +static enum ice_status +ice_sched_set_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + u16 q_handle, enum ice_rl_type rl_type, u32 bw) +{ + enum ice_status status = ICE_ERR_PARAM; + struct ice_sched_node *node; + struct ice_q_ctx *q_ctx; + + if (!ice_is_vsi_valid(pi->hw, vsi_handle)) + return ICE_ERR_PARAM; + mutex_lock(&pi->sched_lock); + q_ctx = ice_get_lan_q_ctx(pi->hw, vsi_handle, tc, q_handle); + if (!q_ctx) + goto exit_q_bw_lmt; + node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid); + if (!node) { + ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong q_teid\n"); + goto exit_q_bw_lmt; + } + + /* Return error if it is not a leaf node */ + if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) + goto exit_q_bw_lmt; + + /* SRL bandwidth layer selection */ + if (rl_type == ICE_SHARED_BW) { + u8 sel_layer; /* selected layer */ + + sel_layer = ice_sched_get_rl_prof_layer(pi, rl_type, + node->tx_sched_layer); + if (sel_layer >= pi->hw->num_tx_sched_layers) { + status = ICE_ERR_PARAM; + goto exit_q_bw_lmt; + } + status = ice_sched_validate_srl_node(node, sel_layer); + if (status) + goto exit_q_bw_lmt; + } + + if (bw == ICE_SCHED_DFLT_BW) + status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type); + else + status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw); + + if (!status) + status = ice_sched_save_q_bw(q_ctx, rl_type, bw); + +exit_q_bw_lmt: + mutex_unlock(&pi->sched_lock); + return status; +} + +/** + * ice_cfg_q_bw_lmt - configure queue BW limit + * @pi: port information structure + * @vsi_handle: sw VSI handle + * @tc: traffic class + * @q_handle: software queue handle + * @rl_type: min, max, or shared + * @bw: bandwidth in Kbps + * + * This function configures BW limit of queue scheduling node. 
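ice_sched_validate_srl_node() above accepts the selected SRL layer only when it is the node's own layer, or an adjacent layer reachable through a single-child relationship. The sketch below restates that predicate over a minimal node structure; the field names are invented stand-ins for the scheduler node fields.

#include <stdbool.h>
#include <stdio.h>

struct node {
	int layer;
	int num_children;
	struct node *parent;
};

static bool srl_layer_ok(const struct node *n, int sel_layer)
{
	if (sel_layer == n->layer)
		return true;
	/* One layer below (toward the leaves): node must have a single child. */
	if (sel_layer == n->layer + 1 && n->num_children == 1)
		return true;
	/* One layer above (toward the root): parent must have a single child. */
	if (sel_layer == n->layer - 1 &&
	    n->parent && n->parent->num_children == 1)
		return true;
	return false;
}

int main(void)
{
	struct node parent = { .layer = 4, .num_children = 1, .parent = NULL };
	struct node leaf = { .layer = 5, .num_children = 0, .parent = &parent };

	printf("layer 5 ok: %d\n", srl_layer_ok(&leaf, 5));
	printf("layer 4 ok: %d\n", srl_layer_ok(&leaf, 4)); /* parent has one child */
	printf("layer 6 ok: %d\n", srl_layer_ok(&leaf, 6)); /* leaf has no children */
	return 0;
}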
+ */ +enum ice_status +ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + u16 q_handle, enum ice_rl_type rl_type, u32 bw) +{ + return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type, + bw); +} + +/** + * ice_cfg_q_bw_dflt_lmt - configure queue BW default limit + * @pi: port information structure + * @vsi_handle: sw VSI handle + * @tc: traffic class + * @q_handle: software queue handle + * @rl_type: min, max, or shared + * + * This function configures BW default limit of queue scheduling node. + */ +enum ice_status +ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + u16 q_handle, enum ice_rl_type rl_type) +{ + return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type, + ICE_SCHED_DFLT_BW); +} + +/** + * ice_cfg_rl_burst_size - Set burst size value + * @hw: pointer to the HW struct + * @bytes: burst size in bytes + * + * This function configures/set the burst size to requested new value. The new + * burst size value is used for future rate limit calls. It doesn't change the + * existing or previously created RL profiles. + */ +enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes) +{ + u16 burst_size_to_prog; + + if (bytes < ICE_MIN_BURST_SIZE_ALLOWED || + bytes > ICE_MAX_BURST_SIZE_ALLOWED) + return ICE_ERR_PARAM; + if (ice_round_to_num(bytes, 64) <= + ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY) { + /* 64 byte granularity case */ + /* Disable MSB granularity bit */ + burst_size_to_prog = ICE_64_BYTE_GRANULARITY; + /* round number to nearest 64 byte granularity */ + bytes = ice_round_to_num(bytes, 64); + /* The value is in 64 byte chunks */ + burst_size_to_prog |= (u16)(bytes / 64); + } else { + /* k bytes granularity case */ + /* Enable MSB granularity bit */ + burst_size_to_prog = ICE_KBYTE_GRANULARITY; + /* round number to nearest 1024 granularity */ + bytes = ice_round_to_num(bytes, 1024); + /* check rounding doesn't go beyond allowed */ + if (bytes > ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY) + bytes = ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY; + /* The value is in k bytes */ + burst_size_to_prog |= (u16)(bytes / 1024); + } + hw->max_burst_size = burst_size_to_prog; + return 0; +} + +/** + * ice_sched_replay_node_prio - re-configure node priority + * @hw: pointer to the HW struct + * @node: sched node to configure + * @priority: priority value + * + * This function configures node element's priority value. It + * needs to be called with scheduler lock held. + */ +static enum ice_status +ice_sched_replay_node_prio(struct ice_hw *hw, struct ice_sched_node *node, + u8 priority) +{ + struct ice_aqc_txsched_elem_data buf; + struct ice_aqc_txsched_elem *data; + enum ice_status status; + + buf = node->info; + data = &buf.data; + data->valid_sections |= ICE_AQC_ELEM_VALID_GENERIC; + data->generic = priority; + + /* Configure element */ + status = ice_sched_update_elem(hw, node, &buf); + return status; +} + +/** + * ice_sched_replay_node_bw - replay node(s) BW + * @hw: pointer to the HW struct + * @node: sched node to configure + * @bw_t_info: BW type information + * + * This function restores node's BW from bw_t_info. The caller needs + * to hold the scheduler lock. 
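ice_cfg_rl_burst_size() above encodes the burst size into a 12-bit value whose MSB selects 64-byte or 1-Kbyte granularity, with the limits defined in ice_sched.h below. The userspace sketch models that encoding; ice_round_to_num() is approximated here as round-to-nearest, and range validation against the allowed minimum and maximum is omitted.

#include <stdint.h>
#include <stdio.h>

#define KBYTE_GRANULARITY  (1u << 11)
#define MAX_64B_GRAN_BYTES (((1u << 11) - 1) * 64)
#define MAX_KB_GRAN_BYTES  (((1u << 11) - 1) * 1024)

/* Approximation of ice_round_to_num(): round to the nearest multiple. */
static uint32_t round_to_num(uint32_t val, uint32_t mult)
{
	return ((val + mult / 2) / mult) * mult;
}

static uint16_t encode_burst(uint32_t bytes)
{
	if (round_to_num(bytes, 64) <= MAX_64B_GRAN_BYTES) {
		/* 64-byte granularity: MSB stays clear. */
		bytes = round_to_num(bytes, 64);
		return (uint16_t)(bytes / 64);
	}
	/* 1-Kbyte granularity: set the MSB and clamp to the maximum. */
	bytes = round_to_num(bytes, 1024);
	if (bytes > MAX_KB_GRAN_BYTES)
		bytes = MAX_KB_GRAN_BYTES;
	return (uint16_t)(KBYTE_GRANULARITY | (bytes / 1024));
}

int main(void)
{
	printf("4096 bytes -> 0x%04x\n", encode_burst(4096));
	printf("1 MB       -> 0x%04x\n", encode_burst(1024 * 1024));
	return 0;
}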
+ */ +static enum ice_status +ice_sched_replay_node_bw(struct ice_hw *hw, struct ice_sched_node *node, + struct ice_bw_type_info *bw_t_info) +{ + struct ice_port_info *pi = hw->port_info; + enum ice_status status = ICE_ERR_PARAM; + u16 bw_alloc; + + if (!node) + return status; + if (bitmap_empty(bw_t_info->bw_t_bitmap, ICE_BW_TYPE_CNT)) + return 0; + if (test_bit(ICE_BW_TYPE_PRIO, bw_t_info->bw_t_bitmap)) { + status = ice_sched_replay_node_prio(hw, node, + bw_t_info->generic); + if (status) + return status; + } + if (test_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap)) { + status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW, + bw_t_info->cir_bw.bw); + if (status) + return status; + } + if (test_bit(ICE_BW_TYPE_CIR_WT, bw_t_info->bw_t_bitmap)) { + bw_alloc = bw_t_info->cir_bw.bw_alloc; + status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MIN_BW, + bw_alloc); + if (status) + return status; + } + if (test_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap)) { + status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW, + bw_t_info->eir_bw.bw); + if (status) + return status; + } + if (test_bit(ICE_BW_TYPE_EIR_WT, bw_t_info->bw_t_bitmap)) { + bw_alloc = bw_t_info->eir_bw.bw_alloc; + status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MAX_BW, + bw_alloc); + if (status) + return status; + } + if (test_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap)) + status = ice_sched_set_node_bw_lmt(pi, node, ICE_SHARED_BW, + bw_t_info->shared_bw); + return status; +} + +/** + * ice_sched_replay_q_bw - replay queue type node BW + * @pi: port information structure + * @q_ctx: queue context structure + * + * This function replays queue type node bandwidth. This function needs to be + * called with scheduler lock held. + */ +enum ice_status +ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx) +{ + struct ice_sched_node *q_node; + + /* Following also checks the presence of node in tree */ + q_node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid); + if (!q_node) + return ICE_ERR_PARAM; + return ice_sched_replay_node_bw(pi->hw, q_node, &q_ctx->bw_t_info); +} diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index 3902a8ad3025..f0593cfb6521 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -8,6 +8,36 @@ #define ICE_QGRP_LAYER_OFFSET 2 #define ICE_VSI_LAYER_OFFSET 4 +#define ICE_SCHED_INVAL_LAYER_NUM 0xFF +/* Burst size is a 12 bits register that is configured while creating the RL + * profile(s). 
MSB is a granularity bit and tells the granularity type + * 0 - LSB bits are in 64 bytes granularity + * 1 - LSB bits are in 1K bytes granularity + */ +#define ICE_64_BYTE_GRANULARITY 0 +#define ICE_KBYTE_GRANULARITY BIT(11) +#define ICE_MIN_BURST_SIZE_ALLOWED 64 /* In Bytes */ +#define ICE_MAX_BURST_SIZE_ALLOWED \ + ((BIT(11) - 1) * 1024) /* In Bytes */ +#define ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY \ + ((BIT(11) - 1) * 64) /* In Bytes */ +#define ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY ICE_MAX_BURST_SIZE_ALLOWED + +#define ICE_RL_PROF_FREQUENCY 446000000 +#define ICE_RL_PROF_ACCURACY_BYTES 128 +#define ICE_RL_PROF_MULTIPLIER 10000 +#define ICE_RL_PROF_TS_MULTIPLIER 32 +#define ICE_RL_PROF_FRACTION 512 + +/* BW rate limit profile parameters list entry along + * with bandwidth maintained per layer in port info + */ +struct ice_aqc_rl_profile_info { + struct ice_aqc_rl_profile_elem profile; + struct list_head list_entry; + u32 bw; /* requested */ + u16 prof_id_ref; /* profile ID to node association ref count */ +}; struct ice_sched_agg_vsi_info { struct list_head list_entry; @@ -48,4 +78,13 @@ enum ice_status ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs, u8 owner, bool enable); enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle); +enum ice_status +ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + u16 q_handle, enum ice_rl_type rl_type, u32 bw); +enum ice_status +ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + u16 q_handle, enum ice_rl_type rl_type); +enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes); +enum ice_status +ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx); #endif /* _ICE_SCHED_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 1acdd43a2edd..b5a53f862a83 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -416,8 +416,7 @@ ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx, ice_save_vsi_ctx(hw, vsi_handle, tmp_vsi_ctx); } else { /* update with new HW VSI num */ - if (tmp_vsi_ctx->vsi_num != vsi_ctx->vsi_num) - tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num; + tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num; } return 0; @@ -2429,7 +2428,7 @@ ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, if (!ice_is_vsi_valid(hw, vsi_handle)) return ICE_ERR_PARAM; - if (vid) + if (promisc_mask & (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX)) recipe_id = ICE_SW_LKUP_PROMISC_VLAN; else recipe_id = ICE_SW_LKUP_PROMISC; @@ -2441,13 +2440,18 @@ ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, mutex_lock(rule_lock); list_for_each_entry(itr, rule_head, list_entry) { + struct ice_fltr_info *fltr_info; u8 fltr_promisc_mask = 0; if (!ice_vsi_uses_fltr(itr, vsi_handle)) continue; + fltr_info = &itr->fltr_info; + + if (recipe_id == ICE_SW_LKUP_PROMISC_VLAN && + vid != fltr_info->l_data.mac_vlan.vlan_id) + continue; - fltr_promisc_mask |= - ice_determine_promisc_mask(&itr->fltr_info); + fltr_promisc_mask |= ice_determine_promisc_mask(fltr_info); /* Skip if filter is not completely specified by given mask */ if (fltr_promisc_mask & ~promisc_mask) @@ -2455,7 +2459,7 @@ ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle, &remove_list_head, - &itr->fltr_info); + fltr_info); if (status) { mutex_unlock(rule_lock); goto 
free_fltr_list; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index cb123fbe30be..fa14b9545dab 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -14,11 +14,6 @@ #define ICE_VSI_INVAL_ID 0xffff #define ICE_INVAL_Q_HANDLE 0xFFFF -/* VSI queue context structure */ -struct ice_q_ctx { - u16 q_handle; -}; - /* VSI context structure for add/get/update/free operations */ struct ice_vsi_ctx { u16 vsi_num; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 33dd103035dc..2c212f64d99f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -5,8 +5,13 @@ #include <linux/prefetch.h> #include <linux/mm.h> +#include <linux/bpf_trace.h> +#include <net/xdp.h> +#include "ice_txrx_lib.h" +#include "ice_lib.h" #include "ice.h" #include "ice_dcb_lib.h" +#include "ice_xsk.h" #define ICE_RX_HDR_SIZE 256 @@ -19,7 +24,10 @@ static void ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf) { if (tx_buf->skb) { - dev_kfree_skb_any(tx_buf->skb); + if (ice_ring_is_xdp(ring)) + page_frag_free(tx_buf->raw_buf); + else + dev_kfree_skb_any(tx_buf->skb); if (dma_unmap_len(tx_buf, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buf, dma), @@ -51,6 +59,11 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring) { u16 i; + if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_umem) { + ice_xsk_clean_xdp_ring(tx_ring); + goto tx_skip_free; + } + /* ring already cleared, nothing to do */ if (!tx_ring->tx_buf) return; @@ -59,6 +72,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring) for (i = 0; i < tx_ring->count; i++) ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]); +tx_skip_free: memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count); /* Zero out the descriptor ring */ @@ -136,8 +150,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) total_bytes += tx_buf->bytecount; total_pkts += tx_buf->gso_segs; - /* free the skb */ - napi_consume_skb(tx_buf->skb, napi_budget); + if (ice_ring_is_xdp(tx_ring)) + page_frag_free(tx_buf->raw_buf); + else + /* free the skb */ + napi_consume_skb(tx_buf->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -188,12 +205,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) i += tx_ring->count; tx_ring->next_to_clean = i; - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->stats.bytes += total_bytes; - tx_ring->stats.pkts += total_pkts; - u64_stats_update_end(&tx_ring->syncp); - tx_ring->q_vector->tx.total_bytes += total_bytes; - tx_ring->q_vector->tx.total_pkts += total_pkts; + + ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes); + + if (ice_ring_is_xdp(tx_ring)) + return !!budget; netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts, total_bytes); @@ -273,6 +289,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring) if (!rx_ring->rx_buf) return; + if (rx_ring->xsk_umem) { + ice_xsk_clean_rx_ring(rx_ring); + goto rx_skip_free; + } + /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i]; @@ -289,10 +310,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring) */ dma_sync_single_range_for_cpu(dev, rx_buf->dma, rx_buf->page_offset, - ICE_RXBUF_2048, DMA_FROM_DEVICE); + rx_ring->rx_buf_len, + DMA_FROM_DEVICE); /* free resources associated with mapping */ - dma_unmap_page_attrs(dev, 
rx_buf->dma, PAGE_SIZE, + dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias); @@ -300,6 +322,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring) rx_buf->page_offset = 0; } +rx_skip_free: memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count); /* Zero out the descriptor ring */ @@ -319,6 +342,10 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring) void ice_free_rx_ring(struct ice_ring *rx_ring) { ice_clean_rx_ring(rx_ring); + if (rx_ring->vsi->type == ICE_VSI_PF) + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + rx_ring->xdp_prog = NULL; devm_kfree(rx_ring->dev, rx_ring->rx_buf); rx_ring->rx_buf = NULL; @@ -363,6 +390,15 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring) rx_ring->next_to_use = 0; rx_ring->next_to_clean = 0; + + if (ice_is_xdp_ena_vsi(rx_ring->vsi)) + WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog); + + if (rx_ring->vsi->type == ICE_VSI_PF && + !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->q_index)) + goto err; return 0; err: @@ -372,34 +408,110 @@ err: } /** - * ice_release_rx_desc - Store the new tail and head values - * @rx_ring: ring to bump - * @val: new head index + * ice_rx_offset - Return expected offset into page to access data + * @rx_ring: Ring we are requesting offset of + * + * Returns the offset value for ring into the data buffer. */ -static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val) +static unsigned int ice_rx_offset(struct ice_ring *rx_ring) { - u16 prev_ntu = rx_ring->next_to_use; + if (ice_ring_uses_build_skb(rx_ring)) + return ICE_SKB_PAD; + else if (ice_is_xdp_ena_vsi(rx_ring->vsi)) + return XDP_PACKET_HEADROOM; - rx_ring->next_to_use = val; + return 0; +} - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = val; +/** + * ice_run_xdp - Executes an XDP program on initialized xdp_buff + * @rx_ring: Rx ring + * @xdp: xdp_buff used as input to the XDP program + * @xdp_prog: XDP program to run + * + * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} + */ +static int +ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + int err, result = ICE_XDP_PASS; + struct ice_ring *xdp_ring; + u32 act; - /* QRX_TAIL will be updated with any tail value, but hardware ignores - * the lower 3 bits. This makes it so we only bump tail on meaningful - * boundaries. Also, this allows us to bump tail on intervals of 8 up to - * the budget depending on the current traffic load. - */ - val &= ~0x7; - if (prev_ntu != val) { - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); - writel(val, rx_ring->tail); + act = bpf_prog_run_xdp(xdp_prog, xdp); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()]; + result = ice_xmit_xdp_buff(xdp, xdp_ring); + break; + case XDP_REDIRECT: + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + result = !err ? 
ICE_XDP_REDIR : ICE_XDP_CONSUMED; + break; + default: + bpf_warn_invalid_xdp_action(act); + /* fallthrough -- not supported action */ + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + /* fallthrough -- handle aborts by dropping frame */ + case XDP_DROP: + result = ICE_XDP_CONSUMED; + break; } + + return result; +} + +/** + * ice_xdp_xmit - submit packets to XDP ring for transmission + * @dev: netdev + * @n: number of XDP frames to be transmitted + * @frames: XDP frames to be transmitted + * @flags: transmit flags + * + * Returns number of frames successfully sent. Frames that fail are + * free'ed via XDP return API. + * For error cases, a negative errno code is returned and no-frames + * are transmitted (caller must handle freeing frames). + */ +int +ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) +{ + struct ice_netdev_priv *np = netdev_priv(dev); + unsigned int queue_index = smp_processor_id(); + struct ice_vsi *vsi = np->vsi; + struct ice_ring *xdp_ring; + int drops = 0, i; + + if (test_bit(__ICE_DOWN, vsi->state)) + return -ENETDOWN; + + if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq) + return -ENXIO; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + xdp_ring = vsi->xdp_rings[queue_index]; + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + int err; + + err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring); + if (err != ICE_XDP_TX) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + + if (unlikely(flags & XDP_XMIT_FLUSH)) + ice_xdp_ring_update_tail(xdp_ring); + + return n - drops; } /** @@ -423,28 +535,28 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi) } /* alloc new page for storage */ - page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); + page = dev_alloc_pages(ice_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_page_failed++; return false; } /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, + dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); /* if mapping failed free memory back to system since * there isn't much point in holding memory we can't use */ if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, 0); + __free_pages(page, ice_rx_pg_order(rx_ring)); rx_ring->rx_stats.alloc_page_failed++; return false; } bi->dma = dma; bi->page = page; - bi->page_offset = 0; + bi->page_offset = ice_rx_offset(rx_ring); page_ref_add(page, USHRT_MAX - 1); bi->pagecnt_bias = USHRT_MAX; @@ -486,7 +598,7 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, bi->dma, bi->page_offset, - ICE_RXBUF_2048, + rx_ring->rx_buf_len, DMA_FROM_DEVICE); /* Refresh the desc even if buffer_addrs didn't change @@ -557,9 +669,6 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size) */ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) { -#if (PAGE_SIZE >= 8192) - unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048; -#endif unsigned int pagecnt_bias = rx_buf->pagecnt_bias; struct page *page = rx_buf->page; @@ -572,7 +681,9 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) if (unlikely((page_count(page) - pagecnt_bias) > 1)) return false; #else - if (rx_buf->page_offset > last_offset) +#define ICE_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048) + if (rx_buf->page_offset > 
ICE_LAST_OFFSET) return false; #endif /* PAGE_SIZE < 8192) */ @@ -590,6 +701,7 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) /** * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag + * @rx_ring: Rx descriptor ring to transact packets on * @rx_buf: buffer containing page to add * @skb: sk_buff to place the data into * @size: packet length from rx_desc @@ -599,13 +711,13 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) * The function will then update the page offset. */ static void -ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb, - unsigned int size) +ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, + struct sk_buff *skb, unsigned int size) { #if (PAGE_SIZE >= 8192) - unsigned int truesize = SKB_DATA_ALIGN(size); + unsigned int truesize = SKB_DATA_ALIGN(size + ice_rx_offset(rx_ring)); #else - unsigned int truesize = ICE_RXBUF_2048; + unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; #endif if (!size) @@ -679,10 +791,64 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb, } /** + * ice_build_skb - Build skb around an existing buffer + * @rx_ring: Rx descriptor ring to transact packets on + * @rx_buf: Rx buffer to pull data from + * @xdp: xdp_buff pointing to the data + * + * This function builds an skb around an existing Rx buffer, taking care + * to set up the skb correctly and avoid any memcpy overhead. + */ +static struct sk_buff * +ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, + struct xdp_buff *xdp) +{ + unsigned int metasize = xdp->data - xdp->data_meta; +#if (PAGE_SIZE < 8192) + unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); +#endif + struct sk_buff *skb; + + /* Prefetch first cache line of first page. If xdp->data_meta + * is unused, this points exactly as xdp->data, otherwise we + * likely have a consumer accessing first few bytes of meta + * data, and then actual data. + */ + prefetch(xdp->data_meta); +#if L1_CACHE_BYTES < 128 + prefetch((void *)(xdp->data + L1_CACHE_BYTES)); +#endif + /* build an skb around the page buffer */ + skb = build_skb(xdp->data_hard_start, truesize); + if (unlikely(!skb)) + return NULL; + + /* must to record Rx queue, otherwise OS features such as + * symmetric queue won't work + */ + skb_record_rx_queue(skb, rx_ring->q_index); + + /* update pointers within the skb to store the data */ + skb_reserve(skb, xdp->data - xdp->data_hard_start); + __skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); + + /* buffer is used by skb, update page_offset */ + ice_rx_buf_adjust_pg_offset(rx_buf, truesize); + + return skb; +} + +/** * ice_construct_skb - Allocate skb and populate it * @rx_ring: Rx descriptor ring to transact packets on * @rx_buf: Rx buffer to pull data from - * @size: the length of the packet + * @xdp: xdp_buff pointing to the data * * This function allocates an skb. 
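For reference, a minimal userspace sketch of the truesize accounting that ice_build_skb() above relies on: on small-page systems each receive buffer is charged as half a page, while on large-page systems it is charged for headroom plus data, cache-line aligned, plus the aligned skb_shared_info that build_skb() places at the end of the buffer. The cache line size, headroom, and skb_shared_info size below are placeholder assumptions, not the driver's values.

#include <stdio.h>

#define CACHE_LINE        64                      /* assumed L1 cache line size */
#define ALIGN_UP(x, a)    (((x) + (a) - 1) & ~((a) - 1))
#define SHARED_INFO_SIZE  320                     /* placeholder for sizeof(struct skb_shared_info) */
#define PAGE_SZ           4096

int main(void)
{
	unsigned int headroom = 256;                  /* XDP-style headroom, assumed */
	unsigned int frame    = 1514;                 /* received frame length */

	/* small-page case: the page is split in half, so truesize is fixed */
	unsigned int truesize_small = PAGE_SZ / 2;

	/* large-page case: headroom + data rounded to a cache line, plus the
	 * aligned shared-info area appended by build_skb()
	 */
	unsigned int truesize_large = ALIGN_UP(SHARED_INFO_SIZE, CACHE_LINE) +
				      ALIGN_UP(headroom + frame, CACHE_LINE);

	printf("half-page truesize: %u\n", truesize_small);
	printf("large-page truesize: %u\n", truesize_large);
	return 0;
}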
It then populates it with the page * data from the current receive descriptor, taking care to set up the @@ -690,16 +856,16 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb, */ static struct sk_buff * ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, - unsigned int size) + struct xdp_buff *xdp) { - void *va = page_address(rx_buf->page) + rx_buf->page_offset; + unsigned int size = xdp->data_end - xdp->data; unsigned int headlen; struct sk_buff *skb; /* prefetch first cache line of first page */ - prefetch(va); + prefetch(xdp->data); #if L1_CACHE_BYTES < 128 - prefetch((u8 *)va + L1_CACHE_BYTES); + prefetch((void *)(xdp->data + L1_CACHE_BYTES)); #endif /* L1_CACHE_BYTES */ /* allocate a skb to store the frags */ @@ -712,10 +878,11 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, /* Determine available headroom for copy */ headlen = size; if (headlen > ICE_RX_HDR_SIZE) - headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE); + headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, + sizeof(long))); /* if we exhaust the linear part then add what is left as a frag */ size -= headlen; @@ -723,7 +890,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, #if (PAGE_SIZE >= 8192) unsigned int truesize = SKB_DATA_ALIGN(size); #else - unsigned int truesize = ICE_RXBUF_2048; + unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; #endif skb_add_rx_frag(skb, 0, rx_buf->page, rx_buf->page_offset + headlen, size, truesize); @@ -745,11 +912,18 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, * @rx_ring: Rx descriptor ring to transact packets on * @rx_buf: Rx buffer to pull data from * - * This function will clean up the contents of the rx_buf. It will - * either recycle the buffer or unmap it and free the associated resources. + * This function will update next_to_clean and then clean up the contents + * of the rx_buf. It will either recycle the buffer or unmap it and free + * the associated resources. */ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) { + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + if (!rx_buf) return; @@ -759,8 +933,9 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) rx_ring->rx_stats.page_reuse_count++; } else { /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, PAGE_SIZE, - DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); + dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, + ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE, + ICE_RX_DMA_ATTR); __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias); } @@ -770,227 +945,31 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) } /** - * ice_cleanup_headers - Correct empty headers - * @skb: pointer to current skb being fixed - * - * Also address the case where we are pulling data in on pages only - * and as such no data is present in the skb header. - * - * In addition if skb is not at least 60 bytes we need to pad it so that - * it is large enough to qualify as a valid Ethernet frame. - * - * Returns true if an error was encountered and skb was freed. 
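A minimal sketch of the bump-and-wrap bookkeeping that ice_put_rx_buf() now performs on next_to_clean; the ring size here is arbitrary and the struct is a stand-in for the real ring.

#include <stdint.h>
#include <stdio.h>

struct ring { uint16_t next_to_clean; uint16_t count; };

static void ring_advance(struct ring *r)
{
	uint16_t ntc = r->next_to_clean + 1;

	/* wrap back to descriptor 0 once we walk off the end of the ring */
	r->next_to_clean = (ntc < r->count) ? ntc : 0;
}

int main(void)
{
	struct ring r = { .next_to_clean = 0, .count = 8 };

	for (int i = 0; i < 10; i++) {
		ring_advance(&r);
		printf("ntc=%u\n", r.next_to_clean);
	}
	return 0;
}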
- */ -static bool ice_cleanup_headers(struct sk_buff *skb) -{ - /* if eth_skb_pad returns an error the skb was freed */ - if (eth_skb_pad(skb)) - return true; - - return false; -} - -/** - * ice_test_staterr - tests bits in Rx descriptor status and error fields - * @rx_desc: pointer to receive descriptor (in le64 format) - * @stat_err_bits: value to mask - * - * This function does some fast chicanery in order to return the - * value of the mask which is really only used for boolean tests. - * The status_error_len doesn't need to be shifted because it begins - * at offset zero. - */ -static bool -ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits) -{ - return !!(rx_desc->wb.status_error0 & - cpu_to_le16(stat_err_bits)); -} - -/** * ice_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed * @rx_desc: Rx descriptor for current buffer * @skb: Current socket buffer containing buffer in progress * - * This function updates next to clean. If the buffer is an EOP buffer - * this function exits returning false, otherwise it will place the - * sk_buff in the next buffer to be chained and return true indicating - * that this is in fact a non-EOP buffer. + * If the buffer is an EOP buffer, this function exits returning false, + * otherwise return true indicating that this is in fact a non-EOP buffer. */ static bool ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { - u32 ntc = rx_ring->next_to_clean + 1; - - /* fetch, update, and store next to clean */ - ntc = (ntc < rx_ring->count) ? ntc : 0; - rx_ring->next_to_clean = ntc; - - prefetch(ICE_RX_DESC(rx_ring, ntc)); - /* if we are the last buffer then there is nothing else to do */ #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF))) return false; /* place skb in next buffer to be received */ - rx_ring->rx_buf[ntc].skb = skb; + rx_ring->rx_buf[rx_ring->next_to_clean].skb = skb; rx_ring->rx_stats.non_eop_descs++; return true; } /** - * ice_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns a hash type to be used by skb_set_hash - */ -static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype) -{ - return PKT_HASH_TYPE_NONE; -} - -/** - * ice_rx_hash - set the hash value in the skb - * @rx_ring: descriptor ring - * @rx_desc: specific descriptor - * @skb: pointer to current skb - * @rx_ptype: the ptype value from the descriptor - */ -static void -ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb, u8 rx_ptype) -{ - struct ice_32b_rx_flex_desc_nic *nic_mdid; - u32 hash; - - if (!(rx_ring->netdev->features & NETIF_F_RXHASH)) - return; - - if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC) - return; - - nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc; - hash = le32_to_cpu(nic_mdid->rss_hash); - skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype)); -} - -/** - * ice_rx_csum - Indicate in skb if checksum is good - * @ring: the ring we care about - * @skb: skb currently being received and modified - * @rx_desc: the receive descriptor - * @ptype: the packet type decoded by hardware - * - * skb->protocol must be set before this function is called - */ -static void -ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb, - union ice_32b_rx_flex_desc *rx_desc, u8 ptype) -{ - struct ice_rx_ptype_decoded decoded; - u32 rx_error, rx_status; - bool ipv4, ipv6; - - rx_status = 
le16_to_cpu(rx_desc->wb.status_error0); - rx_error = rx_status; - - decoded = ice_decode_rx_desc_ptype(ptype); - - /* Start with CHECKSUM_NONE and by default csum_level = 0 */ - skb->ip_summed = CHECKSUM_NONE; - skb_checksum_none_assert(skb); - - /* check if Rx checksum is enabled */ - if (!(ring->netdev->features & NETIF_F_RXCSUM)) - return; - - /* check if HW has decoded the packet and checksum */ - if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) - return; - - if (!(decoded.known && decoded.outer_ip)) - return; - - ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6); - - if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | - BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) - goto checksum_fail; - else if (ipv6 && (rx_status & - (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S)))) - goto checksum_fail; - - /* check for L4 errors and handle packets that were not able to be - * checksummed due to arrival speed - */ - if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) - goto checksum_fail; - - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case ICE_RX_PTYPE_INNER_PROT_TCP: - case ICE_RX_PTYPE_INNER_PROT_UDP: - case ICE_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - default: - break; - } - return; - -checksum_fail: - ring->vsi->back->hw_csum_rx_error++; -} - -/** - * ice_process_skb_fields - Populate skb header fields from Rx descriptor - * @rx_ring: Rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being populated - * @ptype: the packet type decoded by hardware - * - * This function checks the ring, descriptor, and packet information in - * order to populate the hash, checksum, VLAN, protocol, and - * other fields within the skb. - */ -static void -ice_process_skb_fields(struct ice_ring *rx_ring, - union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb, u8 ptype) -{ - ice_rx_hash(rx_ring, rx_desc, skb, ptype); - - /* modifies the skb - consumes the enet header */ - skb->protocol = eth_type_trans(skb, rx_ring->netdev); - - ice_rx_csum(rx_ring, skb, rx_desc, ptype); -} - -/** - * ice_receive_skb - Send a completed packet up the stack - * @rx_ring: Rx ring in play - * @skb: packet to send up - * @vlan_tag: VLAN tag for packet - * - * This function sends the completed packet (via. 
skb) up the stack using - * gro receive functions (with/without VLAN tag) - */ -static void -ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) -{ - if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && - (vlan_tag & VLAN_VID_MASK)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); - napi_gro_receive(&rx_ring->q_vector->napi, skb); -} - -/** * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: Rx descriptor ring to transact packets on * @budget: Total limit on number of packets to process @@ -1006,8 +985,13 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_pkts = 0; u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); + unsigned int xdp_res, xdp_xmit = 0; + struct bpf_prog *xdp_prog = NULL; + struct xdp_buff xdp; bool failure; + xdp.rxq = &rx_ring->xdp_rxq; + /* start the loop to process Rx packets bounded by 'budget' */ while (likely(total_rx_pkts < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; @@ -1042,10 +1026,57 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) /* retrieve a buffer from the ring */ rx_buf = ice_get_rx_buf(rx_ring, &skb, size); + if (!size) { + xdp.data = NULL; + xdp.data_end = NULL; + xdp.data_hard_start = NULL; + xdp.data_meta = NULL; + goto construct_skb; + } + + xdp.data = page_address(rx_buf->page) + rx_buf->page_offset; + xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring); + xdp.data_meta = xdp.data; + xdp.data_end = xdp.data + size; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + if (!xdp_prog) { + rcu_read_unlock(); + goto construct_skb; + } + + xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog); + rcu_read_unlock(); + if (!xdp_res) + goto construct_skb; + if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) { + unsigned int truesize; + +#if (PAGE_SIZE < 8192) + truesize = ice_rx_pg_size(rx_ring) / 2; +#else + truesize = SKB_DATA_ALIGN(ice_rx_offset(rx_ring) + + size); +#endif + xdp_xmit |= xdp_res; + ice_rx_buf_adjust_pg_offset(rx_buf, truesize); + } else { + rx_buf->pagecnt_bias++; + } + total_rx_bytes += size; + total_rx_pkts++; + + cleaned_count++; + ice_put_rx_buf(rx_ring, rx_buf); + continue; +construct_skb: if (skb) - ice_add_rx_frag(rx_buf, skb, size); + ice_add_rx_frag(rx_ring, rx_buf, skb, size); + else if (ice_ring_uses_build_skb(rx_ring)) + skb = ice_build_skb(rx_ring, rx_buf, &xdp); else - skb = ice_construct_skb(rx_ring, rx_buf, size); + skb = ice_construct_skb(rx_ring, rx_buf, &xdp); /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -1072,10 +1103,8 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) if (ice_test_staterr(rx_desc, stat_err_bits)) vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1); - /* correct empty headers and pad skb if needed (to make valid - * ethernet frame - */ - if (ice_cleanup_headers(skb)) { + /* pad the skb if needed, to make a valid ethernet frame */ + if (eth_skb_pad(skb)) { skb = NULL; continue; } @@ -1099,13 +1128,10 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) /* return up to cleaned_count buffers to hardware */ failure = ice_alloc_rx_bufs(rx_ring, cleaned_count); - /* update queue and vector specific stats */ - u64_stats_update_begin(&rx_ring->syncp); - rx_ring->stats.pkts += total_rx_pkts; - rx_ring->stats.bytes += total_rx_bytes; - u64_stats_update_end(&rx_ring->syncp); - rx_ring->q_vector->rx.total_pkts += total_rx_pkts; - rx_ring->q_vector->rx.total_bytes += total_rx_bytes; + if (xdp_prog) + 
ice_finalize_xdp_rx(rx_ring, xdp_xmit); + + ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes); /* guarantee a trip back through this routine if there was a failure */ return failure ? budget : (int)total_rx_pkts; @@ -1483,9 +1509,14 @@ int ice_napi_poll(struct napi_struct *napi, int budget) /* Since the actual Tx work is minimal, we can give the Tx a larger * budget and be more aggressive about cleaning up the Tx descriptors. */ - ice_for_each_ring(ring, q_vector->tx) - if (!ice_clean_tx_irq(ring, budget)) + ice_for_each_ring(ring, q_vector->tx) { + bool wd = ring->xsk_umem ? + ice_clean_tx_irq_zc(ring, budget) : + ice_clean_tx_irq(ring, budget); + + if (!wd) clean_complete = false; + } /* Handle case where we are called by netpoll with a budget of 0 */ if (unlikely(budget <= 0)) @@ -1505,7 +1536,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget) ice_for_each_ring(ring, q_vector->rx) { int cleaned; - cleaned = ice_clean_rx_irq(ring, budget_per_ring); + /* A dedicated path for zero-copy allows making a single + * comparison in the irq context instead of many inside the + * ice_clean_rx_irq function and makes the codebase cleaner. + */ + cleaned = ring->xsk_umem ? + ice_clean_rx_irq_zc(ring, budget_per_ring) : + ice_clean_rx_irq(ring, budget_per_ring); work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ if (cleaned >= budget_per_ring) @@ -1527,17 +1564,6 @@ int ice_napi_poll(struct napi_struct *napi, int budget) return min_t(int, work_done, budget - 1); } -/* helper function for building cmd/type/offset */ -static __le64 -build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) -{ - return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA | - (td_cmd << ICE_TXD_QW1_CMD_S) | - (td_offset << ICE_TXD_QW1_OFFSET_S) | - ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) | - (td_tag << ICE_TXD_QW1_L2TAG1_S)); -} - /** * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions * @tx_ring: the ring to be checked @@ -1689,9 +1715,9 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, i = 0; /* write last descriptor with RS and EOP bits */ - td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS); - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag); + td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD; + tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, size, + td_tag); /* Force memory writes to complete before letting h/w know there * are new descriptors to fetch. diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 94a9280193e2..a84cc0e6dd27 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -4,8 +4,12 @@ #ifndef _ICE_TXRX_H_ #define _ICE_TXRX_H_ +#include "ice_type.h" + #define ICE_DFLT_IRQ_WORK 256 +#define ICE_RXBUF_3072 3072 #define ICE_RXBUF_2048 2048 +#define ICE_RXBUF_1536 1536 #define ICE_MAX_CHAINED_RX_BUFS 5 #define ICE_MAX_BUF_TXD 8 #define ICE_MIN_TX_LEN 17 @@ -22,6 +26,71 @@ #define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */ #define ICE_MAX_TXQ_PER_TXQG 128 +/* Attempt to maximize the headroom available for incoming frames. We use a 2K + * buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame. + * This leaves us with 512 bytes of room. From that we need to deduct the + * space needed for the shared info and the padding needed to IP align the + * frame. + * + * Note: For cache line sizes 256 or larger this value is going to end + * up negative. 
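The arithmetic behind that note, as a standalone check (the 320-byte skb_shared_info size is an assumed placeholder): with 64-byte cache lines a 2K buffer still leaves padding to spare, while with 256-byte cache lines the available room goes negative.

#include <stdio.h>

#define SHINFO           320                        /* assumed skb_shared_info size */
#define ALIGN_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))

static int room_left(int cacheline)
{
	int net_skb_pad = cacheline > 32 ? cacheline : 32;   /* NET_SKB_PAD analogue */
	int usable = 2048 - ALIGN_UP(SHINFO, cacheline);     /* SKB_WITH_OVERHEAD(2048) */

	return usable - (net_skb_pad + 1536);
}

int main(void)
{
	printf("64B cache line:  %d bytes of padding left\n", room_left(64));
	printf("256B cache line: %d bytes of padding left\n", room_left(256));
	return 0;
}

With these assumptions this prints 128 for 64-byte cache lines and -256 for 256-byte cache lines.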
In these cases we should fall back to the legacy + * receive path. + */ +#if (PAGE_SIZE < 8192) +#define ICE_2K_TOO_SMALL_WITH_PADDING \ +((NET_SKB_PAD + ICE_RXBUF_1536) > SKB_WITH_OVERHEAD(ICE_RXBUF_2048)) + +/** + * ice_compute_pad - compute the padding + * rx_buf_len: buffer length + * + * Figure out the size of half page based on given buffer length and + * then subtract the skb_shared_info followed by subtraction of the + * actual buffer length; this in turn results in the actual space that + * is left for padding usage + */ +static inline int ice_compute_pad(int rx_buf_len) +{ + int half_page_size; + + half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); + return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len; +} + +/** + * ice_skb_pad - determine the padding that we can supply + * + * Figure out the right Rx buffer size and based on that calculate the + * padding + */ +static inline int ice_skb_pad(void) +{ + int rx_buf_len; + + /* If a 2K buffer cannot handle a standard Ethernet frame then + * optimize padding for a 3K buffer instead of a 1.5K buffer. + * + * For a 3K buffer we need to add enough padding to allow for + * tailroom due to NET_IP_ALIGN possibly shifting us out of + * cache-line alignment. + */ + if (ICE_2K_TOO_SMALL_WITH_PADDING) + rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN); + else + rx_buf_len = ICE_RXBUF_1536; + + /* if needed make room for NET_IP_ALIGN */ + rx_buf_len -= NET_IP_ALIGN; + + return ice_compute_pad(rx_buf_len); +} + +#define ICE_SKB_PAD ice_skb_pad() +#else +#define ICE_2K_TOO_SMALL_WITH_PADDING false +#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) +#endif + /* We are assuming that the cache line is always 64 Bytes here for ice. * In order to make sure that is a correct assumption there is a check in probe * to print a warning if the read from GLPCI_CNF2 tells us that the cache line @@ -49,12 +118,24 @@ #define ICE_TX_FLAGS_VLAN_PR_S 29 #define ICE_TX_FLAGS_VLAN_S 16 +#define ICE_XDP_PASS 0 +#define ICE_XDP_CONSUMED BIT(0) +#define ICE_XDP_TX BIT(1) +#define ICE_XDP_REDIR BIT(2) + #define ICE_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) +#define ICE_ETH_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) + +#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS) + struct ice_tx_buf { struct ice_tx_desc *next_to_watch; - struct sk_buff *skb; + union { + struct sk_buff *skb; + void *raw_buf; /* used for XDP */ + }; unsigned int bytecount; unsigned short gso_segs; u32 tx_flags; @@ -76,9 +157,17 @@ struct ice_tx_offload_params { struct ice_rx_buf { struct sk_buff *skb; dma_addr_t dma; - struct page *page; - unsigned int page_offset; - u16 pagecnt_bias; + union { + struct { + struct page *page; + unsigned int page_offset; + u16 pagecnt_bias; + }; + struct { + void *addr; + u64 handle; + }; + }; }; struct ice_q_stats { @@ -198,18 +287,44 @@ struct ice_ring { }; struct rcu_head rcu; /* to avoid race on free */ + struct bpf_prog *xdp_prog; + struct xdp_umem *xsk_umem; + struct zero_copy_allocator zca; + /* CL3 - 3rd cacheline starts here */ + struct xdp_rxq_info xdp_rxq; /* CLX - the below items are only accessed infrequently and should be * in their own cache line if possible */ +#define ICE_TX_FLAGS_RING_XDP BIT(0) +#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) + u8 flags; dma_addr_t dma; /* physical address of ring */ unsigned int size; /* length of descriptor ring in bytes */ u32 txq_teid; /* Added Tx queue TEID */ u16 rx_buf_len; -#ifdef CONFIG_DCB u8 dcb_tc; /* Traffic class of ring */ -#endif /* 
CONFIG_DCB */ } ____cacheline_internodealigned_in_smp; +static inline bool ice_ring_uses_build_skb(struct ice_ring *ring) +{ + return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB); +} + +static inline void ice_set_ring_build_skb_ena(struct ice_ring *ring) +{ + ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB; +} + +static inline void ice_clear_ring_build_skb_ena(struct ice_ring *ring) +{ + ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB; +} + +static inline bool ice_ring_is_xdp(struct ice_ring *ring) +{ + return !!(ring->flags & ICE_TX_FLAGS_RING_XDP); +} + struct ice_ring_container { /* head of linked-list of rings */ struct ice_ring *ring; @@ -230,6 +345,19 @@ struct ice_ring_container { #define ice_for_each_ring(pos, head) \ for (pos = (head).ring; pos; pos = pos->next) +static inline unsigned int ice_rx_pg_order(struct ice_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring->rx_buf_len > (PAGE_SIZE / 2)) + return 1; +#endif + return 0; +} + +#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring)) + +union ice_32b_rx_flex_desc; + bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count); netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev); void ice_clean_tx_ring(struct ice_ring *tx_ring); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c new file mode 100644 index 000000000000..35bbc4ff603c --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Intel Corporation. */ + +#include "ice_txrx_lib.h" + +/** + * ice_release_rx_desc - Store the new tail and head values + * @rx_ring: ring to bump + * @val: new head index + */ +void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val) +{ + u16 prev_ntu = rx_ring->next_to_use; + + rx_ring->next_to_use = val; + + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = val; + + /* QRX_TAIL will be updated with any tail value, but hardware ignores + * the lower 3 bits. This makes it so we only bump tail on meaningful + * boundaries. Also, this allows us to bump tail on intervals of 8 up to + * the budget depending on the current traffic load. + */ + val &= ~0x7; + if (prev_ntu != val) { + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). 
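A userspace sketch of the tail-write coalescing ice_release_rx_desc() describes: because hardware ignores the low three bits of QRX_TAIL, masking the new value and comparing it against the previous next_to_use skips redundant MMIO writes. The ring model and refill batch size below are illustrative only.

#include <stdio.h>

static unsigned int tail_shadow;   /* last value "written" to hardware */
static unsigned int tail_writes;

static void release_rx_desc(unsigned int *next_to_use, unsigned int val)
{
	unsigned int prev_ntu = *next_to_use;

	*next_to_use = val;

	val &= ~0x7u;              /* hardware ignores the low 3 bits */
	if (prev_ntu != val) {     /* same comparison as the driver */
		tail_shadow = val; /* stands in for wmb(); writel(val, tail) */
		tail_writes++;
	}
}

int main(void)
{
	unsigned int ntu = 0;

	/* refill the ring four descriptors at a time, 16 times */
	for (unsigned int i = 1; i <= 16; i++)
		release_rx_desc(&ntu, i * 4);

	printf("16 refills -> %u tail writes, last tail %u\n",
	       tail_writes, tail_shadow);
	return 0;
}

With this batch size only half of the refills result in a tail write (8 writes, final tail 64).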
+ */ + wmb(); + writel(val, rx_ring->tail); + } +} + +/** + * ice_ptype_to_htype - get a hash type + * @ptype: the ptype value from the descriptor + * + * Returns a hash type to be used by skb_set_hash + */ +static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype) +{ + return PKT_HASH_TYPE_NONE; +} + +/** + * ice_rx_hash - set the hash value in the skb + * @rx_ring: descriptor ring + * @rx_desc: specific descriptor + * @skb: pointer to current skb + * @rx_ptype: the ptype value from the descriptor + */ +static void +ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb, u8 rx_ptype) +{ + struct ice_32b_rx_flex_desc_nic *nic_mdid; + u32 hash; + + if (!(rx_ring->netdev->features & NETIF_F_RXHASH)) + return; + + if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC) + return; + + nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc; + hash = le32_to_cpu(nic_mdid->rss_hash); + skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype)); +} + +/** + * ice_rx_csum - Indicate in skb if checksum is good + * @ring: the ring we care about + * @skb: skb currently being received and modified + * @rx_desc: the receive descriptor + * @ptype: the packet type decoded by hardware + * + * skb->protocol must be set before this function is called + */ +static void +ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb, + union ice_32b_rx_flex_desc *rx_desc, u8 ptype) +{ + struct ice_rx_ptype_decoded decoded; + u32 rx_error, rx_status; + bool ipv4, ipv6; + + rx_status = le16_to_cpu(rx_desc->wb.status_error0); + rx_error = rx_status; + + decoded = ice_decode_rx_desc_ptype(ptype); + + /* Start with CHECKSUM_NONE and by default csum_level = 0 */ + skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); + + /* check if Rx checksum is enabled */ + if (!(ring->netdev->features & NETIF_F_RXCSUM)) + return; + + /* check if HW has decoded the packet and checksum */ + if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) + return; + + if (!(decoded.known && decoded.outer_ip)) + return; + + ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4); + ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6); + + if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | + BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) + goto checksum_fail; + else if (ipv6 && (rx_status & + (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S)))) + goto checksum_fail; + + /* check for L4 errors and handle packets that were not able to be + * checksummed due to arrival speed + */ + if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) + goto checksum_fail; + + /* Only report checksum unnecessary for TCP, UDP, or SCTP */ + switch (decoded.inner_prot) { + case ICE_RX_PTYPE_INNER_PROT_TCP: + case ICE_RX_PTYPE_INNER_PROT_UDP: + case ICE_RX_PTYPE_INNER_PROT_SCTP: + skb->ip_summed = CHECKSUM_UNNECESSARY; + default: + break; + } + return; + +checksum_fail: + ring->vsi->back->hw_csum_rx_error++; +} + +/** + * ice_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: Rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * @ptype: the packet type decoded by hardware + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, protocol, and + * other fields within the skb. 
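A simplified model of the staged Rx checksum verdict in ice_rx_csum(): bail out unless hardware parsed the L3/L4 headers, treat any IP or L4 error bit as a failure, and only report CHECKSUM_UNNECESSARY for TCP, UDP, or SCTP. The bit positions here are placeholders, not the real flex descriptor layout.

#include <stdbool.h>
#include <stdio.h>

#define STAT_L3L4P   (1u << 3)   /* HW parsed L3/L4 headers (placeholder bit) */
#define ERR_IP_CSUM  (1u << 4)   /* IPv4 header checksum error (placeholder bit) */
#define ERR_L4_CSUM  (1u << 5)   /* TCP/UDP/SCTP checksum error (placeholder bit) */

enum proto { PROTO_TCP, PROTO_UDP, PROTO_SCTP, PROTO_OTHER };

static bool csum_unnecessary(unsigned int status, enum proto inner)
{
	if (!(status & STAT_L3L4P))          /* HW did not validate anything */
		return false;
	if (status & (ERR_IP_CSUM | ERR_L4_CSUM))
		return false;                /* counted as a checksum error */

	switch (inner) {                     /* only report for TCP/UDP/SCTP */
	case PROTO_TCP:
	case PROTO_UDP:
	case PROTO_SCTP:
		return true;
	default:
		return false;
	}
}

int main(void)
{
	printf("good TCP: %d\n", csum_unnecessary(STAT_L3L4P, PROTO_TCP));
	printf("L4 error: %d\n", csum_unnecessary(STAT_L3L4P | ERR_L4_CSUM, PROTO_TCP));
	printf("unknown proto: %d\n", csum_unnecessary(STAT_L3L4P, PROTO_OTHER));
	return 0;
}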
+ */ +void +ice_process_skb_fields(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb, u8 ptype) +{ + ice_rx_hash(rx_ring, rx_desc, skb, ptype); + + /* modifies the skb - consumes the enet header */ + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + + ice_rx_csum(rx_ring, skb, rx_desc, ptype); +} + +/** + * ice_receive_skb - Send a completed packet up the stack + * @rx_ring: Rx ring in play + * @skb: packet to send up + * @vlan_tag: VLAN tag for packet + * + * This function sends the completed packet (via. skb) up the stack using + * gro receive functions (with/without VLAN tag) + */ +void +ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) +{ + if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && + (vlan_tag & VLAN_VID_MASK)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); + napi_gro_receive(&rx_ring->q_vector->napi, skb); +} + +/** + * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission + * @data: packet data pointer + * @size: packet data size + * @xdp_ring: XDP ring for transmission + */ +int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring) +{ + u16 i = xdp_ring->next_to_use; + struct ice_tx_desc *tx_desc; + struct ice_tx_buf *tx_buf; + dma_addr_t dma; + + if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) { + xdp_ring->tx_stats.tx_busy++; + return ICE_XDP_CONSUMED; + } + + dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE); + if (dma_mapping_error(xdp_ring->dev, dma)) + return ICE_XDP_CONSUMED; + + tx_buf = &xdp_ring->tx_buf[i]; + tx_buf->bytecount = size; + tx_buf->gso_segs = 1; + tx_buf->raw_buf = data; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); + + tx_desc = ICE_TX_DESC(xdp_ring, i); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, 0, + size, 0); + + /* Make certain all of the status bits have been updated + * before next_to_watch is written. + */ + smp_wmb(); + + i++; + if (i == xdp_ring->count) + i = 0; + + tx_buf->next_to_watch = tx_desc; + xdp_ring->next_to_use = i; + + return ICE_XDP_TX; +} + +/** + * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it + * @xdp: XDP buffer + * @xdp_ring: XDP Tx ring + * + * Returns negative on failure, 0 on success. + */ +int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring) +{ + struct xdp_frame *xdpf = convert_to_xdp_frame(xdp); + + if (unlikely(!xdpf)) + return ICE_XDP_CONSUMED; + + return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring); +} + +/** + * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map + * @rx_ring: Rx ring + * @xdp_res: Result of the receive batch + * + * This function bumps XDP Tx tail and/or flush redirect map, and + * should be called when a batch of packets has been processed in the + * napi loop. + */ +void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res) +{ + if (xdp_res & ICE_XDP_REDIR) + xdp_do_flush_map(); + + if (xdp_res & ICE_XDP_TX) { + struct ice_ring *xdp_ring = + rx_ring->vsi->xdp_rings[rx_ring->q_index]; + + ice_xdp_ring_update_tail(xdp_ring); + } +} diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h new file mode 100644 index 000000000000..ba9164dad9ae --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. 
*/ + +#ifndef _ICE_TXRX_LIB_H_ +#define _ICE_TXRX_LIB_H_ +#include "ice.h" + +/** + * ice_test_staterr - tests bits in Rx descriptor status and error fields + * @rx_desc: pointer to receive descriptor (in le64 format) + * @stat_err_bits: value to mask + * + * This function does some fast chicanery in order to return the + * value of the mask which is really only used for boolean tests. + * The status_error_len doesn't need to be shifted because it begins + * at offset zero. + */ +static inline bool +ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits) +{ + return !!(rx_desc->wb.status_error0 & cpu_to_le16(stat_err_bits)); +} + +static inline __le64 +build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) +{ + return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA | + (td_cmd << ICE_TXD_QW1_CMD_S) | + (td_offset << ICE_TXD_QW1_OFFSET_S) | + ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) | + (td_tag << ICE_TXD_QW1_L2TAG1_S)); +} + +/** + * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register + * @xdp_ring: XDP Tx ring + * + * This function updates the XDP Tx ring tail register. + */ +static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring) +{ + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. + */ + wmb(); + writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail); +} + +void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res); +int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring); +int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring); +void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val); +void +ice_process_skb_fields(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb, u8 ptype); +void +ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag); +#endif /* !_ICE_TXRX_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 6667d17a4206..c4854a987130 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -19,6 +19,17 @@ static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc) return test_bit(tc, &bitmap); } +static inline u64 round_up_64bit(u64 a, u32 b) +{ + return div64_long(((a) + (b) / 2), (b)); +} + +static inline u32 ice_round_to_num(u32 N, u32 R) +{ + return ((((N) % (R)) < ((R) / 2)) ? 
(((N) / (R)) * (R)) : + ((((N) + (R) - 1) / (R)) * (R))); +} + /* Driver always calls main vsi_handle first */ #define ICE_MAIN_VSI_HANDLE 0 @@ -35,6 +46,8 @@ static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc) #define ICE_DBG_PKG BIT_ULL(16) #define ICE_DBG_RES BIT_ULL(17) #define ICE_DBG_AQ_MSG BIT_ULL(24) +#define ICE_DBG_AQ_DESC BIT_ULL(25) +#define ICE_DBG_AQ_DESC_BUF BIT_ULL(26) #define ICE_DBG_AQ_CMD BIT_ULL(27) #define ICE_DBG_USER BIT_ULL(31) @@ -189,6 +202,7 @@ struct ice_hw_dev_caps { struct ice_hw_common_caps common_cap; u32 num_vfs_exposed; /* Total number of VFs exposed */ u32 num_vsi_allocd_to_host; /* Excluding EMP VSI */ + u32 num_funcs; }; /* MAC info */ @@ -272,10 +286,56 @@ enum ice_agg_type { ICE_AGG_TYPE_QG }; +/* Rate limit types */ +enum ice_rl_type { + ICE_UNKNOWN_BW = 0, + ICE_MIN_BW, /* for CIR profile */ + ICE_MAX_BW, /* for EIR profile */ + ICE_SHARED_BW /* for shared profile */ +}; + +#define ICE_SCHED_MIN_BW 500 /* in Kbps */ +#define ICE_SCHED_MAX_BW 100000000 /* in Kbps */ +#define ICE_SCHED_DFLT_BW 0xFFFFFFFF /* unlimited */ #define ICE_SCHED_DFLT_RL_PROF_ID 0 +#define ICE_SCHED_NO_SHARED_RL_PROF_ID 0xFFFF #define ICE_SCHED_DFLT_BW_WT 1 +#define ICE_SCHED_INVAL_PROF_ID 0xFFFF +#define ICE_SCHED_DFLT_BURST_SIZE (15 * 1024) /* in bytes (15k) */ -/* VSI type list entry to locate corresponding VSI/ag nodes */ + /* Data structure for saving BW information */ +enum ice_bw_type { + ICE_BW_TYPE_PRIO, + ICE_BW_TYPE_CIR, + ICE_BW_TYPE_CIR_WT, + ICE_BW_TYPE_EIR, + ICE_BW_TYPE_EIR_WT, + ICE_BW_TYPE_SHARED, + ICE_BW_TYPE_CNT /* This must be last */ +}; + +struct ice_bw { + u32 bw; + u16 bw_alloc; +}; + +struct ice_bw_type_info { + DECLARE_BITMAP(bw_t_bitmap, ICE_BW_TYPE_CNT); + u8 generic; + struct ice_bw cir_bw; + struct ice_bw eir_bw; + u32 shared_bw; +}; + +/* VSI queue context structure for given TC */ +struct ice_q_ctx { + u16 q_handle; + u32 q_teid; + /* bw_t_info saves queue BW information */ + struct ice_bw_type_info bw_t_info; +}; + +/* VSI type list entry to locate corresponding VSI/aggregator nodes */ struct ice_sched_vsi_info { struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS]; struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS]; @@ -364,6 +424,8 @@ struct ice_port_info { struct mutex sched_lock; /* protect access to TXSched tree */ struct ice_sched_node * sib_head[ICE_MAX_TRAFFIC_CLASS][ICE_AQC_TOPO_MAX_LEVEL_NUM]; + /* List contain profile ID(s) and other params per layer */ + struct list_head rl_prof_list[ICE_AQC_TOPO_MAX_LEVEL_NUM]; struct ice_dcbx_cfg local_dcbx_cfg; /* Oper/Local Cfg */ /* DCBX info */ struct ice_dcbx_cfg remote_dcbx_cfg; /* Peer Cfg */ @@ -415,6 +477,8 @@ struct ice_hw { u8 pf_id; /* device profile info */ + u16 max_burst_size; /* driver sets this value */ + /* Tx Scheduler values */ u16 num_tx_sched_layers; u16 num_tx_sched_phys_layers; @@ -555,6 +619,8 @@ struct ice_hw_port_stats { }; /* Checksum and Shadow RAM pointers */ +#define ICE_SR_BOOT_CFG_PTR 0x132 +#define ICE_NVM_OEM_VER_OFF 0x02 #define ICE_SR_NVM_DEV_STARTER_VER 0x18 #define ICE_SR_NVM_EETRACK_LO 0x2D #define ICE_SR_NVM_EETRACK_HI 0x2E @@ -568,6 +634,7 @@ struct ice_hw_port_stats { #define ICE_OEM_VER_BUILD_MASK (0xffff << ICE_OEM_VER_BUILD_SHIFT) #define ICE_OEM_VER_SHIFT 24 #define ICE_OEM_VER_MASK (0xff << ICE_OEM_VER_SHIFT) +#define ICE_SR_PFA_PTR 0x40 #define ICE_SR_SECTOR_SIZE_IN_WORDS 0x800 #define ICE_SR_WORDS_IN_1KB 512 diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 
b45797f39b2f..edb374296d1f 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -2,9 +2,39 @@ /* Copyright (c) 2018, Intel Corporation. */ #include "ice.h" +#include "ice_base.h" #include "ice_lib.h" /** + * ice_validate_vf_id - helper to check if VF ID is valid + * @pf: pointer to the PF structure + * @vf_id: the ID of the VF to check + */ +static int ice_validate_vf_id(struct ice_pf *pf, int vf_id) +{ + if (vf_id >= pf->num_alloc_vfs) { + dev_err(ice_pf_to_dev(pf), "Invalid VF ID: %d\n", vf_id); + return -EINVAL; + } + return 0; +} + +/** + * ice_check_vf_init - helper to check if VF init complete + * @pf: pointer to the PF structure + * @vf: the pointer to the VF to check + */ +static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf) +{ + if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { + dev_err(ice_pf_to_dev(pf), "VF ID: %d in reset. Try again.\n", + vf->vf_id); + return -EBUSY; + } + return 0; +} + +/** * ice_err_to_virt err - translate errors for VF return code * @ice_err: error return code */ @@ -184,12 +214,14 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) { struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; + struct device *dev; int first, last, v; struct ice_hw *hw; hw = &pf->hw; vsi = pf->vsi[vf->lan_vsi_idx]; + dev = ice_pf_to_dev(pf); wr32(hw, VPINT_ALLOC(vf->vf_id), 0); wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0); @@ -208,13 +240,12 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG) wr32(hw, VPLAN_TX_QBASE(vf->vf_id), 0); else - dev_err(&pf->pdev->dev, - "Scattered mode for VF Tx queues is not yet implemented\n"); + dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n"); if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) wr32(hw, VPLAN_RX_QBASE(vf->vf_id), 0); else - dev_err(&pf->pdev->dev, + dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n"); } @@ -289,6 +320,7 @@ static void ice_dis_vf_qs(struct ice_vf *vf) */ void ice_free_vfs(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; int tmp, i; @@ -310,24 +342,24 @@ void ice_free_vfs(struct ice_pf *pf) if (!pci_vfs_assigned(pf->pdev)) pci_disable_sriov(pf->pdev); else - dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n"); + dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); tmp = pf->num_alloc_vfs; pf->num_vf_qps = 0; pf->num_alloc_vfs = 0; for (i = 0; i < tmp; i++) { if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) { - /* disable VF qp mappings */ + /* disable VF qp mappings and set VF disable state */ ice_dis_vf_mappings(&pf->vf[i]); + set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states); ice_free_vf_res(&pf->vf[i]); } } if (ice_sriov_free_msix_res(pf)) - dev_err(&pf->pdev->dev, - "Failed to free MSIX resources used by SR-IOV\n"); + dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n"); - devm_kfree(&pf->pdev->dev, pf->vf); + devm_kfree(dev, pf->vf); pf->vf = NULL; /* This check is for when the driver is unloaded while VFs are @@ -366,9 +398,11 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr) { struct ice_pf *pf = vf->pf; u32 reg, reg_idx, bit_idx; + struct device *dev; struct ice_hw *hw; int vf_abs_id, i; + dev = ice_pf_to_dev(pf); hw = &pf->hw; vf_abs_id = vf->vf_id + hw->func_caps.vf_base_id; @@ -389,7 +423,7 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr) * by the time we get here. 
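A userspace sketch of the two validation helpers added above, ice_validate_vf_id() and ice_check_vf_init(): an ID range check that maps to -EINVAL and an init-state check that maps to -EBUSY, run in that order before a VF message is handled. Structures and names below are stand-ins, not the driver's.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct vf { int id; bool init_done; };
struct pf { int num_alloc_vfs; };

static int validate_vf_id(const struct pf *pf, int vf_id)
{
	if (vf_id < 0 || vf_id >= pf->num_alloc_vfs) {
		fprintf(stderr, "Invalid VF ID: %d\n", vf_id);
		return -EINVAL;
	}
	return 0;
}

static int check_vf_init(const struct vf *vf)
{
	if (!vf->init_done) {
		fprintf(stderr, "VF %d in reset. Try again.\n", vf->id);
		return -EBUSY;
	}
	return 0;
}

static int handle_vf_message(const struct pf *pf, const struct vf *vfs, int vf_id)
{
	int err = validate_vf_id(pf, vf_id);

	if (err)
		return err;
	return check_vf_init(&vfs[vf_id]);
}

int main(void)
{
	struct vf vfs[2] = { { 0, true }, { 1, false } };
	struct pf pf = { .num_alloc_vfs = 2 };

	printf("vf 0: %d\n", handle_vf_message(&pf, vfs, 0));
	printf("vf 1: %d\n", handle_vf_message(&pf, vfs, 1));
	printf("vf 5: %d\n", handle_vf_message(&pf, vfs, 5));
	return 0;
}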
*/ if (!is_pfr) - wr32(hw, VF_MBX_ARQLEN(vf_abs_id), 0); + wr32(hw, VF_MBX_ARQLEN(vf->vf_id), 0); /* In the case of a VFLR, the HW has already reset the VF and we * just need to clean up, so don't hit the VFRTRIG register. @@ -414,7 +448,7 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr) if ((reg & VF_TRANS_PENDING_M) == 0) break; - dev_err(&pf->pdev->dev, + dev_err(dev, "VF %d PCI transactions stuck\n", vf->vf_id); udelay(ICE_PCI_CIAD_WAIT_DELAY_US); } @@ -457,13 +491,12 @@ static void ice_vsi_kill_pvid_fill_ctxt(struct ice_vsi_ctx *ctxt) */ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable) { - struct device *dev = &vsi->back->pdev->dev; struct ice_hw *hw = &vsi->back->hw; struct ice_vsi_ctx *ctxt; enum ice_status status; int ret = 0; - ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL); + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return -ENOMEM; @@ -475,7 +508,7 @@ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_info(dev, "update VSI for port VLAN failed, err %d aq_err %d\n", + dev_info(&vsi->back->pdev->dev, "update VSI for port VLAN failed, err %d aq_err %d\n", status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -483,7 +516,7 @@ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable) vsi->info = ctxt->info; out: - devm_kfree(dev, ctxt); + kfree(ctxt); return ret; } @@ -531,14 +564,16 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) LIST_HEAD(tmp_add_list); u8 broadcast[ETH_ALEN]; struct ice_vsi *vsi; + struct device *dev; int status = 0; + dev = ice_pf_to_dev(pf); /* first vector index is the VFs OICR index */ vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf); vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id); if (!vsi) { - dev_err(&pf->pdev->dev, "Failed to create VF VSI\n"); + dev_err(dev, "Failed to create VF VSI\n"); return -ENOMEM; } @@ -566,8 +601,7 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) status = ice_add_mac(&pf->hw, &tmp_add_list); if (status) - dev_err(&pf->pdev->dev, - "could not add mac filters error %d\n", status); + dev_err(dev, "could not add mac filters error %d\n", status); else vf->num_mac = 1; @@ -578,7 +612,7 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) * more vectors. 
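The pending-transaction wait in ice_trigger_vf_reset() follows the usual bounded-poll shape: re-read a status register up to a fixed number of times, break once the busy bit clears, and warn if it never does. A minimal sketch with a fake register model (the poll count, bit, and "hardware" behaviour are invented for illustration):

#include <stdbool.h>
#include <stdio.h>

#define MAX_POLLS  100
#define BUSY_BIT   (1u << 0)

static unsigned int read_status(int poll)
{
	/* pretend the hardware finishes draining after a few polls */
	return poll < 3 ? BUSY_BIT : 0;
}

static bool wait_for_idle(void)
{
	for (int i = 0; i < MAX_POLLS; i++) {
		if (!(read_status(i) & BUSY_BIT))
			return true;
		/* a fixed delay (udelay in the driver) would go here */
	}
	return false;
}

int main(void)
{
	printf("%s\n", wait_for_idle() ? "transactions drained"
				       : "PCI transactions stuck");
	return 0;
}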
*/ ice_alloc_vsi_res_exit: - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + ice_free_fltr_list(dev, &tmp_add_list); return status; } @@ -634,10 +668,12 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) int abs_vf_id, abs_first, abs_last; struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; + struct device *dev; int first, last, v; struct ice_hw *hw; u32 reg; + dev = ice_pf_to_dev(pf); hw = &pf->hw; vsi = pf->vsi[vf->lan_vsi_idx]; first = vf->first_vector_idx; @@ -685,8 +721,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) VPLAN_TX_QBASE_VFNUMQ_M)); wr32(hw, VPLAN_TX_QBASE(vf->vf_id), reg); } else { - dev_err(&pf->pdev->dev, - "Scattered mode for VF Tx queues is not yet implemented\n"); + dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n"); } /* set regardless of mapping mode */ @@ -704,8 +739,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) VPLAN_RX_QBASE_VFNUMQ_M)); wr32(hw, VPLAN_RX_QBASE(vf->vf_id), reg); } else { - dev_err(&pf->pdev->dev, - "Scattered mode for VF Rx queues is not yet implemented\n"); + dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n"); } } @@ -851,6 +885,7 @@ static int ice_check_avail_res(struct ice_pf *pf) { int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker); u16 num_msix, num_txq, num_rxq, num_avail_msix; + struct device *dev = ice_pf_to_dev(pf); if (!pf->num_alloc_vfs || max_valid_res_idx < 0) return -EINVAL; @@ -883,8 +918,7 @@ static int ice_check_avail_res(struct ice_pf *pf) ICE_DFLT_INTR_PER_VF, ICE_MIN_INTR_PER_VF); } else { - dev_err(&pf->pdev->dev, - "Number of VFs %d exceeds max VF count %d\n", + dev_err(dev, "Number of VFs %d exceeds max VF count %d\n", pf->num_alloc_vfs, ICE_MAX_VF_COUNT); return -EIO; } @@ -1022,12 +1056,12 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m, */ static bool ice_config_res_vfs(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; int v; if (ice_check_avail_res(pf)) { - dev_err(&pf->pdev->dev, - "Cannot allocate VF resources, try with fewer number of VFs\n"); + dev_err(dev, "Cannot allocate VF resources, try with fewer number of VFs\n"); return false; } @@ -1040,9 +1074,8 @@ static bool ice_config_res_vfs(struct ice_pf *pf) struct ice_vf *vf = &pf->vf[v]; vf->num_vf_qs = pf->num_vf_qps; - dev_dbg(&pf->pdev->dev, - "VF-id %d has %d queues configured\n", - vf->vf_id, vf->num_vf_qs); + dev_dbg(dev, "VF-id %d has %d queues configured\n", vf->vf_id, + vf->num_vf_qs); ice_cleanup_and_realloc_vf(vf); } @@ -1066,6 +1099,7 @@ static bool ice_config_res_vfs(struct ice_pf *pf) */ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) { + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; struct ice_vf *vf; int v, i; @@ -1124,7 +1158,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) * time, but continue on with the operation. 
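ice_ena_vf_mappings() programs VPLAN_TX_QBASE/VPLAN_RX_QBASE by shifting the first queue index and queue count into their fields and masking the result. A generic sketch of that shift-and-mask packing, using a made-up field layout rather than the real register definition:

#include <stdint.h>
#include <stdio.h>

#define FIRSTQ_SHIFT  0                       /* placeholder field layout */
#define FIRSTQ_MASK   (0x3FFFu << FIRSTQ_SHIFT)
#define NUMQ_SHIFT    16
#define NUMQ_MASK     (0xFFu << NUMQ_SHIFT)

static uint32_t pack_qbase(uint32_t first_q, uint32_t num_q)
{
	return ((first_q << FIRSTQ_SHIFT) & FIRSTQ_MASK) |
	       ((num_q << NUMQ_SHIFT) & NUMQ_MASK);
}

int main(void)
{
	uint32_t reg = pack_qbase(64, 16);

	printf("reg = 0x%08x (first queue %u, %u queues)\n",
	       reg, reg & FIRSTQ_MASK, (reg & NUMQ_MASK) >> NUMQ_SHIFT);
	return 0;
}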
*/ if (v < pf->num_alloc_vfs) - dev_warn(&pf->pdev->dev, "VF reset check timeout\n"); + dev_warn(dev, "VF reset check timeout\n"); /* free VF resources to begin resetting the VSI state */ for (v = 0; v < pf->num_alloc_vfs; v++) { @@ -1141,8 +1175,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) } if (ice_sriov_free_msix_res(pf)) - dev_err(&pf->pdev->dev, - "Failed to free MSIX resources used by SR-IOV\n"); + dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n"); if (!ice_config_res_vfs(pf)) return false; @@ -1151,6 +1184,25 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) } /** + * ice_is_vf_disabled + * @vf: pointer to the VF info + * + * Returns true if the PF or VF is disabled, false otherwise. + */ +static bool ice_is_vf_disabled(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + + /* If the PF has been disabled, there is no need resetting VF until + * PF is active again. Similarly, if the VF has been disabled, this + * means something else is resetting the VF, so we shouldn't continue. + * Otherwise, set disable VF state bit for actual reset, and continue. + */ + return (test_bit(__ICE_VF_DIS, pf->state) || + test_bit(ICE_VF_STATE_DIS, vf->vf_states)); +} + +/** * ice_reset_vf - Reset a particular VF * @vf: pointer to the VF structure * @is_vflr: true if VFLR was issued, false if not @@ -1161,25 +1213,23 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) { struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; + struct device *dev; struct ice_hw *hw; bool rsd = false; u8 promisc_m; u32 reg; int i; - /* If the PF has been disabled, there is no need resetting VF until - * PF is active again. - */ - if (test_bit(__ICE_VF_DIS, pf->state)) - return false; + dev = ice_pf_to_dev(pf); - /* If the VF has been disabled, this means something else is - * resetting the VF, so we shouldn't continue. Otherwise, set - * disable VF state bit for actual reset, and continue. - */ - if (test_and_set_bit(ICE_VF_STATE_DIS, vf->vf_states)) - return false; + if (ice_is_vf_disabled(vf)) { + dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n", + vf->vf_id); + return true; + } + /* Set VF disable bit state here, before triggering reset */ + set_bit(ICE_VF_STATE_DIS, vf->vf_states); ice_trigger_vf_reset(vf, is_vflr, false); vsi = pf->vsi[vf->lan_vsi_idx]; @@ -1216,8 +1266,7 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) * continue on with the operation. 
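A sketch of the guard ice_is_vf_disabled() provides for ice_reset_vf(): skip the reset when either the PF-wide disable bit or the per-VF disable bit is already set, otherwise claim the VF by setting its disable bit before doing the actual work. The flag values and the trivial reset body are placeholders.

#include <stdbool.h>
#include <stdio.h>

#define PF_VF_DIS   (1u << 0)   /* __ICE_VF_DIS analogue */
#define VF_DIS      (1u << 0)   /* ICE_VF_STATE_DIS analogue */

struct vf { unsigned int state; };
struct pf { unsigned int state; };

static bool vf_disabled(const struct pf *pf, const struct vf *vf)
{
	return (pf->state & PF_VF_DIS) || (vf->state & VF_DIS);
}

static bool reset_vf(struct pf *pf, struct vf *vf)
{
	if (vf_disabled(pf, vf))
		return true;          /* someone else owns the reset */

	vf->state |= VF_DIS;          /* claim it, then do the real work */
	/* ... trigger and poll the hardware reset here ... */
	vf->state &= ~VF_DIS;
	return true;
}

int main(void)
{
	struct pf pf = { 0 };
	struct vf vf = { 0 };

	printf("first reset: %d\n", reset_vf(&pf, &vf));
	vf.state |= VF_DIS;
	printf("while disabled (skipped): %d\n", reset_vf(&pf, &vf));
	return 0;
}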
*/ if (!rsd) - dev_warn(&pf->pdev->dev, "VF reset check timeout on VF %d\n", - vf->vf_id); + dev_warn(dev, "VF reset check timeout on VF %d\n", vf->vf_id); /* disable promiscuous modes in case they were enabled * ignore any error if disabling process failed @@ -1231,7 +1280,7 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) vsi = pf->vsi[vf->lan_vsi_idx]; if (ice_vf_set_vsi_promisc(vf, vsi, promisc_m, true)) - dev_err(&pf->pdev->dev, "disabling promiscuous mode failed\n"); + dev_err(dev, "disabling promiscuous mode failed\n"); } /* free VF resources to begin resetting the VSI state */ @@ -1282,19 +1331,26 @@ void ice_vc_notify_reset(struct ice_pf *pf) static void ice_vc_notify_vf_reset(struct ice_vf *vf) { struct virtchnl_pf_event pfe; + struct ice_pf *pf; + + if (!vf) + return; - /* validate the request */ - if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs) + pf = vf->pf; + if (ice_validate_vf_id(pf, vf->vf_id)) return; - /* verify if the VF is in either init or active before proceeding */ - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) && - !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) + /* Bail out if VF is in disabled state, neither initialized, nor active + * state - otherwise proceed with notifications + */ + if ((!test_bit(ICE_VF_STATE_INIT, vf->vf_states) && + !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) || + test_bit(ICE_VF_STATE_DIS, vf->vf_states)) return; pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING; pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM; - ice_aq_send_msg_to_vf(&vf->pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT, + ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT, VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe), NULL); } @@ -1306,6 +1362,7 @@ static void ice_vc_notify_vf_reset(struct ice_vf *vf) */ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) { + struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; struct ice_vf *vfs; int i, ret; @@ -1322,8 +1379,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) goto err_unroll_intr; } /* allocate memory */ - vfs = devm_kcalloc(&pf->pdev->dev, num_alloc_vfs, sizeof(*vfs), - GFP_KERNEL); + vfs = devm_kcalloc(dev, num_alloc_vfs, sizeof(*vfs), GFP_KERNEL); if (!vfs) { ret = -ENOMEM; goto err_pci_disable_sriov; @@ -1352,7 +1408,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) err_unroll_sriov: pf->vf = NULL; - devm_kfree(&pf->pdev->dev, vfs); + devm_kfree(dev, vfs); vfs = NULL; pf->num_alloc_vfs = 0; err_pci_disable_sriov: @@ -1397,7 +1453,7 @@ static bool ice_pf_state_is_nominal(struct ice_pf *pf) static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs) { int pre_existing_vfs = pci_num_vf(pf->pdev); - struct device *dev = &pf->pdev->dev; + struct device *dev = ice_pf_to_dev(pf); int err; if (!ice_pf_state_is_nominal(pf)) { @@ -1407,7 +1463,7 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs) if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) { dev_err(dev, "This device is not capable of SR-IOV\n"); - return -ENODEV; + return -EOPNOTSUPP; } if (pre_existing_vfs && pre_existing_vfs != num_vfs) @@ -1442,10 +1498,10 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs) int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) { struct ice_pf *pf = pci_get_drvdata(pdev); + struct device *dev = ice_pf_to_dev(pf); if (ice_is_safe_mode(pf)) { - dev_err(&pf->pdev->dev, - "SR-IOV cannot be configured - Device is in Safe Mode\n"); + dev_err(dev, "SR-IOV cannot be configured - Device is in Safe Mode\n"); return -EOPNOTSUPP; } @@ -1455,8 
+1511,7 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!pci_vfs_assigned(pdev)) { ice_free_vfs(pf); } else { - dev_err(&pf->pdev->dev, - "can't free VFs because some are assigned to VMs.\n"); + dev_err(dev, "can't free VFs because some are assigned to VMs.\n"); return -EBUSY; } @@ -1495,12 +1550,10 @@ void ice_process_vflr_event(struct ice_pf *pf) } /** - * ice_vc_dis_vf - Disable a given VF via SW reset + * ice_vc_reset_vf - Perform software reset on the VF after informing the AVF * @vf: pointer to the VF info - * - * Disable the VF through a SW reset */ -static void ice_vc_dis_vf(struct ice_vf *vf) +static void ice_vc_reset_vf(struct ice_vf *vf) { ice_vc_notify_vf_reset(vf); ice_reset_vf(vf, false); @@ -1521,24 +1574,28 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, enum virtchnl_status_code v_retval, u8 *msg, u16 msglen) { enum ice_status aq_ret; + struct device *dev; struct ice_pf *pf; - /* validate the request */ - if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs) + if (!vf) return -EINVAL; pf = vf->pf; + if (ice_validate_vf_id(pf, vf->vf_id)) + return -EINVAL; + + dev = ice_pf_to_dev(pf); /* single place to detect unsuccessful return values */ if (v_retval) { vf->num_inval_msgs++; - dev_info(&pf->pdev->dev, "VF %d failed opcode %d, retval: %d\n", - vf->vf_id, v_opcode, v_retval); + dev_info(dev, "VF %d failed opcode %d, retval: %d\n", vf->vf_id, + v_opcode, v_retval); if (vf->num_inval_msgs > ICE_DFLT_NUM_INVAL_MSGS_ALLOWED) { - dev_err(&pf->pdev->dev, + dev_err(dev, "Number of invalid messages exceeded for VF %d\n", vf->vf_id); - dev_err(&pf->pdev->dev, "Use PF Control I/F to enable the VF\n"); + dev_err(dev, "Use PF Control I/F to enable the VF\n"); set_bit(ICE_VF_STATE_DIS, vf->vf_states); return -EIO; } @@ -1551,7 +1608,7 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval, msg, msglen, NULL); if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) { - dev_info(&pf->pdev->dev, + dev_info(dev, "Unable to send the message to VF %d ret %d aq_err %d\n", vf->vf_id, aq_ret, pf->hw.mailboxq.sq_last_status); return -EIO; @@ -1599,14 +1656,14 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) int len = 0; int ret; - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { + if (ice_check_vf_init(pf, vf)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto err; } len = sizeof(struct virtchnl_vf_resource); - vfres = devm_kzalloc(&pf->pdev->dev, len, GFP_KERNEL); + vfres = kzalloc(len, GFP_KERNEL); if (!vfres) { v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; len = 0; @@ -1672,6 +1729,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) ether_addr_copy(vfres->vsi_res[0].default_mac_addr, vf->dflt_lan_addr.addr); + /* match guest capabilities */ + vf->driver_caps = vfres->vf_cap_flags; + set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states); err: @@ -1679,7 +1739,7 @@ err: ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_VF_RESOURCES, v_ret, (u8 *)vfres, len); - devm_kfree(&pf->pdev->dev, vfres); + kfree(vfres); return ret; } @@ -1775,7 +1835,7 @@ static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg) struct virtchnl_rss_key *vrk = (struct virtchnl_rss_key *)msg; struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi = NULL; + struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -1822,7 +1882,7 @@ static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg) struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; enum 
virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi = NULL; + struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -1869,8 +1929,8 @@ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg) enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_queue_select *vqs = (struct virtchnl_queue_select *)msg; + struct ice_eth_stats stats = { 0 }; struct ice_pf *pf = vf->pf; - struct ice_eth_stats stats; struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { @@ -1889,7 +1949,6 @@ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg) goto error_param; } - memset(&stats, 0, sizeof(struct ice_eth_stats)); ice_update_eth_stats(vsi); stats = vsi->eth_stats; @@ -2159,9 +2218,11 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) vector_id = map->vector_id; vsi_id = map->vsi_id; - /* validate msg params */ - if (!(vector_id < pf->hw.func_caps.common_cap - .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id) || + /* vector_id is always 0-based for each VF, and can never be + * larger than or equal to the max allowed interrupts per VF + */ + if (!(vector_id < ICE_MAX_INTR_PER_VF) || + !ice_vc_isvalid_vsi_id(vf, vsi_id) || (!vector_id && (map->rxq_map || map->txq_map))) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2252,7 +2313,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF || qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { - dev_err(&pf->pdev->dev, + dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n", vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -2365,9 +2426,12 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) enum virtchnl_ops vc_op; enum ice_status status; struct ice_vsi *vsi; + struct device *dev; int mac_count = 0; int i; + dev = ice_pf_to_dev(pf); + if (set) vc_op = VIRTCHNL_OP_ADD_ETH_ADDR; else @@ -2381,7 +2445,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) if (set && !ice_is_vf_trusted(vf) && (vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) { - dev_err(&pf->pdev->dev, + dev_err(dev, "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n", vf->vf_id); /* There is no need to let VF know about not being trusted @@ -2406,13 +2470,13 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) /* VF is trying to add filters that the PF * already added. Just continue. 
*/ - dev_info(&pf->pdev->dev, + dev_info(dev, "MAC %pM already set for VF %d\n", maddr, vf->vf_id); continue; } else { /* VF can't remove dflt_lan_addr/bcast MAC */ - dev_err(&pf->pdev->dev, + dev_err(dev, "VF can't remove default MAC address or MAC %pM programmed by PF for VF %d\n", maddr, vf->vf_id); continue; @@ -2421,7 +2485,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) /* check for the invalid cases and bail if necessary */ if (is_zero_ether_addr(maddr)) { - dev_err(&pf->pdev->dev, + dev_err(dev, "invalid MAC %pM provided for VF %d\n", maddr, vf->vf_id); v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -2430,7 +2494,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) if (is_unicast_ether_addr(maddr) && !ice_can_vf_change_mac(vf)) { - dev_err(&pf->pdev->dev, + dev_err(dev, "can't change unicast MAC for untrusted VF %d\n", vf->vf_id); v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -2441,12 +2505,12 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) status = ice_vsi_cfg_mac_fltr(vsi, maddr, set); if (status == ICE_ERR_DOES_NOT_EXIST || status == ICE_ERR_ALREADY_EXISTS) { - dev_info(&pf->pdev->dev, + dev_info(dev, "can't %s MAC filters %pM for VF %d, error %d\n", set ? "add" : "remove", maddr, vf->vf_id, status); } else if (status) { - dev_err(&pf->pdev->dev, + dev_err(dev, "can't %s MAC filters for VF %d, error %d\n", set ? "add" : "remove", vf->vf_id, status); v_ret = ice_err_to_virt_err(status); @@ -2511,7 +2575,9 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg) u16 max_allowed_vf_queues; u16 tx_rx_queue_left; u16 cur_queues; + struct device *dev; + dev = ice_pf_to_dev(pf); if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2522,17 +2588,15 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg) ice_get_avail_rxq_count(pf)); max_allowed_vf_queues = tx_rx_queue_left + cur_queues; if (!req_queues) { - dev_err(&pf->pdev->dev, - "VF %d tried to request 0 queues. Ignoring.\n", + dev_err(dev, "VF %d tried to request 0 queues. 
Ignoring.\n", vf->vf_id); } else if (req_queues > ICE_MAX_BASE_QS_PER_VF) { - dev_err(&pf->pdev->dev, - "VF %d tried to request more than %d queues.\n", + dev_err(dev, "VF %d tried to request more than %d queues.\n", vf->vf_id, ICE_MAX_BASE_QS_PER_VF); vfres->num_queue_pairs = ICE_MAX_BASE_QS_PER_VF; } else if (req_queues > cur_queues && req_queues - cur_queues > tx_rx_queue_left) { - dev_warn(&pf->pdev->dev, + dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n", vf->vf_id, req_queues - cur_queues, tx_rx_queue_left); vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues, @@ -2540,9 +2604,8 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg) } else { /* request is successful, then reset VF */ vf->num_req_qs = req_queues; - ice_vc_dis_vf(vf); - dev_info(&pf->pdev->dev, - "VF %d granted request of %u queues.\n", + ice_vc_reset_vf(vf); + dev_info(dev, "VF %d granted request of %u queues.\n", vf->vf_id, req_queues); return 0; } @@ -2568,39 +2631,34 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, __be16 vlan_proto) { u16 vlanprio = vlan_id | (qos << ICE_VLAN_PRIORITY_S); - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vsi *vsi; + struct device *dev; struct ice_vf *vf; int ret = 0; - /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - dev_err(&pf->pdev->dev, "invalid VF id: %d\n", vf_id); + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - } if (vlan_id > ICE_MAX_VLANID || qos > 7) { - dev_err(&pf->pdev->dev, "Invalid VF Parameters\n"); + dev_err(dev, "Invalid VF Parameters\n"); return -EINVAL; } if (vlan_proto != htons(ETH_P_8021Q)) { - dev_err(&pf->pdev->dev, "VF VLAN protocol is not supported\n"); + dev_err(dev, "VF VLAN protocol is not supported\n"); return -EPROTONOSUPPORT; } vf = &pf->vf[vf_id]; vsi = pf->vsi[vf->lan_vsi_idx]; - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - dev_err(&pf->pdev->dev, "VF %d in reset. 
Try again.\n", vf_id); + if (ice_check_vf_init(pf, vf)) return -EBUSY; - } if (le16_to_cpu(vsi->info.pvid) == vlanprio) { /* duplicate request, so just return success */ - dev_info(&pf->pdev->dev, - "Duplicate pvid %d request\n", vlanprio); + dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio); return ret; } @@ -2619,7 +2677,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, } if (vlan_id) { - dev_info(&pf->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n", + dev_info(dev, "Setting VLAN %d, QoS 0x%x on VF %d\n", vlan_id, qos, vf_id); /* add new VLAN filter for each MAC */ @@ -2638,6 +2696,17 @@ error_set_pvid: } /** + * ice_vf_vlan_offload_ena - determine if capabilities support VLAN offloads + * @caps: VF driver negotiated capabilities + * + * Return true if VIRTCHNL_VF_OFFLOAD_VLAN capability is set, else return false + */ +static bool ice_vf_vlan_offload_ena(u32 caps) +{ + return !!(caps & VIRTCHNL_VF_OFFLOAD_VLAN); +} + +/** * ice_vc_process_vlan_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -2653,16 +2722,23 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) struct ice_pf *pf = vf->pf; bool vlan_promisc = false; struct ice_vsi *vsi; + struct device *dev; struct ice_hw *hw; int status = 0; u8 promisc_m; int i; + dev = ice_pf_to_dev(pf); if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } + if (!ice_vf_vlan_offload_ena(vf->driver_caps)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + if (!ice_vc_isvalid_vsi_id(vf, vfl->vsi_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2670,7 +2746,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) if (add_v && !ice_is_vf_trusted(vf) && vf->num_vlan >= ICE_MAX_VLAN_PER_VF) { - dev_info(&pf->pdev->dev, + dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", vf->vf_id); /* There is no need to let VF know about being not trusted, @@ -2682,7 +2758,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) for (i = 0; i < vfl->num_elements; i++) { if (vfl->vlan_id[i] > ICE_MAX_VLANID) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(&pf->pdev->dev, + dev_err(dev, "invalid VF VLAN id %d\n", vfl->vlan_id[i]); goto error_param; } @@ -2700,14 +2776,6 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) goto error_param; } - if (ice_vsi_manage_vlan_stripping(vsi, add_v)) { - dev_err(&pf->pdev->dev, - "%sable VLAN stripping failed for VSI %i\n", - add_v ? 
"en" : "dis", vsi->vsi_num); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) vlan_promisc = true; @@ -2718,7 +2786,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) if (!ice_is_vf_trusted(vf) && vf->num_vlan >= ICE_MAX_VLAN_PER_VF) { - dev_info(&pf->pdev->dev, + dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", vf->vf_id); /* There is no need to let VF know about being @@ -2739,7 +2807,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) status = ice_cfg_vlan_pruning(vsi, true, false); if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(&pf->pdev->dev, + dev_err(dev, "Enable VLAN pruning on VLAN ID: %d failed error-%d\n", vid, status); goto error_param; @@ -2753,7 +2821,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) promisc_m, vid); if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(&pf->pdev->dev, + dev_err(dev, "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n", vid, status); } @@ -2848,6 +2916,11 @@ static int ice_vc_ena_vlan_stripping(struct ice_vf *vf) goto error_param; } + if (!ice_vf_vlan_offload_ena(vf->driver_caps)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + vsi = pf->vsi[vf->lan_vsi_idx]; if (ice_vsi_manage_vlan_stripping(vsi, true)) v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -2874,6 +2947,11 @@ static int ice_vc_dis_vlan_stripping(struct ice_vf *vf) goto error_param; } + if (!ice_vf_vlan_offload_ena(vf->driver_caps)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -2889,6 +2967,33 @@ error_param: } /** + * ice_vf_init_vlan_stripping - enable/disable VLAN stripping on initialization + * @vf: VF to enable/disable VLAN stripping for on initialization + * + * If the VIRTCHNL_VF_OFFLOAD_VLAN flag is set enable VLAN stripping, else if + * the flag is cleared then we want to disable stripping. For example, the flag + * will be cleared when port VLANs are configured by the administrator before + * passing the VF to the guest or if the AVF driver doesn't support VLAN + * offloads. 
+ */ +static int ice_vf_init_vlan_stripping(struct ice_vf *vf) +{ + struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; + + if (!vsi) + return -EINVAL; + + /* don't modify stripping if port VLAN is configured */ + if (vsi->info.pvid) + return 0; + + if (ice_vf_vlan_offload_ena(vf->driver_caps)) + return ice_vsi_manage_vlan_stripping(vsi, true); + else + return ice_vsi_manage_vlan_stripping(vsi, false); +} + +/** * ice_vc_process_vf_msg - Process request from VF * @pf: pointer to the PF structure * @event: pointer to the AQ event @@ -2903,9 +3008,11 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) u16 msglen = event->msg_len; u8 *msg = event->msg_buf; struct ice_vf *vf = NULL; + struct device *dev; int err = 0; - if (vf_id >= pf->num_alloc_vfs) { + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) { err = -EINVAL; goto error_handler; } @@ -2931,7 +3038,7 @@ error_handler: if (err) { ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM, NULL, 0); - dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n", + dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n", vf_id, v_opcode, msglen, err); return; } @@ -2942,6 +3049,10 @@ error_handler: break; case VIRTCHNL_OP_GET_VF_RESOURCES: err = ice_vc_get_vf_res_msg(vf, msg); + if (ice_vf_init_vlan_stripping(vf)) + dev_err(dev, + "Failed to initialize VLAN stripping for VF %d\n", + vf->vf_id); ice_vc_notify_vf_link_state(vf); break; case VIRTCHNL_OP_RESET_VF: @@ -2992,8 +3103,8 @@ error_handler: break; case VIRTCHNL_OP_UNKNOWN: default: - dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n", - v_opcode, vf_id); + dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode, + vf_id); err = ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL, 0); @@ -3003,8 +3114,7 @@ error_handler: /* Helper function cares less about error return values here * as it is busy with pending work. */ - dev_info(&pf->pdev->dev, - "PF failed to honor VF %d, opcode %d, error %d\n", + dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n", vf_id, v_opcode, err); } } @@ -3020,24 +3130,18 @@ error_handler: int ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vsi *vsi; struct ice_vf *vf; - /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - netdev_err(netdev, "invalid VF id: %d\n", vf_id); + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - } vf = &pf->vf[vf_id]; vsi = pf->vsi[vf->lan_vsi_idx]; - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - netdev_err(netdev, "VF %d in reset. 
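/*
 * Sketch, not part of the patch: the VIRTCHNL_VF_OFFLOAD_VLAN checks added
 * above gate the VLAN virtchnl handlers on the capabilities the VF driver
 * negotiated, and ice_vf_init_vlan_stripping() applies the same flag once at
 * init unless a port VLAN (pvid) is already configured. Below is a minimal
 * userspace illustration of that decision; the flag value and the stripping
 * call are stand-ins, not the kernel API.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VF_OFFLOAD_VLAN (1u << 16)	/* stand-in for VIRTCHNL_VF_OFFLOAD_VLAN */

static int set_vlan_stripping(bool ena)
{
	printf("VLAN stripping %s\n", ena ? "enabled" : "disabled");
	return 0;
}

/* Port VLAN (pvid != 0) wins; otherwise the negotiated capability decides. */
static int init_vlan_stripping(uint32_t driver_caps, uint16_t pvid)
{
	if (pvid)
		return 0;
	return set_vlan_stripping(driver_caps & VF_OFFLOAD_VLAN);
}

int main(void)
{
	init_vlan_stripping(VF_OFFLOAD_VLAN, 0);	/* -> enabled */
	init_vlan_stripping(0, 0);			/* -> disabled */
	init_vlan_stripping(VF_OFFLOAD_VLAN, 5);	/* port VLAN set: left alone */
	return 0;
}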
Try again.\n", vf_id); + if (ice_check_vf_init(pf, vf)) return -EBUSY; - } ivi->vf = vf_id; ether_addr_copy(ivi->mac, vf->dflt_lan_addr.addr); @@ -3070,33 +3174,29 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) */ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vsi *vsi = pf->vsi[0]; struct ice_vsi_ctx *ctx; enum ice_status status; + struct device *dev; struct ice_vf *vf; int ret = 0; - /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - netdev_err(netdev, "invalid VF id: %d\n", vf_id); + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - } vf = &pf->vf[vf_id]; - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id); + if (ice_check_vf_init(pf, vf)) return -EBUSY; - } if (ena == vf->spoofchk) { - dev_dbg(&pf->pdev->dev, "VF spoofchk already %s\n", + dev_dbg(dev, "VF spoofchk already %s\n", ena ? "ON" : "OFF"); return 0; } - ctx = devm_kzalloc(&pf->pdev->dev, sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; @@ -3109,7 +3209,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL); if (status) { - dev_dbg(&pf->pdev->dev, + dev_dbg(dev, "Error %d, failed to update VSI* parameters\n", status); ret = -EIO; goto out; @@ -3119,11 +3219,28 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) vsi->info.sec_flags = ctx->info.sec_flags; vsi->info.sw_flags2 = ctx->info.sw_flags2; out: - devm_kfree(&pf->pdev->dev, ctx); + kfree(ctx); return ret; } /** + * ice_wait_on_vf_reset + * @vf: The VF being resseting + * + * Poll to make sure a given VF is ready after reset + */ +static void ice_wait_on_vf_reset(struct ice_vf *vf) +{ + int i; + + for (i = 0; i < ICE_MAX_VF_RESET_WAIT; i++) { + if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) + break; + msleep(20); + } +} + +/** * ice_set_vf_mac * @netdev: network interface device structure * @vf_id: VF identifier @@ -3133,23 +3250,25 @@ out: */ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; int ret = 0; - /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - netdev_err(netdev, "invalid VF id: %d\n", vf_id); + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - } vf = &pf->vf[vf_id]; - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id); + /* Don't set MAC on disabled VF */ + if (ice_is_vf_disabled(vf)) + return -EINVAL; + + /* In case VF is in reset mode, wait until it is completed. Depending + * on factors like queue disabling routine, this could take ~250ms + */ + ice_wait_on_vf_reset(vf); + + if (ice_check_vf_init(pf, vf)) return -EBUSY; - } if (is_zero_ether_addr(mac) || is_multicast_ether_addr(mac)) { netdev_err(netdev, "%pM not a valid unicast address\n", mac); @@ -3167,7 +3286,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) "MAC on VF %d set to %pM. 
VF driver will be reinitialized\n", vf_id, mac); - ice_vc_dis_vf(vf); + ice_vc_reset_vf(vf); return ret; } @@ -3181,30 +3300,34 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) */ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct device *dev; struct ice_vf *vf; - /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - dev_err(&pf->pdev->dev, "invalid VF id: %d\n", vf_id); + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - } vf = &pf->vf[vf_id]; - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - dev_err(&pf->pdev->dev, "VF %d in reset. Try again.\n", vf_id); + /* Don't set Trusted Mode on disabled VF */ + if (ice_is_vf_disabled(vf)) + return -EINVAL; + + /* In case VF is in reset mode, wait until it is completed. Depending + * on factors like queue disabling routine, this could take ~250ms + */ + ice_wait_on_vf_reset(vf); + + if (ice_check_vf_init(pf, vf)) return -EBUSY; - } /* Check if already trusted */ if (trusted == vf->trusted) return 0; vf->trusted = trusted; - ice_vc_dis_vf(vf); - dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", + ice_vc_reset_vf(vf); + dev_info(dev, "VF %u is now %strusted\n", vf_id, trusted ? "" : "un"); return 0; @@ -3220,26 +3343,21 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) */ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; + struct ice_pf *pf = ice_netdev_to_pf(netdev); struct virtchnl_pf_event pfe = { 0 }; struct ice_link_status *ls; struct ice_vf *vf; struct ice_hw *hw; - if (vf_id >= pf->num_alloc_vfs) { - dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id); + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - } vf = &pf->vf[vf_id]; hw = &pf->hw; ls = &pf->hw.port_info->phy.link_info; - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - dev_err(&pf->pdev->dev, "vf %d in reset. 
Try again.\n", vf_id); + if (ice_check_vf_init(pf, vf)) return -EBUSY; - } pfe.event = VIRTCHNL_EVENT_LINK_CHANGE; pfe.severity = PF_EVENT_SEVERITY_INFO; @@ -3273,3 +3391,48 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) return 0; } + +/** + * ice_get_vf_stats - populate some stats for the VF + * @netdev: the netdev of the PF + * @vf_id: the host OS identifier (0-255) + * @vf_stats: pointer to the OS memory to be initialized + */ +int ice_get_vf_stats(struct net_device *netdev, int vf_id, + struct ifla_vf_stats *vf_stats) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_eth_stats *stats; + struct ice_vsi *vsi; + struct ice_vf *vf; + + if (ice_validate_vf_id(pf, vf_id)) + return -EINVAL; + + vf = &pf->vf[vf_id]; + + if (ice_check_vf_init(pf, vf)) + return -EBUSY; + + vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vsi) + return -EINVAL; + + ice_update_eth_stats(vsi); + stats = &vsi->eth_stats; + + memset(vf_stats, 0, sizeof(*vf_stats)); + + vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast + + stats->rx_multicast; + vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast + + stats->tx_multicast; + vf_stats->rx_bytes = stats->rx_bytes; + vf_stats->tx_bytes = stats->tx_bytes; + vf_stats->broadcast = stats->rx_broadcast; + vf_stats->multicast = stats->rx_multicast; + vf_stats->rx_dropped = stats->rx_discards; + vf_stats->tx_dropped = stats->tx_discards; + + return 0; +} diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 0d9880c8bba3..88aa65d5cb31 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -38,6 +38,7 @@ #define ICE_MAX_POLICY_INTR_PER_VF 33 #define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1) #define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1) +#define ICE_MAX_VF_RESET_WAIT 15 /* Specific VF states */ enum ice_vf_states { @@ -121,6 +122,9 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena); int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector); void ice_set_vf_state_qs_dis(struct ice_vf *vf); +int +ice_get_vf_stats(struct net_device *netdev, int vf_id, + struct ifla_vf_stats *vf_stats); #else /* CONFIG_PCI_IOV */ #define ice_process_vflr_event(pf) do {} while (0) #define ice_free_vfs(pf) do {} while (0) @@ -193,5 +197,13 @@ ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf, { return 0; } + +static inline int +ice_get_vf_stats(struct net_device __always_unused *netdev, + int __always_unused vf_id, + struct ifla_vf_stats __always_unused *vf_stats) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_PCI_IOV */ #endif /* _ICE_VIRTCHNL_PF_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c new file mode 100644 index 000000000000..cf9b8b22d24f --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -0,0 +1,1181 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Intel Corporation. 
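/*
 * Sketch, not part of the patch: the ice_get_vf_stats() callback added above
 * folds the VSI's Ethernet counters into the generic ifla_vf_stats layout —
 * unicast + broadcast + multicast become the packet totals, and the discard
 * counters become the drop counters. A standalone illustration of just that
 * mapping, with plain structs standing in for ice_eth_stats and
 * struct ifla_vf_stats.
 */
#include <stdint.h>
#include <stdio.h>

struct eth_stats {	/* stand-in for ice_eth_stats */
	uint64_t rx_unicast, rx_multicast, rx_broadcast, rx_bytes, rx_discards;
	uint64_t tx_unicast, tx_multicast, tx_broadcast, tx_bytes, tx_discards;
};

struct vf_stats {	/* stand-in for struct ifla_vf_stats */
	uint64_t rx_packets, tx_packets, rx_bytes, tx_bytes;
	uint64_t broadcast, multicast, rx_dropped, tx_dropped;
};

static void fill_vf_stats(const struct eth_stats *s, struct vf_stats *out)
{
	out->rx_packets = s->rx_unicast + s->rx_broadcast + s->rx_multicast;
	out->tx_packets = s->tx_unicast + s->tx_broadcast + s->tx_multicast;
	out->rx_bytes   = s->rx_bytes;
	out->tx_bytes   = s->tx_bytes;
	out->broadcast  = s->rx_broadcast;
	out->multicast  = s->rx_multicast;
	out->rx_dropped = s->rx_discards;
	out->tx_dropped = s->tx_discards;
}

int main(void)
{
	struct eth_stats s = { .rx_unicast = 10, .rx_broadcast = 2,
			       .rx_multicast = 3, .rx_bytes = 1500,
			       .rx_discards = 1 };
	struct vf_stats v;

	fill_vf_stats(&s, &v);
	printf("rx_packets=%llu rx_dropped=%llu\n",
	       (unsigned long long)v.rx_packets,
	       (unsigned long long)v.rx_dropped);
	return 0;
}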
*/ + +#include <linux/bpf_trace.h> +#include <net/xdp_sock.h> +#include <net/xdp.h> +#include "ice.h" +#include "ice_base.h" +#include "ice_type.h" +#include "ice_xsk.h" +#include "ice_txrx.h" +#include "ice_txrx_lib.h" +#include "ice_lib.h" + +/** + * ice_qp_reset_stats - Resets all stats for rings of given index + * @vsi: VSI that contains rings of interest + * @q_idx: ring index in array + */ +static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) +{ + memset(&vsi->rx_rings[q_idx]->rx_stats, 0, + sizeof(vsi->rx_rings[q_idx]->rx_stats)); + memset(&vsi->tx_rings[q_idx]->stats, 0, + sizeof(vsi->tx_rings[q_idx]->stats)); + if (ice_is_xdp_ena_vsi(vsi)) + memset(&vsi->xdp_rings[q_idx]->stats, 0, + sizeof(vsi->xdp_rings[q_idx]->stats)); +} + +/** + * ice_qp_clean_rings - Cleans all the rings of a given index + * @vsi: VSI that contains rings of interest + * @q_idx: ring index in array + */ +static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) +{ + ice_clean_tx_ring(vsi->tx_rings[q_idx]); + if (ice_is_xdp_ena_vsi(vsi)) + ice_clean_tx_ring(vsi->xdp_rings[q_idx]); + ice_clean_rx_ring(vsi->rx_rings[q_idx]); +} + +/** + * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector + * @vsi: VSI that has netdev + * @q_vector: q_vector that has NAPI context + * @enable: true for enable, false for disable + */ +static void +ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector, + bool enable) +{ + if (!vsi->netdev || !q_vector) + return; + + if (enable) + napi_enable(&q_vector->napi); + else + napi_disable(&q_vector->napi); +} + +/** + * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring + * @vsi: the VSI that contains queue vector being un-configured + * @rx_ring: Rx ring that will have its IRQ disabled + * @q_vector: queue vector + */ +static void +ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_ring *rx_ring, + struct ice_q_vector *q_vector) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + int base = vsi->base_vector; + u16 reg; + u32 val; + + /* QINT_TQCTL is being cleared in ice_vsi_stop_tx_ring, so handle + * here only QINT_RQCTL + */ + reg = rx_ring->reg_idx; + val = rd32(hw, QINT_RQCTL(reg)); + val &= ~QINT_RQCTL_CAUSE_ENA_M; + wr32(hw, QINT_RQCTL(reg), val); + + if (q_vector) { + u16 v_idx = q_vector->v_idx; + + wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0); + ice_flush(hw); + synchronize_irq(pf->msix_entries[v_idx + base].vector); + } +} + +/** + * ice_qvec_cfg_msix - Enable IRQ for given queue vector + * @vsi: the VSI that contains queue vector + * @q_vector: queue vector + */ +static void +ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector) +{ + u16 reg_idx = q_vector->reg_idx; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + struct ice_ring *ring; + + ice_cfg_itr(hw, q_vector); + + wr32(hw, GLINT_RATE(reg_idx), + ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); + + ice_for_each_ring(ring, q_vector->tx) + ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx, + q_vector->tx.itr_idx); + + ice_for_each_ring(ring, q_vector->rx) + ice_cfg_rxq_interrupt(vsi, ring->reg_idx, reg_idx, + q_vector->rx.itr_idx); + + ice_flush(hw); +} + +/** + * ice_qvec_ena_irq - Enable IRQ for given queue vector + * @vsi: the VSI that contains queue vector + * @q_vector: queue vector + */ +static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + + ice_irq_dynamic_ena(hw, vsi, q_vector); + + ice_flush(hw); 
+} + +/** + * ice_qp_dis - Disables a queue pair + * @vsi: VSI of interest + * @q_idx: ring index in array + * + * Returns 0 on success, negative on failure. + */ +static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) +{ + struct ice_txq_meta txq_meta = { }; + struct ice_ring *tx_ring, *rx_ring; + struct ice_q_vector *q_vector; + int timeout = 50; + int err; + + if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq) + return -EINVAL; + + tx_ring = vsi->tx_rings[q_idx]; + rx_ring = vsi->rx_rings[q_idx]; + q_vector = rx_ring->q_vector; + + while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) { + timeout--; + if (!timeout) + return -EBUSY; + usleep_range(1000, 2000); + } + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + + ice_qvec_dis_irq(vsi, rx_ring, q_vector); + + ice_fill_txq_meta(vsi, tx_ring, &txq_meta); + err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); + if (err) + return err; + if (ice_is_xdp_ena_vsi(vsi)) { + struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx]; + + memset(&txq_meta, 0, sizeof(txq_meta)); + ice_fill_txq_meta(vsi, xdp_ring, &txq_meta); + err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring, + &txq_meta); + if (err) + return err; + } + err = ice_vsi_ctrl_rx_ring(vsi, false, q_idx); + if (err) + return err; + + ice_qvec_toggle_napi(vsi, q_vector, false); + ice_qp_clean_rings(vsi, q_idx); + ice_qp_reset_stats(vsi, q_idx); + + return 0; +} + +/** + * ice_qp_ena - Enables a queue pair + * @vsi: VSI of interest + * @q_idx: ring index in array + * + * Returns 0 on success, negative on failure. + */ +static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) +{ + struct ice_aqc_add_tx_qgrp *qg_buf; + struct ice_ring *tx_ring, *rx_ring; + struct ice_q_vector *q_vector; + int err; + + if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq) + return -EINVAL; + + qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL); + if (!qg_buf) + return -ENOMEM; + + qg_buf->num_txqs = 1; + + tx_ring = vsi->tx_rings[q_idx]; + rx_ring = vsi->rx_rings[q_idx]; + q_vector = rx_ring->q_vector; + + err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf); + if (err) + goto free_buf; + + if (ice_is_xdp_ena_vsi(vsi)) { + struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx]; + + memset(qg_buf, 0, sizeof(*qg_buf)); + qg_buf->num_txqs = 1; + err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf); + if (err) + goto free_buf; + ice_set_ring_xdp(xdp_ring); + xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring); + } + + err = ice_setup_rx_ctx(rx_ring); + if (err) + goto free_buf; + + ice_qvec_cfg_msix(vsi, q_vector); + + err = ice_vsi_ctrl_rx_ring(vsi, true, q_idx); + if (err) + goto free_buf; + + clear_bit(__ICE_CFG_BUSY, vsi->state); + ice_qvec_toggle_napi(vsi, q_vector, true); + ice_qvec_ena_irq(vsi, q_vector); + + netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); +free_buf: + kfree(qg_buf); + return err; +} + +/** + * ice_xsk_alloc_umems - allocate a UMEM region for an XDP socket + * @vsi: VSI to allocate the UMEM on + * + * Returns 0 on success, negative on error + */ +static int ice_xsk_alloc_umems(struct ice_vsi *vsi) +{ + if (vsi->xsk_umems) + return 0; + + vsi->xsk_umems = kcalloc(vsi->num_xsk_umems, sizeof(*vsi->xsk_umems), + GFP_KERNEL); + + if (!vsi->xsk_umems) { + vsi->num_xsk_umems = 0; + return -ENOMEM; + } + + return 0; +} + +/** + * ice_xsk_add_umem - add a UMEM region for XDP sockets + * @vsi: VSI to which the UMEM will be added + * @umem: pointer to a requested UMEM region + * @qid: queue ID + * + * Returns 0 on success, negative on error + */ +static int ice_xsk_add_umem(struct ice_vsi 
*vsi, struct xdp_umem *umem, u16 qid) +{ + int err; + + err = ice_xsk_alloc_umems(vsi); + if (err) + return err; + + vsi->xsk_umems[qid] = umem; + vsi->num_xsk_umems_used++; + + return 0; +} + +/** + * ice_xsk_remove_umem - Remove an UMEM for a certain ring/qid + * @vsi: VSI from which the VSI will be removed + * @qid: Ring/qid associated with the UMEM + */ +static void ice_xsk_remove_umem(struct ice_vsi *vsi, u16 qid) +{ + vsi->xsk_umems[qid] = NULL; + vsi->num_xsk_umems_used--; + + if (vsi->num_xsk_umems_used == 0) { + kfree(vsi->xsk_umems); + vsi->xsk_umems = NULL; + vsi->num_xsk_umems = 0; + } +} + +/** + * ice_xsk_umem_dma_map - DMA map UMEM region for XDP sockets + * @vsi: VSI to map the UMEM region + * @umem: UMEM to map + * + * Returns 0 on success, negative on error + */ +static int ice_xsk_umem_dma_map(struct ice_vsi *vsi, struct xdp_umem *umem) +{ + struct ice_pf *pf = vsi->back; + struct device *dev; + unsigned int i; + + dev = ice_pf_to_dev(pf); + for (i = 0; i < umem->npgs; i++) { + dma_addr_t dma = dma_map_page_attrs(dev, umem->pgs[i], 0, + PAGE_SIZE, + DMA_BIDIRECTIONAL, + ICE_RX_DMA_ATTR); + if (dma_mapping_error(dev, dma)) { + dev_dbg(dev, + "XSK UMEM DMA mapping error on page num %d", i); + goto out_unmap; + } + + umem->pages[i].dma = dma; + } + + return 0; + +out_unmap: + for (; i > 0; i--) { + dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR); + umem->pages[i].dma = 0; + } + + return -EFAULT; +} + +/** + * ice_xsk_umem_dma_unmap - DMA unmap UMEM region for XDP sockets + * @vsi: VSI from which the UMEM will be unmapped + * @umem: UMEM to unmap + */ +static void ice_xsk_umem_dma_unmap(struct ice_vsi *vsi, struct xdp_umem *umem) +{ + struct ice_pf *pf = vsi->back; + struct device *dev; + unsigned int i; + + dev = ice_pf_to_dev(pf); + for (i = 0; i < umem->npgs; i++) { + dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR); + + umem->pages[i].dma = 0; + } +} + +/** + * ice_xsk_umem_disable - disable a UMEM region + * @vsi: Current VSI + * @qid: queue ID + * + * Returns 0 on success, negative on failure + */ +static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid) +{ + if (!vsi->xsk_umems || qid >= vsi->num_xsk_umems || + !vsi->xsk_umems[qid]) + return -EINVAL; + + ice_xsk_umem_dma_unmap(vsi, vsi->xsk_umems[qid]); + ice_xsk_remove_umem(vsi, qid); + + return 0; +} + +/** + * ice_xsk_umem_enable - enable a UMEM region + * @vsi: Current VSI + * @umem: pointer to a requested UMEM region + * @qid: queue ID + * + * Returns 0 on success, negative on failure + */ +static int +ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid) +{ + struct xdp_umem_fq_reuse *reuseq; + int err; + + if (vsi->type != ICE_VSI_PF) + return -EINVAL; + + vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq); + if (qid >= vsi->num_xsk_umems) + return -EINVAL; + + if (vsi->xsk_umems && vsi->xsk_umems[qid]) + return -EBUSY; + + reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count); + if (!reuseq) + return -ENOMEM; + + xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq)); + + err = ice_xsk_umem_dma_map(vsi, umem); + if (err) + return err; + + err = ice_xsk_add_umem(vsi, umem, qid); + if (err) + return err; + + return 0; +} + +/** + * ice_xsk_umem_setup - enable/disable a UMEM region depending on its state + * @vsi: Current VSI + * @umem: UMEM to enable/associate to a ring, NULL to disable + * @qid: queue ID + * + * Returns 0 on success, negative on failure + */ +int ice_xsk_umem_setup(struct ice_vsi 
*vsi, struct xdp_umem *umem, u16 qid) +{ + bool if_running, umem_present = !!umem; + int ret = 0, umem_failure = 0; + + if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi); + + if (if_running) { + ret = ice_qp_dis(vsi, qid); + if (ret) { + netdev_err(vsi->netdev, "ice_qp_dis error = %d", ret); + goto xsk_umem_if_up; + } + } + + umem_failure = umem_present ? ice_xsk_umem_enable(vsi, umem, qid) : + ice_xsk_umem_disable(vsi, qid); + +xsk_umem_if_up: + if (if_running) { + ret = ice_qp_ena(vsi, qid); + if (!ret && umem_present) + napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi); + else if (ret) + netdev_err(vsi->netdev, "ice_qp_ena error = %d", ret); + } + + if (umem_failure) { + netdev_err(vsi->netdev, "Could not %sable UMEM, error = %d", + umem_present ? "en" : "dis", umem_failure); + return umem_failure; + } + + return ret; +} + +/** + * ice_zca_free - Callback for MEM_TYPE_ZERO_COPY allocations + * @zca: zero-cpoy allocator + * @handle: Buffer handle + */ +void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle) +{ + struct ice_rx_buf *rx_buf; + struct ice_ring *rx_ring; + struct xdp_umem *umem; + u64 hr, mask; + u16 nta; + + rx_ring = container_of(zca, struct ice_ring, zca); + umem = rx_ring->xsk_umem; + hr = umem->headroom + XDP_PACKET_HEADROOM; + + mask = umem->chunk_mask; + + nta = rx_ring->next_to_alloc; + rx_buf = &rx_ring->rx_buf[nta]; + + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + handle &= mask; + + rx_buf->dma = xdp_umem_get_dma(umem, handle); + rx_buf->dma += hr; + + rx_buf->addr = xdp_umem_get_data(umem, handle); + rx_buf->addr += hr; + + rx_buf->handle = (u64)handle + umem->headroom; +} + +/** + * ice_alloc_buf_fast_zc - Retrieve buffer address from XDP umem + * @rx_ring: ring with an xdp_umem bound to it + * @rx_buf: buffer to which xsk page address will be assigned + * + * This function allocates an Rx buffer in the hot path. + * The buffer can come from fill queue or recycle queue. + * + * Returns true if an assignment was successful, false if not. + */ +static __always_inline bool +ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) +{ + struct xdp_umem *umem = rx_ring->xsk_umem; + void *addr = rx_buf->addr; + u64 handle, hr; + + if (addr) { + rx_ring->rx_stats.page_reuse_count++; + return true; + } + + if (!xsk_umem_peek_addr(umem, &handle)) { + rx_ring->rx_stats.alloc_page_failed++; + return false; + } + + hr = umem->headroom + XDP_PACKET_HEADROOM; + + rx_buf->dma = xdp_umem_get_dma(umem, handle); + rx_buf->dma += hr; + + rx_buf->addr = xdp_umem_get_data(umem, handle); + rx_buf->addr += hr; + + rx_buf->handle = handle + umem->headroom; + + xsk_umem_discard_addr(umem); + return true; +} + +/** + * ice_alloc_buf_slow_zc - Retrieve buffer address from XDP umem + * @rx_ring: ring with an xdp_umem bound to it + * @rx_buf: buffer to which xsk page address will be assigned + * + * This function allocates an Rx buffer in the slow path. + * The buffer can come from fill queue or recycle queue. + * + * Returns true if an assignment was successful, false if not. 
+ */ +static __always_inline bool +ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) +{ + struct xdp_umem *umem = rx_ring->xsk_umem; + u64 handle, headroom; + + if (!xsk_umem_peek_addr_rq(umem, &handle)) { + rx_ring->rx_stats.alloc_page_failed++; + return false; + } + + handle &= umem->chunk_mask; + headroom = umem->headroom + XDP_PACKET_HEADROOM; + + rx_buf->dma = xdp_umem_get_dma(umem, handle); + rx_buf->dma += headroom; + + rx_buf->addr = xdp_umem_get_data(umem, handle); + rx_buf->addr += headroom; + + rx_buf->handle = handle + umem->headroom; + + xsk_umem_discard_addr_rq(umem); + return true; +} + +/** + * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * @alloc: the function pointer to call for allocation + * + * This function allocates a number of Rx buffers from the fill ring + * or the internal recycle mechanism and places them on the Rx ring. + * + * Returns false if all allocations were successful, true if any fail. + */ +static bool +ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count, + bool alloc(struct ice_ring *, struct ice_rx_buf *)) +{ + union ice_32b_rx_flex_desc *rx_desc; + u16 ntu = rx_ring->next_to_use; + struct ice_rx_buf *rx_buf; + bool ret = false; + + if (!count) + return false; + + rx_desc = ICE_RX_DESC(rx_ring, ntu); + rx_buf = &rx_ring->rx_buf[ntu]; + + do { + if (!alloc(rx_ring, rx_buf)) { + ret = true; + break; + } + + dma_sync_single_range_for_device(rx_ring->dev, rx_buf->dma, 0, + rx_ring->rx_buf_len, + DMA_BIDIRECTIONAL); + + rx_desc->read.pkt_addr = cpu_to_le64(rx_buf->dma); + rx_desc->wb.status_error0 = 0; + + rx_desc++; + rx_buf++; + ntu++; + + if (unlikely(ntu == rx_ring->count)) { + rx_desc = ICE_RX_DESC(rx_ring, 0); + rx_buf = rx_ring->rx_buf; + ntu = 0; + } + } while (--count); + + if (rx_ring->next_to_use != ntu) + ice_release_rx_desc(rx_ring, ntu); + + return ret; +} + +/** + * ice_alloc_rx_bufs_fast_zc - allocate zero copy bufs in the hot path + * @rx_ring: Rx ring + * @count: number of bufs to allocate + * + * Returns false on success, true on failure. + */ +static bool ice_alloc_rx_bufs_fast_zc(struct ice_ring *rx_ring, u16 count) +{ + return ice_alloc_rx_bufs_zc(rx_ring, count, + ice_alloc_buf_fast_zc); +} + +/** + * ice_alloc_rx_bufs_slow_zc - allocate zero copy bufs in the slow path + * @rx_ring: Rx ring + * @count: number of bufs to allocate + * + * Returns false on success, true on failure. + */ +bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count) +{ + return ice_alloc_rx_bufs_zc(rx_ring, count, + ice_alloc_buf_slow_zc); +} + +/** + * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring + * @rx_ring: Rx ring + */ +static void ice_bump_ntc(struct ice_ring *rx_ring) +{ + int ntc = rx_ring->next_to_clean + 1; + + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + prefetch(ICE_RX_DESC(rx_ring, ntc)); +} + +/** + * ice_get_rx_buf_zc - Fetch the current Rx buffer + * @rx_ring: Rx ring + * @size: size of a buffer + * + * This function returns the current, received Rx buffer and does + * DMA synchronization. + * + * Returns a pointer to the received Rx buffer. 
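/*
 * Sketch, not part of the patch: the zero-copy helpers above share one piece
 * of offset arithmetic — a recycled AF_XDP handle is masked down to its chunk
 * boundary, the UMEM headroom plus XDP_PACKET_HEADROOM is skipped to reach
 * the packet data, and the chunk-relative handle plus the UMEM headroom is
 * what gets stored back in the rx_buf. The chunk size, headrooms and example
 * handle below are invented values, not taken from the driver.
 */
#include <stdint.h>
#include <stdio.h>

#define CHUNK_SIZE	 2048u
#define CHUNK_MASK	 (~((uint64_t)CHUNK_SIZE - 1))
#define UMEM_HEADROOM	 0u
#define XDP_PKT_HEADROOM 256u	/* stand-in for XDP_PACKET_HEADROOM */

int main(void)
{
	uint64_t handle = 3 * CHUNK_SIZE + 100;	/* somewhere inside chunk 3 */
	uint64_t hr = UMEM_HEADROOM + XDP_PKT_HEADROOM;

	uint64_t chunk_start = handle & CHUNK_MASK;	/* 6144 */
	uint64_t data_off = chunk_start + hr;		/* where packet data lands */
	uint64_t stored = chunk_start + UMEM_HEADROOM;	/* value kept as rx_buf->handle */

	printf("chunk start %llu, data offset %llu, stored handle %llu\n",
	       (unsigned long long)chunk_start,
	       (unsigned long long)data_off,
	       (unsigned long long)stored);
	return 0;
}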
+ */ +static struct ice_rx_buf *ice_get_rx_buf_zc(struct ice_ring *rx_ring, int size) +{ + struct ice_rx_buf *rx_buf; + + rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; + + dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, 0, + size, DMA_BIDIRECTIONAL); + + return rx_buf; +} + +/** + * ice_reuse_rx_buf_zc - reuse an Rx buffer + * @rx_ring: Rx ring + * @old_buf: The buffer to recycle + * + * This function recycles a finished Rx buffer, and places it on the recycle + * queue (next_to_alloc). + */ +static void +ice_reuse_rx_buf_zc(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf) +{ + unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask; + u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM; + u16 nta = rx_ring->next_to_alloc; + struct ice_rx_buf *new_buf; + + new_buf = &rx_ring->rx_buf[nta++]; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + new_buf->dma = old_buf->dma & mask; + new_buf->dma += hr; + + new_buf->addr = (void *)((unsigned long)old_buf->addr & mask); + new_buf->addr += hr; + + new_buf->handle = old_buf->handle & mask; + new_buf->handle += rx_ring->xsk_umem->headroom; + + old_buf->addr = NULL; +} + +/** + * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer + * @rx_ring: Rx ring + * @rx_buf: zero-copy Rx buffer + * @xdp: XDP buffer + * + * This function allocates a new skb from a zero-copy Rx buffer. + * + * Returns the skb on success, NULL on failure. + */ +static struct sk_buff * +ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, + struct xdp_buff *xdp) +{ + unsigned int metasize = xdp->data - xdp->data_meta; + unsigned int datasize = xdp->data_end - xdp->data; + unsigned int datasize_hard = xdp->data_end - + xdp->data_hard_start; + struct sk_buff *skb; + + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, xdp->data - xdp->data_hard_start); + memcpy(__skb_put(skb, datasize), xdp->data, datasize); + if (metasize) + skb_metadata_set(skb, metasize); + + ice_reuse_rx_buf_zc(rx_ring, rx_buf); + + return skb; +} + +/** + * ice_run_xdp_zc - Executes an XDP program in zero-copy path + * @rx_ring: Rx ring + * @xdp: xdp_buff used as input to the XDP program + * + * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} + */ +static int +ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp) +{ + int err, result = ICE_XDP_PASS; + struct bpf_prog *xdp_prog; + struct ice_ring *xdp_ring; + u32 act; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + if (!xdp_prog) { + rcu_read_unlock(); + return ICE_XDP_PASS; + } + + act = bpf_prog_run_xdp(xdp_prog, xdp); + xdp->handle += xdp->data - xdp->data_hard_start; + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index]; + result = ice_xmit_xdp_buff(xdp, xdp_ring); + break; + case XDP_REDIRECT: + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + result = !err ? 
ICE_XDP_REDIR : ICE_XDP_CONSUMED; + break; + default: + bpf_warn_invalid_xdp_action(act); + /* fallthrough -- not supported action */ + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + /* fallthrough -- handle aborts by dropping frame */ + case XDP_DROP: + result = ICE_XDP_CONSUMED; + break; + } + + rcu_read_unlock(); + return result; +} + +/** + * ice_clean_rx_irq_zc - consumes packets from the hardware ring + * @rx_ring: AF_XDP Rx ring + * @budget: NAPI budget + * + * Returns number of processed packets on success, remaining budget on failure. + */ +int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) +{ + unsigned int total_rx_bytes = 0, total_rx_packets = 0; + u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); + unsigned int xdp_xmit = 0; + struct xdp_buff xdp; + bool failure = 0; + + xdp.rxq = &rx_ring->xdp_rxq; + + while (likely(total_rx_packets < (unsigned int)budget)) { + union ice_32b_rx_flex_desc *rx_desc; + unsigned int size, xdp_res = 0; + struct ice_rx_buf *rx_buf; + struct sk_buff *skb; + u16 stat_err_bits; + u16 vlan_tag = 0; + u8 rx_ptype; + + if (cleaned_count >= ICE_RX_BUF_WRITE) { + failure |= ice_alloc_rx_bufs_fast_zc(rx_ring, + cleaned_count); + cleaned_count = 0; + } + + rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean); + + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S); + if (!ice_test_staterr(rx_desc, stat_err_bits)) + break; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we have + * verified the descriptor has been written back. + */ + dma_rmb(); + + size = le16_to_cpu(rx_desc->wb.pkt_len) & + ICE_RX_FLX_DESC_PKT_LEN_M; + if (!size) + break; + + rx_buf = ice_get_rx_buf_zc(rx_ring, size); + if (!rx_buf->addr) + break; + + xdp.data = rx_buf->addr; + xdp.data_meta = xdp.data; + xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM; + xdp.data_end = xdp.data + size; + xdp.handle = rx_buf->handle; + + xdp_res = ice_run_xdp_zc(rx_ring, &xdp); + if (xdp_res) { + if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) { + xdp_xmit |= xdp_res; + rx_buf->addr = NULL; + } else { + ice_reuse_rx_buf_zc(rx_ring, rx_buf); + } + + total_rx_bytes += size; + total_rx_packets++; + cleaned_count++; + + ice_bump_ntc(rx_ring); + continue; + } + + /* XDP_PASS path */ + skb = ice_construct_skb_zc(rx_ring, rx_buf, &xdp); + if (!skb) { + rx_ring->rx_stats.alloc_buf_failed++; + break; + } + + cleaned_count++; + ice_bump_ntc(rx_ring); + + if (eth_skb_pad(skb)) { + skb = NULL; + continue; + } + + total_rx_bytes += skb->len; + total_rx_packets++; + + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S); + if (ice_test_staterr(rx_desc, stat_err_bits)) + vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1); + + rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) & + ICE_RX_FLEX_DESC_PTYPE_M; + + ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); + ice_receive_skb(rx_ring, skb, vlan_tag); + } + + ice_finalize_xdp_rx(rx_ring, xdp_xmit); + ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); + + return failure ? budget : (int)total_rx_packets; +} + +/** + * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries + * @xdp_ring: XDP Tx ring + * @budget: max number of frames to xmit + * + * Returns true if cleanup/transmission is done. 
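/*
 * Sketch, not part of the patch: ice_run_xdp_zc() above maps the BPF verdict
 * onto the driver's internal ICE_XDP_{PASS, TX, REDIR, CONSUMED} results,
 * with XDP_ABORTED and unknown actions deliberately falling through to a
 * drop. A compact userspace illustration of that mapping; the verdict and
 * result constants here are local stand-ins, not the kernel's definitions.
 */
#include <stdio.h>

enum xdp_verdict { XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX, XDP_REDIRECT };
enum zc_result { ZC_PASS, ZC_CONSUMED, ZC_TX, ZC_REDIR };

static enum zc_result handle_verdict(enum xdp_verdict act, int redirect_ok)
{
	switch (act) {
	case XDP_PASS:
		return ZC_PASS;		/* build an skb, hand it to the stack */
	case XDP_TX:
		return ZC_TX;		/* bounce the frame out the XDP Tx ring */
	case XDP_REDIRECT:
		return redirect_ok ? ZC_REDIR : ZC_CONSUMED;
	default:			/* unknown action: the kernel warns ... */
	case XDP_ABORTED:		/* ... traces the exception ... */
	case XDP_DROP:			/* ... and drops in all three cases */
		return ZC_CONSUMED;
	}
}

int main(void)
{
	printf("%d %d\n", handle_verdict(XDP_PASS, 1),
	       handle_verdict(XDP_ABORTED, 1));
	return 0;
}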
+ */ +static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget) +{ + struct ice_tx_desc *tx_desc = NULL; + bool work_done = true; + struct xdp_desc desc; + dma_addr_t dma; + + while (likely(budget-- > 0)) { + struct ice_tx_buf *tx_buf; + + if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) { + xdp_ring->tx_stats.tx_busy++; + work_done = false; + break; + } + + tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use]; + + if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc)) + break; + + dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr); + + dma_sync_single_for_device(xdp_ring->dev, dma, desc.len, + DMA_BIDIRECTIONAL); + + tx_buf->bytecount = desc.len; + + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, + 0, desc.len, 0); + + xdp_ring->next_to_use++; + if (xdp_ring->next_to_use == xdp_ring->count) + xdp_ring->next_to_use = 0; + } + + if (tx_desc) { + ice_xdp_ring_update_tail(xdp_ring); + xsk_umem_consume_tx_done(xdp_ring->xsk_umem); + } + + return budget > 0 && work_done; +} + +/** + * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer + * @xdp_ring: XDP Tx ring + * @tx_buf: Tx buffer to clean + */ +static void +ice_clean_xdp_tx_buf(struct ice_ring *xdp_ring, struct ice_tx_buf *tx_buf) +{ + xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); + dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); +} + +/** + * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries + * @xdp_ring: XDP Tx ring + * @budget: NAPI budget + * + * Returns true if cleanup/tranmission is done. + */ +bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget) +{ + int total_packets = 0, total_bytes = 0; + s16 ntc = xdp_ring->next_to_clean; + struct ice_tx_desc *tx_desc; + struct ice_tx_buf *tx_buf; + bool xmit_done = true; + u32 xsk_frames = 0; + + tx_desc = ICE_TX_DESC(xdp_ring, ntc); + tx_buf = &xdp_ring->tx_buf[ntc]; + ntc -= xdp_ring->count; + + do { + if (!(tx_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) + break; + + total_bytes += tx_buf->bytecount; + total_packets++; + + if (tx_buf->raw_buf) { + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); + tx_buf->raw_buf = NULL; + } else { + xsk_frames++; + } + + tx_desc->cmd_type_offset_bsz = 0; + tx_buf++; + tx_desc++; + ntc++; + + if (unlikely(!ntc)) { + ntc -= xdp_ring->count; + tx_buf = xdp_ring->tx_buf; + tx_desc = ICE_TX_DESC(xdp_ring, 0); + } + + prefetch(tx_desc); + + } while (likely(--budget)); + + ntc += xdp_ring->count; + xdp_ring->next_to_clean = ntc; + + if (xsk_frames) + xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames); + + ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes); + xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK); + + return budget > 0 && xmit_done; +} + +/** + * ice_xsk_wakeup - Implements ndo_xsk_wakeup + * @netdev: net_device + * @queue_id: queue to wake up + * @flags: ignored in our case, since we have Rx and Tx in the same NAPI + * + * Returns negative on error, zero otherwise. 
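/*
 * Sketch, not part of the patch: ice_clean_tx_irq_zc() above walks the Tx
 * ring with an index biased by -count, so the wrap check is a simple test
 * against zero rather than a compare with the ring size. A standalone
 * illustration of that indexing trick, using an arbitrary 8-entry ring and a
 * 5-descriptor budget.
 */
#include <stdint.h>
#include <stdio.h>

#define RING_COUNT 8

int main(void)
{
	int16_t ntc = 6;	/* pretend next_to_clean == 6 */
	int budget = 5;		/* clean at most 5 descriptors */

	ntc -= RING_COUNT;	/* bias into negative space: -2 */
	do {
		printf("cleaning slot %d\n", ntc + RING_COUNT);
		ntc++;
		if (!ntc)	/* reached the end of the ring: wrap */
			ntc -= RING_COUNT;
	} while (--budget);
	ntc += RING_COUNT;	/* back to a real ring index */

	printf("next_to_clean is now %d\n", ntc);	/* prints 3 */
	return 0;
}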
+ */ +int +ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, + u32 __always_unused flags) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_q_vector *q_vector; + struct ice_vsi *vsi = np->vsi; + struct ice_ring *ring; + + if (test_bit(__ICE_DOWN, vsi->state)) + return -ENETDOWN; + + if (!ice_is_xdp_ena_vsi(vsi)) + return -ENXIO; + + if (queue_id >= vsi->num_txq) + return -ENXIO; + + if (!vsi->xdp_rings[queue_id]->xsk_umem) + return -ENXIO; + + ring = vsi->xdp_rings[queue_id]; + + /* The idea here is that if NAPI is running, mark a miss, so + * it will run again. If not, trigger an interrupt and + * schedule the NAPI from interrupt context. If NAPI would be + * scheduled here, the interrupt affinity would not be + * honored. + */ + q_vector = ring->q_vector; + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) + ice_trigger_sw_intr(&vsi->back->hw, q_vector); + + return 0; +} + +/** + * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP UMEM attached + * @vsi: VSI to be checked + * + * Returns true if any of the Rx rings has an AF_XDP UMEM attached + */ +bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi) +{ + int i; + + if (!vsi->xsk_umems) + return false; + + for (i = 0; i < vsi->num_xsk_umems; i++) { + if (vsi->xsk_umems[i]) + return true; + } + + return false; +} + +/** + * ice_xsk_clean_rx_ring - clean UMEM queues connected to a given Rx ring + * @rx_ring: ring to be cleaned + */ +void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring) +{ + u16 i; + + for (i = 0; i < rx_ring->count; i++) { + struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i]; + + if (!rx_buf->addr) + continue; + + xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_buf->handle); + rx_buf->addr = NULL; + } +} + +/** + * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its UMEM queues + * @xdp_ring: XDP_Tx ring + */ +void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring) +{ + u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use; + u32 xsk_frames = 0; + + while (ntc != ntu) { + struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc]; + + if (tx_buf->raw_buf) + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); + else + xsk_frames++; + + tx_buf->raw_buf = NULL; + + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } + + if (xsk_frames) + xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames); +} diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h new file mode 100644 index 000000000000..3479e1de98fe --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. 
*/ + +#ifndef _ICE_XSK_H_ +#define _ICE_XSK_H_ +#include "ice_txrx.h" +#include "ice.h" + +struct ice_vsi; + +#ifdef CONFIG_XDP_SOCKETS +int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid); +void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle); +int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget); +bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget); +int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); +bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count); +bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); +void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring); +void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring); +#else +static inline int +ice_xsk_umem_setup(struct ice_vsi __always_unused *vsi, + struct xdp_umem __always_unused *umem, + u16 __always_unused qid) +{ + return -ENOTSUPP; +} + +static inline void +ice_zca_free(struct zero_copy_allocator __always_unused *zca, + unsigned long __always_unused handle) +{ +} + +static inline int +ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring, + int __always_unused budget) +{ + return 0; +} + +static inline bool +ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring, + int __always_unused budget) +{ + return false; +} + +static inline bool +ice_alloc_rx_bufs_slow_zc(struct ice_ring __always_unused *rx_ring, + u16 __always_unused count) +{ + return false; +} + +static inline bool ice_xsk_any_rx_ring_ena(struct ice_vsi __always_unused *vsi) +{ + return false; +} + +static inline int +ice_xsk_wakeup(struct net_device __always_unused *netdev, + u32 __always_unused queue_id, u32 __always_unused flags) +{ + return -ENOTSUPP; +} + +#define ice_xsk_clean_rx_ring(rx_ring) do {} while (0) +#define ice_xsk_clean_xdp_ring(xdp_ring) do {} while (0) +#endif /* CONFIG_XDP_SOCKETS */ +#endif /* !_ICE_XSK_H_ */ diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h index 6ad775b1a4c5..63ec253ac788 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.h +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h @@ -127,6 +127,7 @@ struct e1000_adv_tx_context_desc { }; #define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ +#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ #define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ #define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ #define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 packet TYPE of SCTP */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index ed7e667d7eb2..98346eb064d5 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2518,6 +2518,7 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev, if (unlikely(mac_hdr_len > IGB_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_GSO_UDP_L4 | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_TSO | NETIF_F_TSO6); @@ -2526,6 +2527,7 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev, if (unlikely(network_hdr_len > IGB_MAX_NETWORK_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_GSO_UDP_L4 | NETIF_F_TSO | NETIF_F_TSO6); @@ -3122,7 +3124,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_CSUM; if (hw->mac.type >= e1000_82576) - netdev->features |= NETIF_F_SCTP_CRC; + netdev->features |= NETIF_F_SCTP_CRC | 
NETIF_F_GSO_UDP_L4; if (hw->mac.type >= e1000_i350) netdev->features |= NETIF_F_HW_TC; @@ -5696,6 +5698,7 @@ static int igb_tso(struct igb_ring *tx_ring, } ip; union { struct tcphdr *tcp; + struct udphdr *udp; unsigned char *hdr; } l4; u32 paylen, l4_offset; @@ -5715,7 +5718,8 @@ static int igb_tso(struct igb_ring *tx_ring, l4.hdr = skb_checksum_start(skb); /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ - type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; + type_tucmd = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ? + E1000_ADVTXD_TUCMD_L4T_UDP : E1000_ADVTXD_TUCMD_L4T_TCP; /* initialize outer IP header fields */ if (ip.v4->version == 4) { @@ -5743,12 +5747,19 @@ static int igb_tso(struct igb_ring *tx_ring, /* determine offset of inner transport header */ l4_offset = l4.hdr - skb->data; - /* compute length of segmentation header */ - *hdr_len = (l4.tcp->doff * 4) + l4_offset; - /* remove payload length from inner checksum */ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); + if (type_tucmd & E1000_ADVTXD_TUCMD_L4T_TCP) { + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(paylen)); + } else { + /* compute length of segmentation header */ + *hdr_len = sizeof(*l4.udp) + l4_offset; + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + } /* update gso size and bytecount with header size */ first->gso_segs = skb_shinfo(skb)->gso_segs; @@ -6225,7 +6236,6 @@ static void igb_get_stats64(struct net_device *netdev, static int igb_change_mtu(struct net_device *netdev, int new_mtu) { struct igb_adapter *adapter = netdev_priv(netdev); - struct pci_dev *pdev = adapter->pdev; int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; /* adjust max frame to be at least the size of a standard frame */ @@ -6241,8 +6251,8 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) if (netif_running(netdev)) igb_down(adapter); - dev_info(&pdev->dev, "changing MTU from %d to %d\n", - netdev->mtu, new_mtu); + netdev_dbg(netdev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); netdev->mtu = new_mtu; if (netif_running(netdev)) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 0f2b68f4bb0f..6003dc3ff5fd 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2437,8 +2437,8 @@ static int igbvf_change_mtu(struct net_device *netdev, int new_mtu) adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN; - dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n", - netdev->mtu, new_mtu); + netdev_dbg(netdev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); netdev->mtu = new_mtu; if (netif_running(netdev)) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 7e16345d836e..0868677d43ed 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -411,7 +411,6 @@ struct igc_adapter { u32 tx_hwtstamp_timeouts; u32 tx_hwtstamp_skipped; u32 rx_hwtstamp_cleared; - u32 *shadow_vfta; u32 rss_queues; u32 rss_indir_tbl_init; diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index f3f2325fe567..f3788f0b95b4 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -282,7 +282,10 @@ #define IGC_RCTL_BAM 0x00008000 /* broadcast enable */ /* Receive Descriptor bit 
definitions */ -#define IGC_RXD_STAT_EOP 0x02 /* End of Packet */ +#define IGC_RXD_STAT_EOP 0x02 /* End of Packet */ +#define IGC_RXD_STAT_IXSM 0x04 /* Ignore checksum */ +#define IGC_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ +#define IGC_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ #define IGC_RXDEXT_STATERR_CE 0x01000000 #define IGC_RXDEXT_STATERR_SE 0x02000000 @@ -402,4 +405,7 @@ #define IGC_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet Type of TCP */ #define IGC_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 packet TYPE of SCTP */ +/* Maximum size of the MTA register table in all supported adapters */ +#define MAX_MTA_REG 128 + #endif /* _IGC_DEFINES_H_ */ diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index abb2d72911ff..20f710645746 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -91,6 +91,7 @@ struct igc_mac_info { u16 mta_reg_count; u16 uta_reg_count; + u32 mta_shadow[MAX_MTA_REG]; u16 rar_entry_count; u8 forced_speed_duplex; diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c index 5eeb4c8caf4a..12aa6b5fcb5d 100644 --- a/drivers/net/ethernet/intel/igc/igc_mac.c +++ b/drivers/net/ethernet/intel/igc/igc_mac.c @@ -784,3 +784,107 @@ bool igc_enable_mng_pass_thru(struct igc_hw *hw) out: return ret_val; } + +/** + * igc_hash_mc_addr - Generate a multicast hash value + * @hw: pointer to the HW structure + * @mc_addr: pointer to a multicast address + * + * Generates a multicast address hash value which is used to determine + * the multicast filter table array address and new table value. See + * igc_mta_set() + **/ +static u32 igc_hash_mc_addr(struct igc_hw *hw, u8 *mc_addr) +{ + u32 hash_value, hash_mask; + u8 bit_shift = 0; + + /* Register count multiplied by bits per register */ + hash_mask = (hw->mac.mta_reg_count * 32) - 1; + + /* For a mc_filter_type of 0, bit_shift is the number of left-shifts + * where 0xFF would still fall within the hash mask. + */ + while (hash_mask >> bit_shift != 0xFF) + bit_shift++; + + /* The portion of the address that is used for the hash table + * is determined by the mc_filter_type setting. + * The algorithm is such that there is a total of 8 bits of shifting. + * The bit_shift for a mc_filter_type of 0 represents the number of + * left-shifts where the MSB of mc_addr[5] would still fall within + * the hash_mask. Case 0 does this exactly. Since there are a total + * of 8 bits of shifting, then mc_addr[4] will shift right the + * remaining number of bits. Thus 8 - bit_shift. The rest of the + * cases are a variation of this algorithm...essentially raising the + * number of bits to shift mc_addr[5] left, while still keeping the + * 8-bit shifting total. + * + * For example, given the following Destination MAC Address and an + * MTA register count of 128 (thus a 4096-bit vector and 0xFFF mask), + * we can see that the bit_shift for case 0 is 4. These are the hash + * values resulting from each mc_filter_type... 
+ * [0] [1] [2] [3] [4] [5] + * 01 AA 00 12 34 56 + * LSB MSB + * + * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563 + * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6 + * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163 + * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634 + */ + switch (hw->mac.mc_filter_type) { + default: + case 0: + break; + case 1: + bit_shift += 1; + break; + case 2: + bit_shift += 2; + break; + case 3: + bit_shift += 4; + break; + } + + hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) | + (((u16)mc_addr[5]) << bit_shift))); + + return hash_value; +} + +/** + * igc_update_mc_addr_list - Update Multicast addresses + * @hw: pointer to the HW structure + * @mc_addr_list: array of multicast addresses to program + * @mc_addr_count: number of multicast addresses to program + * + * Updates entire Multicast Table Array. + * The caller must have a packed mc_addr_list of multicast addresses. + **/ +void igc_update_mc_addr_list(struct igc_hw *hw, + u8 *mc_addr_list, u32 mc_addr_count) +{ + u32 hash_value, hash_bit, hash_reg; + int i; + + /* clear mta_shadow */ + memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); + + /* update mta_shadow from mc_addr_list */ + for (i = 0; (u32)i < mc_addr_count; i++) { + hash_value = igc_hash_mc_addr(hw, mc_addr_list); + + hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); + hash_bit = hash_value & 0x1F; + + hw->mac.mta_shadow[hash_reg] |= BIT(hash_bit); + mc_addr_list += ETH_ALEN; + } + + /* replace the entire MTA table */ + for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) + array_wr32(IGC_MTA, i, hw->mac.mta_shadow[i]); + wrfl(); +} diff --git a/drivers/net/ethernet/intel/igc/igc_mac.h b/drivers/net/ethernet/intel/igc/igc_mac.h index 782bc995badc..832cccec87cd 100644 --- a/drivers/net/ethernet/intel/igc/igc_mac.h +++ b/drivers/net/ethernet/intel/igc/igc_mac.h @@ -29,6 +29,8 @@ s32 igc_get_speed_and_duplex_copper(struct igc_hw *hw, u16 *speed, u16 *duplex); bool igc_enable_mng_pass_thru(struct igc_hw *hw); +void igc_update_mc_addr_list(struct igc_hw *hw, + u8 *mc_addr_list, u32 mc_addr_count); enum igc_mng_mode { igc_mng_mode_none = 0, diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 24888676f69b..9700527dd797 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -795,6 +795,44 @@ static int igc_set_mac(struct net_device *netdev, void *p) return 0; } +/** + * igc_write_mc_addr_list - write multicast addresses to MTA + * @netdev: network interface device structure + * + * Writes multicast address list to the MTA hash table. + * Returns: -ENOMEM on failure + * 0 on no addresses written + * X on writing X addresses to MTA + **/ +static int igc_write_mc_addr_list(struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + struct netdev_hw_addr *ha; + u8 *mta_list; + int i; + + if (netdev_mc_empty(netdev)) { + /* nothing to program, so clear mc list */ + igc_update_mc_addr_list(hw, NULL, 0); + return 0; + } + + mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC); + if (!mta_list) + return -ENOMEM; + + /* The shared function expects a packed array of only addresses. 
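As a cross-check of the hashing description in igc_hash_mc_addr() above, here is a small stand-alone sketch (an illustrative reimplementation for this document, not driver code; the name mta_hash is made up). It reproduces the worked example from the comment: MAC 01:AA:00:12:34:56 with 128 MTA registers and mc_filter_type 0 hashes to 0x563, which igc_update_mc_addr_list() would then turn into bit 3 of MTA register 43.

/* Illustrative user-space reimplementation of the MTA hash above. */
#include <stdint.h>
#include <stdio.h>

static uint32_t mta_hash(const uint8_t mc_addr[6], unsigned int mta_reg_count,
			 unsigned int mc_filter_type)
{
	static const unsigned int extra_shift[] = { 0, 1, 2, 4 };
	uint32_t hash_mask = mta_reg_count * 32 - 1;	/* 0xFFF for 128 regs */
	unsigned int bit_shift = 0;

	/* number of left-shifts keeping 0xFF inside the mask: 4 for 0xFFF */
	while ((hash_mask >> bit_shift) != 0xFF)
		bit_shift++;
	bit_shift += extra_shift[mc_filter_type & 3];

	return hash_mask & ((mc_addr[4] >> (8 - bit_shift)) |
			    ((uint32_t)mc_addr[5] << bit_shift));
}

int main(void)
{
	const uint8_t mac[6] = { 0x01, 0xAA, 0x00, 0x12, 0x34, 0x56 };
	uint32_t hash = mta_hash(mac, 128, 0);

	/* Expected output: hash 0x563 -> MTA[43], bit 3 */
	printf("hash 0x%03X -> MTA[%u], bit %u\n",
	       hash, (hash >> 5) & (128 - 1), hash & 0x1F);
	return 0;
}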
*/ + i = 0; + netdev_for_each_mc_addr(ha, netdev) + memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); + + igc_update_mc_addr_list(hw, mta_list, i); + kfree(mta_list); + + return netdev_mc_count(netdev); +} + static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, struct igc_tx_buffer *first, u32 vlan_macip_lens, u32 type_tucmd, @@ -1163,6 +1201,46 @@ static netdev_tx_t igc_xmit_frame(struct sk_buff *skb, return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb)); } +static void igc_rx_checksum(struct igc_ring *ring, + union igc_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + /* Ignore Checksum bit is set */ + if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM)) + return; + + /* Rx checksum disabled via ethtool */ + if (!(ring->netdev->features & NETIF_F_RXCSUM)) + return; + + /* TCP/UDP checksum error bit is set */ + if (igc_test_staterr(rx_desc, + IGC_RXDEXT_STATERR_TCPE | + IGC_RXDEXT_STATERR_IPE)) { + /* work around errata with sctp packets where the TCPE aka + * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) + * packets (aka let the stack check the crc32c) + */ + if (!(skb->len == 60 && + test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) { + u64_stats_update_begin(&ring->rx_syncp); + ring->rx_stats.csum_err++; + u64_stats_update_end(&ring->rx_syncp); + } + /* let the stack verify checksum errors */ + return; + } + /* It must be a TCP or UDP packet with a valid checksum */ + if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS | + IGC_RXD_STAT_UDPCS)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + dev_dbg(ring->dev, "cksum success: bits %08X\n", + le32_to_cpu(rx_desc->wb.upper.status_error)); +} + static inline void igc_rx_hash(struct igc_ring *ring, union igc_adv_rx_desc *rx_desc, struct sk_buff *skb) @@ -1189,6 +1267,8 @@ static void igc_process_skb_fields(struct igc_ring *rx_ring, { igc_rx_hash(rx_ring, rx_desc, skb); + igc_rx_checksum(rx_ring, rx_desc, skb); + skb_record_rx_queue(skb, rx_ring->queue_index); skb->protocol = eth_type_trans(skb, rx_ring->netdev); @@ -2192,7 +2272,6 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu) { int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; struct igc_adapter *adapter = netdev_priv(netdev); - struct pci_dev *pdev = adapter->pdev; /* adjust max frame to be at least the size of a standard frame */ if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) @@ -2207,8 +2286,8 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu) if (netif_running(netdev)) igc_down(adapter); - dev_info(&pdev->dev, "changing MTU from %d to %d\n", - netdev->mtu, new_mtu); + netdev_dbg(netdev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); netdev->mtu = new_mtu; if (netif_running(netdev)) @@ -2518,6 +2597,110 @@ int igc_del_mac_steering_filter(struct igc_adapter *adapter, IGC_MAC_STATE_QUEUE_STEERING | flags); } +/* Add a MAC filter for 'addr' directing matching traffic to 'queue', + * 'flags' is used to indicate what kind of match is made, match is by + * default for the destination address, if matching by source address + * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used. + */ +static int igc_add_mac_filter(struct igc_adapter *adapter, + const u8 *addr, const u8 queue) +{ + struct igc_hw *hw = &adapter->hw; + int rar_entries = hw->mac.rar_entry_count; + int i; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + /* Search for the first empty entry in the MAC table. + * Do not touch entries at the end of the table reserved for the VF MAC + * addresses. 
+ */ + for (i = 0; i < rar_entries; i++) { + if (!igc_mac_entry_can_be_used(&adapter->mac_table[i], + addr, 0)) + continue; + + ether_addr_copy(adapter->mac_table[i].addr, addr); + adapter->mac_table[i].queue = queue; + adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE; + + igc_rar_set_index(adapter, i); + return i; + } + + return -ENOSPC; +} + +/* Remove a MAC filter for 'addr' directing matching traffic to + * 'queue', 'flags' is used to indicate what kind of match need to be + * removed, match is by default for the destination address, if + * matching by source address is to be removed the flag + * IGC_MAC_STATE_SRC_ADDR can be used. + */ +static int igc_del_mac_filter(struct igc_adapter *adapter, + const u8 *addr, const u8 queue) +{ + struct igc_hw *hw = &adapter->hw; + int rar_entries = hw->mac.rar_entry_count; + int i; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + /* Search for matching entry in the MAC table based on given address + * and queue. Do not touch entries at the end of the table reserved + * for the VF MAC addresses. + */ + for (i = 0; i < rar_entries; i++) { + if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE)) + continue; + if (adapter->mac_table[i].state != 0) + continue; + if (adapter->mac_table[i].queue != queue) + continue; + if (!ether_addr_equal(adapter->mac_table[i].addr, addr)) + continue; + + /* When a filter for the default address is "deleted", + * we return it to its initial configuration + */ + if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) { + adapter->mac_table[i].state = + IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; + adapter->mac_table[i].queue = 0; + } else { + adapter->mac_table[i].state = 0; + adapter->mac_table[i].queue = 0; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + } + + igc_rar_set_index(adapter, i); + return 0; + } + + return -ENOENT; +} + +static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + int ret; + + ret = igc_add_mac_filter(adapter, addr, adapter->num_rx_queues); + + return min_t(int, ret, 0); +} + +static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + igc_del_mac_filter(adapter, addr, adapter->num_rx_queues); + + return 0; +} + /** * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set * @netdev: network interface device structure @@ -2529,6 +2712,44 @@ int igc_del_mac_steering_filter(struct igc_adapter *adapter, */ static void igc_set_rx_mode(struct net_device *netdev) { + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE; + int count; + + /* Check for Promiscuous and All Multicast modes */ + if (netdev->flags & IFF_PROMISC) { + rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE; + } else { + if (netdev->flags & IFF_ALLMULTI) { + rctl |= IGC_RCTL_MPE; + } else { + /* Write addresses to the MTA, if the attempt fails + * then we should just turn on promiscuous mode so + * that we can at least receive multicast traffic + */ + count = igc_write_mc_addr_list(netdev); + if (count < 0) + rctl |= IGC_RCTL_MPE; + } + } + + /* Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync)) + rctl |= IGC_RCTL_UPE; + + /* update state of unicast and multicast */ + rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | 
IGC_RCTL_MPE); + wr32(IGC_RCTL, rctl); + +#if (PAGE_SIZE < 8192) + if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB) + rlpml = IGC_MAX_FRAME_BUILD_SKB; +#endif + wr32(IGC_RLPML, rlpml); } /** @@ -3982,6 +4203,7 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_open = igc_open, .ndo_stop = igc_close, .ndo_start_xmit = igc_xmit_frame, + .ndo_set_rx_mode = igc_set_rx_mode, .ndo_set_mac_address = igc_set_mac, .ndo_change_mtu = igc_change_mtu, .ndo_get_stats = igc_get_stats, @@ -4211,7 +4433,9 @@ static int igc_probe(struct pci_dev *pdev, goto err_sw_init; /* Add supported features to the features list*/ + netdev->features |= NETIF_F_RXCSUM; netdev->features |= NETIF_F_HW_CSUM; + netdev->features |= NETIF_F_SCTP_CRC; /* setup the private structure */ err = igc_sw_init(adapter); @@ -4349,7 +4573,6 @@ static void igc_remove(struct pci_dev *pdev) pci_release_mem_regions(pdev); kfree(adapter->mac_table); - kfree(adapter->shadow_vfta); free_netdev(netdev); pci_disable_pcie_error_reporting(pdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index cc3196ae5aea..fd9f5d41b594 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -832,9 +832,9 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, int xdp_count, int xdp_idx, int rxr_count, int rxr_idx) { + int node = dev_to_node(&adapter->pdev->dev); struct ixgbe_q_vector *q_vector; struct ixgbe_ring *ring; - int node = NUMA_NO_NODE; int cpu = -1; int ring_count; u8 tcs = adapter->hw_tcs; @@ -845,10 +845,8 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) { u16 rss_i = adapter->ring_feature[RING_F_RSS].indices; if (rss_i > 1 && adapter->atr_sample_rate) { - if (cpu_online(v_idx)) { - cpu = v_idx; - node = cpu_to_node(cpu); - } + cpu = cpumask_local_spread(v_idx, node); + node = cpu_to_node(cpu); } } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 91b3780ddb04..25c097cd8100 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6725,7 +6725,8 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) (new_mtu > ETH_DATA_LEN)) e_warn(probe, "Setting MTU > 1500 will disable legacy VFs\n"); - e_info(probe, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); + netdev_dbg(netdev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); /* must set new MTU before calling down or up */ netdev->mtu = new_mtu; @@ -7945,6 +7946,7 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, } ip; union { struct tcphdr *tcp; + struct udphdr *udp; unsigned char *hdr; } l4; u32 paylen, l4_offset; @@ -7968,7 +7970,8 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, l4.hdr = skb_checksum_start(skb); /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ - type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_TCP; + type_tucmd = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ? 
+ IXGBE_ADVTXD_TUCMD_L4T_UDP : IXGBE_ADVTXD_TUCMD_L4T_TCP; /* initialize outer IP header fields */ if (ip.v4->version == 4) { @@ -7998,12 +8001,20 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, /* determine offset of inner transport header */ l4_offset = l4.hdr - skb->data; - /* compute length of segmentation header */ - *hdr_len = (l4.tcp->doff * 4) + l4_offset; - /* remove payload length from inner checksum */ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); + + if (type_tucmd & IXGBE_ADVTXD_TUCMD_L4T_TCP) { + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(paylen)); + } else { + /* compute length of segmentation header */ + *hdr_len = sizeof(*l4.udp) + l4_offset; + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + } /* update gso size and bytecount with header size */ first->gso_segs = skb_shinfo(skb)->gso_segs; @@ -8639,7 +8650,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && adapter->ptp_clock) { - if (!test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS, + if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && + !test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IXGBE_TX_FLAGS_TSTAMP; @@ -10189,6 +10201,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, if (unlikely(mac_hdr_len > IXGBE_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_GSO_UDP_L4 | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_TSO | NETIF_F_TSO6); @@ -10197,6 +10210,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, if (unlikely(network_hdr_len > IXGBE_MAX_NETWORK_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | + NETIF_F_GSO_UDP_L4 | NETIF_F_TSO | NETIF_F_TSO6); @@ -10906,7 +10920,7 @@ skip_sriov: IXGBE_GSO_PARTIAL_FEATURES; if (hw->mac.type >= ixgbe_mac_82599EB) - netdev->features |= NETIF_F_SCTP_CRC; + netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_GSO_UDP_L4; #ifdef CONFIG_IXGBE_IPSEC #define IXGBE_ESP_FEATURES (NETIF_F_HW_ESP | \ |
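The ixgbe_xmit_frame_ring() hunk above now arms the PTP TX-timestamp state only when tstamp_config.tx_type is HWTSTAMP_TX_ON, that is, when user space has actually asked for hardware TX timestamps. For context only, a minimal sketch of that request path is below; it uses the standard SIOCSHWTSTAMP ioctl, the interface name eth0 is a placeholder, and nothing here is part of the patch itself.

/* Hypothetical user-space sketch: enable hardware TX timestamping on an
 * interface so the driver's HWTSTAMP_TX_ON check above is satisfied.
 */
#include <linux/net_tstamp.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct hwtstamp_config cfg = {
		.tx_type   = HWTSTAMP_TX_ON,
		.rx_filter = HWTSTAMP_FILTER_NONE,
	};
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* placeholder ifname */
	ifr.ifr_data = (char *)&cfg;

	if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
		perror("SIOCSHWTSTAMP");
	else
		printf("hardware TX timestamping enabled\n");

	close(fd);
	return 0;
}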