summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-10-21 13:18:10 +0200
committerDavid S. Miller <davem@davemloft.net>2021-10-21 13:18:10 +0200
commitdedb0809c9ba8a1ab64901e1f7aac267d3395fa8 (patch)
treeac36746ea1f88f2084938c87fcf8c0160ac20e13
parentMerge branch 'mscc-ocelot-all-ports-vlan-untagged-egress' (diff)
parentice: Add tc-flower filter support for channel (diff)
downloadlinux-dedb0809c9ba8a1ab64901e1f7aac267d3395fa8.tar.xz
linux-dedb0809c9ba8a1ab64901e1f7aac267d3395fa8.zip
Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue
Tony Nguyen says: ==================== 100GbE Intel Wired LAN Driver Updates 2021-10-20 Sudheer Mogilappagari says: This series introduces initial support for Application Device Queues(ADQ) in ice driver. ADQ provides traffic isolation for application flows in hardware and ability to steer traffic to a given traffic class. This helps in aligning NIC queues to application threads. Traffic classes are configured using mqprio framework of tc command and mapped to HW channels(VSIs) in the driver. The queue set of each traffic class is managed by corresponding VSI. Each traffic channel can be configured with bandwidth rate-limiting limits and is offloaded to the hardware through the mqprio framework by specifying the mode option as 'channel' and shaper option as 'bw_rlimit'. Next, the flows of application can be steered into a given traffic class using "tc filter" command. The option "skip_sw hw_tc x" indicates hw-offload of filtering and steering filtered traffic into specified TC. Non-matching traffic flows through TC0. When channel configuration are removed queue configuration is set to default and filters configured on individual traffic classes are deleted. example: $ ethtool -K eth0 hw-tc-offload on Configure 3 traffic classes and map priority 0,1,2 to TC0, TC1 and TC2 respectively. TC0 has 2 queues from offset 0 & TC1 has 8 queues from offset 2 and TC2 has 4 queues from offset 10. Enable hardware offload of channels. $ tc qdisc add dev eth0 root mqprio num_tc 3 map 0 1 2 queues \ 2@0 8@2 4@10 hw 1 mode channel $ tc qdisc show dev eth0 qdisc mqprio 8001: root tc 2 map 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 queues:(0:1) (2:9) (10:13) mode:channel Configure two filters to match based on dst ipaddr, dst tcp port and redirect to TC1 and TC2. $ tc qdisc add dev eth0 clsact $ tc filter add dev eth0 protocol ip ingress prio 1 flower\ dst_ip 192.168.1.1/32 ip_proto tcp dst_port 80\ skip_sw hw_tc 1 $ tc filter add dev eth0 protocol ip ingress prio 1 flower\ dst_ip 192.168.1.1/32 ip_proto tcp dst_port 5001\ skip_sw hw_tc 2 $ tc filter show dev eth0 ingress Delete traffic classes configuration: $ sudo tc qdisc del dev eth0 root ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h108
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c34
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.c201
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.h10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c384
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.h12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c970
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sched.c68
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sched.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c119
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_tc_lib.c217
-rw-r--r--drivers/net/ethernet/intel/ice/ice_tc_lib.h22
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h7
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c4
18 files changed, 2021 insertions, 154 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 80f14886b5b1..967a90efcb11 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -38,6 +38,10 @@
#include <linux/avf/virtchnl.h>
#include <linux/cpu_rmap.h>
#include <linux/dim.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/ip.h>
#include <net/devlink.h>
#include <net/ipv6.h>
#include <net/xdp_sock.h>
@@ -55,6 +59,7 @@
#include "ice_dcb.h"
#include "ice_switch.h"
#include "ice_common.h"
+#include "ice_flow.h"
#include "ice_sched.h"
#include "ice_idc_int.h"
#include "ice_virtchnl_pf.h"
@@ -104,6 +109,10 @@
#define ICE_INVAL_VFID 256
#define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */
+
+#define ICE_CHNL_START_TC 1
+#define ICE_CHNL_MAX_TC 16
+
#define ICE_MAX_RESET_WAIT 20
#define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4)
@@ -121,6 +130,13 @@
#define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i]))
#define ICE_TX_FDIRDESC(R, i) (&(((struct ice_fltr_desc *)((R)->desc))[i]))
+/* Minimum BW limit is 500 Kbps for any scheduler node */
+#define ICE_MIN_BW_LIMIT 500
+/* User can specify BW in either Kbit/Mbit/Gbit and OS converts it in bytes.
+ * use it to convert user specified BW limit into Kbps
+ */
+#define ICE_BW_KBPS_DIVISOR 125
+
/* Macro for each VSI in a PF */
#define ice_for_each_vsi(pf, i) \
for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++)
@@ -145,6 +161,9 @@
#define ice_for_each_q_vector(vsi, i) \
for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++)
+#define ice_for_each_chnl_tc(i) \
+ for ((i) = ICE_CHNL_START_TC; (i) < ICE_CHNL_MAX_TC; (i)++)
+
#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_MCAST_TX | \
ICE_PROMISC_UCAST_RX | ICE_PROMISC_MCAST_RX)
@@ -172,6 +191,21 @@ enum ice_feature {
DECLARE_STATIC_KEY_FALSE(ice_xdp_locking_key);
+struct ice_channel {
+ struct list_head list;
+ u8 type;
+ u16 sw_id;
+ u16 base_q;
+ u16 num_rxq;
+ u16 num_txq;
+ u16 vsi_num;
+ u8 ena_tc;
+ struct ice_aqc_vsi_props info;
+ u64 max_tx_rate;
+ u64 min_tx_rate;
+ struct ice_vsi *ch_vsi;
+};
+
struct ice_txq_meta {
u32 q_teid; /* Tx-scheduler element identifier */
u16 q_id; /* Entry in VSI's txq_map bitmap */
@@ -189,7 +223,7 @@ struct ice_tc_info {
struct ice_tc_cfg {
u8 numtc; /* Total number of enabled TCs */
- u8 ena_tc; /* Tx map */
+ u16 ena_tc; /* Tx map */
struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS];
};
@@ -361,6 +395,35 @@ struct ice_vsi {
struct net_device **target_netdevs;
+ struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */
+
+ /* Channel Specific Fields */
+ struct ice_vsi *tc_map_vsi[ICE_CHNL_MAX_TC];
+ u16 cnt_q_avail;
+ u16 next_base_q; /* next queue to be used for channel setup */
+ struct list_head ch_list;
+ u16 num_chnl_rxq;
+ u16 num_chnl_txq;
+ u16 ch_rss_size;
+ u16 num_chnl_fltr;
+ /* store away rss size info before configuring ADQ channels so that,
+ * it can be used after tc-qdisc delete, to get back RSS setting as
+ * they were before
+ */
+ u16 orig_rss_size;
+ /* this keeps tracks of all enabled TC with and without DCB
+ * and inclusive of ADQ, vsi->mqprio_opt keeps track of queue
+ * information
+ */
+ u8 all_numtc;
+ u16 all_enatc;
+
+ /* store away TC info, to be used for rebuild logic */
+ u8 old_numtc;
+ u16 old_ena_tc;
+
+ struct ice_channel *ch;
+
/* setup back reference, to which aggregator node this VSI
* corresponds to
*/
@@ -389,6 +452,8 @@ struct ice_q_vector {
cpumask_t affinity_mask;
struct irq_affinity_notify affinity_notify;
+ struct ice_channel *ch;
+
char name[ICE_INT_NAME_STR_LEN];
u16 total_events; /* net_dim(): number of interrupts processed */
@@ -407,6 +472,7 @@ enum ice_pf_flags {
ICE_FLAG_PTP, /* PTP is enabled by software */
ICE_FLAG_AUX_ENA,
ICE_FLAG_ADV_FEATURES,
+ ICE_FLAG_TC_MQPRIO, /* support for Multi queue TC */
ICE_FLAG_CLS_FLOWER,
ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
@@ -519,7 +585,10 @@ struct ice_pf {
struct auxiliary_device *adev;
int aux_idx;
u32 sw_int_count;
-
+ /* count of tc_flower filters specific to channel (aka where filter
+ * action is "hw_tc <tc_num>")
+ */
+ u16 num_dmac_chnl_fltrs;
struct hlist_head tc_flower_fltr_list;
__le64 nvm_phy_type_lo; /* NVM PHY type low */
@@ -544,6 +613,17 @@ struct ice_netdev_priv {
};
/**
+ * ice_vector_ch_enabled
+ * @qv: pointer to q_vector, can be NULL
+ *
+ * This function returns true if vector is channel enabled otherwise false
+ */
+static inline bool ice_vector_ch_enabled(struct ice_q_vector *qv)
+{
+ return !!qv->ch; /* Enable it to run with TC */
+}
+
+/**
* ice_irq_dynamic_ena - Enable default interrupt generation settings
* @hw: pointer to HW struct
* @vsi: pointer to VSI struct, can be NULL
@@ -704,6 +784,30 @@ static inline void ice_clear_sriov_cap(struct ice_pf *pf)
((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT)
#define ICE_FD_SB_STAT_IDX(base_idx) ICE_FD_STAT_PF_IDX(base_idx)
+/**
+ * ice_is_adq_active - any active ADQs
+ * @pf: pointer to PF
+ *
+ * This function returns true if there are any ADQs configured (which is
+ * determined by looking at VSI type (which should be VSI_PF), numtc, and
+ * TC_MQPRIO flag) otherwise return false
+ */
+static inline bool ice_is_adq_active(struct ice_pf *pf)
+{
+ struct ice_vsi *vsi;
+
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ return false;
+
+ /* is ADQ configured */
+ if (vsi->tc_cfg.numtc > ICE_CHNL_START_TC &&
+ test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ return true;
+
+ return false;
+}
+
bool netif_is_ice(struct net_device *dev);
int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index be625977addf..fa6cd63cbf1f 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -213,6 +213,9 @@ static u16 ice_calc_txq_handle(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8
{
WARN_ONCE(ice_ring_is_xdp(ring) && tc, "XDP ring can't belong to TC other than 0\n");
+ if (ring->ch)
+ return ring->q_index - ring->ch->base_q;
+
/* Idea here for calculation is that we subtract the number of queue
* count from TC that ring belongs to from it's absolute queue index
* and as a result we get the queue's index within TC.
@@ -300,7 +303,10 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf
case ICE_VSI_LB:
case ICE_VSI_CTRL:
case ICE_VSI_PF:
- tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
+ if (ring->ch)
+ tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ;
+ else
+ tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
break;
case ICE_VSI_VF:
/* Firmware expects vmvf_num to be absolute VF ID */
@@ -315,7 +321,10 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf
}
/* make sure the context is associated with the right VSI */
- tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
+ if (ring->ch)
+ tlan_ctx->src_vsi = ring->ch->vsi_num;
+ else
+ tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
/* Restrict Tx timestamps to the PF VSI */
switch (vsi->type) {
@@ -747,6 +756,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring,
u8 buf_len = struct_size(qg_buf, txqs, 1);
struct ice_tlan_ctx tlan_ctx = { 0 };
struct ice_aqc_add_txqs_perq *txq;
+ struct ice_channel *ch = ring->ch;
struct ice_pf *pf = vsi->back;
struct ice_hw *hw = &pf->hw;
enum ice_status status;
@@ -785,8 +795,14 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring,
ring->q_handle = ice_calc_txq_handle(vsi, ring, tc);
}
- status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
- 1, qg_buf, buf_len, NULL);
+ if (ch)
+ status = ice_ena_vsi_txq(vsi->port_info, ch->ch_vsi->idx, 0,
+ ring->q_handle, 1, qg_buf, buf_len,
+ NULL);
+ else
+ status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc,
+ ring->q_handle, 1, qg_buf, buf_len,
+ NULL);
if (status) {
dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %s\n",
ice_stat_str(status));
@@ -967,6 +983,7 @@ void
ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring,
struct ice_txq_meta *txq_meta)
{
+ struct ice_channel *ch = ring->ch;
u8 tc;
if (IS_ENABLED(CONFIG_DCB))
@@ -977,6 +994,11 @@ ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring,
txq_meta->q_id = ring->reg_idx;
txq_meta->q_teid = ring->txq_teid;
txq_meta->q_handle = ring->q_handle;
- txq_meta->vsi_idx = vsi->idx;
- txq_meta->tc = tc;
+ if (ch) {
+ txq_meta->vsi_idx = ch->ch_vsi->idx;
+ txq_meta->tc = 0;
+ } else {
+ txq_meta->vsi_idx = vsi->idx;
+ txq_meta->tc = tc;
+ }
}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 4284526e9e24..a72e18320a22 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -5,52 +5,10 @@
#include "ice_dcb_nl.h"
/**
- * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration
- * @vsi: the VSI being configured
- * @ena_tc: TC map to be enabled
- */
-void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc)
-{
- struct net_device *netdev = vsi->netdev;
- struct ice_pf *pf = vsi->back;
- struct ice_dcbx_cfg *dcbcfg;
- u8 netdev_tc;
- int i;
-
- if (!netdev)
- return;
-
- if (!ena_tc) {
- netdev_reset_tc(netdev);
- return;
- }
-
- if (netdev_set_num_tc(netdev, vsi->tc_cfg.numtc))
- return;
-
- dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
-
- ice_for_each_traffic_class(i)
- if (vsi->tc_cfg.ena_tc & BIT(i))
- netdev_set_tc_queue(netdev,
- vsi->tc_cfg.tc_info[i].netdev_tc,
- vsi->tc_cfg.tc_info[i].qcount_tx,
- vsi->tc_cfg.tc_info[i].qoffset);
-
- for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
- u8 ets_tc = dcbcfg->etscfg.prio_table[i];
-
- /* Get the mapped netdev TC# for the UP */
- netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc;
- netdev_set_prio_tc_map(netdev, i, netdev_tc);
- }
-}
-
-/**
* ice_dcb_get_ena_tc - return bitmap of enabled TCs
* @dcbcfg: DCB config to evaluate for enabled TCs
*/
-u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg)
+static u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg)
{
u8 i, num_tc, ena_tc = 1;
@@ -179,6 +137,67 @@ u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
}
/**
+ * ice_get_first_droptc - returns number of first droptc
+ * @vsi: used to find the first droptc
+ *
+ * This function returns the value of first_droptc.
+ * When DCB is enabled, first droptc information is derived from enabled_tc
+ * and PFC enabled bits. otherwise this function returns 0 as there is one
+ * TC without DCB (tc0)
+ */
+static u8 ice_get_first_droptc(struct ice_vsi *vsi)
+{
+ struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
+ struct device *dev = ice_pf_to_dev(vsi->back);
+ u8 num_tc, ena_tc_map, pfc_ena_map;
+ u8 i;
+
+ num_tc = ice_dcb_get_num_tc(cfg);
+
+ /* get bitmap of enabled TCs */
+ ena_tc_map = ice_dcb_get_ena_tc(cfg);
+
+ /* get bitmap of PFC enabled TCs */
+ pfc_ena_map = cfg->pfc.pfcena;
+
+ /* get first TC that is not PFC enabled */
+ for (i = 0; i < num_tc; i++) {
+ if ((ena_tc_map & BIT(i)) && (!(pfc_ena_map & BIT(i)))) {
+ dev_dbg(dev, "first drop tc = %d\n", i);
+ return i;
+ }
+ }
+
+ dev_dbg(dev, "first drop tc = 0\n");
+ return 0;
+}
+
+/**
+ * ice_vsi_set_dcb_tc_cfg - Set VSI's TC based on DCB configuration
+ * @vsi: pointer to the VSI instance
+ */
+void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi)
+{
+ struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
+
+ switch (vsi->type) {
+ case ICE_VSI_PF:
+ vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg);
+ vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg);
+ break;
+ case ICE_VSI_CHNL:
+ vsi->tc_cfg.ena_tc = BIT(ice_get_first_droptc(vsi));
+ vsi->tc_cfg.numtc = 1;
+ break;
+ case ICE_VSI_CTRL:
+ case ICE_VSI_LB:
+ default:
+ vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
+ vsi->tc_cfg.numtc = 1;
+ }
+}
+
+/**
* ice_dcb_get_tc - Get the TC associated with the queue
* @vsi: ptr to the VSI
* @queue_index: queue number associated with VSI
@@ -218,11 +237,68 @@ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi)
qoffset = vsi->tc_cfg.tc_info[n].qoffset;
qcount = vsi->tc_cfg.tc_info[n].qcount_tx;
- for (i = qoffset; i < (qoffset + qcount); i++) {
- tx_ring = vsi->tx_rings[i];
- rx_ring = vsi->rx_rings[i];
- tx_ring->dcb_tc = n;
- rx_ring->dcb_tc = n;
+ for (i = qoffset; i < (qoffset + qcount); i++)
+ vsi->tx_rings[i]->dcb_tc = n;
+
+ qcount = vsi->tc_cfg.tc_info[n].qcount_rx;
+ for (i = qoffset; i < (qoffset + qcount); i++)
+ vsi->rx_rings[i]->dcb_tc = n;
+ }
+ /* applicable only if "all_enatc" is set, which will be set from
+ * setup_tc method as part of configuring channels
+ */
+ if (vsi->all_enatc) {
+ u8 first_droptc = ice_get_first_droptc(vsi);
+
+ /* When DCB is configured, TC for ADQ queues (which are really
+ * PF queues) should be the first drop TC of the main VSI
+ */
+ ice_for_each_chnl_tc(n) {
+ if (!(vsi->all_enatc & BIT(n)))
+ break;
+
+ qoffset = vsi->mqprio_qopt.qopt.offset[n];
+ qcount = vsi->mqprio_qopt.qopt.count[n];
+ for (i = qoffset; i < (qoffset + qcount); i++) {
+ vsi->tx_rings[i]->dcb_tc = first_droptc;
+ vsi->rx_rings[i]->dcb_tc = first_droptc;
+ }
+ }
+ }
+}
+
+/**
+ * ice_dcb_ena_dis_vsi - disable certain VSIs for DCB config/reconfig
+ * @pf: pointer to the PF instance
+ * @ena: true to enable VSIs, false to disable
+ * @locked: true if caller holds RTNL lock, false otherwise
+ *
+ * Before a new DCB configuration can be applied, VSIs of type PF, SWITCHDEV
+ * and CHNL need to be brought down. Following completion of DCB configuration
+ * the VSIs that were downed need to be brought up again. This helper function
+ * does both.
+ */
+static void ice_dcb_ena_dis_vsi(struct ice_pf *pf, bool ena, bool locked)
+{
+ int i;
+
+ ice_for_each_vsi(pf, i) {
+ struct ice_vsi *vsi = pf->vsi[i];
+
+ if (!vsi)
+ continue;
+
+ switch (vsi->type) {
+ case ICE_VSI_CHNL:
+ case ICE_VSI_SWITCHDEV_CTRL:
+ case ICE_VSI_PF:
+ if (ena)
+ ice_ena_vsi(vsi, locked);
+ else
+ ice_dis_vsi(vsi, locked);
+ break;
+ default:
+ continue;
}
}
}
@@ -331,7 +407,9 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
*/
if (!locked)
rtnl_lock();
- ice_dis_vsi(pf_vsi, true);
+
+ /* disable VSIs affected by DCB changes */
+ ice_dcb_ena_dis_vsi(pf, false, true);
memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg));
memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
@@ -359,7 +437,8 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
ice_pf_dcb_recfg(pf);
out:
- ice_ena_vsi(pf_vsi, true);
+ /* enable previously downed VSIs */
+ ice_dcb_ena_dis_vsi(pf, true, true);
if (!locked)
rtnl_unlock();
free_cfg:
@@ -674,6 +753,8 @@ void ice_pf_dcb_recfg(struct ice_pf *pf)
tc_map = ICE_DFLT_TRAFFIC_CLASS;
ice_dcb_noncontig_cfg(pf);
}
+ } else if (vsi->type == ICE_VSI_CHNL) {
+ tc_map = BIT(ice_get_first_droptc(vsi));
} else {
tc_map = ICE_DFLT_TRAFFIC_CLASS;
}
@@ -684,10 +765,11 @@ void ice_pf_dcb_recfg(struct ice_pf *pf)
vsi->idx);
continue;
}
- /* no need to proceed with remaining cfg if it is switchdev
- * VSI
+ /* no need to proceed with remaining cfg if it is CHNL
+ * or switchdev VSI
*/
- if (vsi->type == ICE_VSI_SWITCHDEV_CTRL)
+ if (vsi->type == ICE_VSI_CHNL ||
+ vsi->type == ICE_VSI_SWITCHDEV_CTRL)
continue;
ice_vsi_map_rings_to_vectors(vsi);
@@ -862,7 +944,6 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
struct ice_dcbx_cfg tmp_dcbx_cfg;
bool need_reconfig = false;
struct ice_port_info *pi;
- struct ice_vsi *pf_vsi;
u8 mib_type;
int ret;
@@ -938,14 +1019,9 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
}
- pf_vsi = ice_get_main_vsi(pf);
- if (!pf_vsi) {
- dev_dbg(dev, "PF VSI doesn't exist\n");
- goto out;
- }
-
rtnl_lock();
- ice_dis_vsi(pf_vsi, true);
+ /* disable VSIs affected by DCB changes */
+ ice_dcb_ena_dis_vsi(pf, false, true);
ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
if (ret) {
@@ -956,7 +1032,8 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
/* changes in configuration update VSI */
ice_pf_dcb_recfg(pf);
- ice_ena_vsi(pf_vsi, true);
+ /* enable previously downed VSIs */
+ ice_dcb_ena_dis_vsi(pf, true, true);
unlock_rtnl:
rtnl_unlock();
out:
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
index 6700e97b3b51..4c421c842a13 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -16,7 +16,6 @@
void ice_dcb_rebuild(struct ice_pf *pf);
int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked);
-u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg);
u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi);
bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue);
@@ -34,8 +33,6 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring,
void
ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
struct ice_rq_event_info *event);
-void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc);
-
/**
* ice_find_q_in_range
* @low: start of queue range for a TC i.e. offset of TC
@@ -69,6 +66,12 @@ static inline u8 ice_get_pfc_mode(struct ice_pf *pf)
#else
static inline void ice_dcb_rebuild(struct ice_pf *pf) { }
+static inline void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi)
+{
+ vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
+ vsi->tc_cfg.numtc = 1;
+}
+
static inline u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
{
return ICE_DFLT_TRAFFIC_CLASS;
@@ -130,7 +133,6 @@ static inline void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { }
static inline void ice_update_dcb_stats(struct ice_pf *pf) { }
static inline void
ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { }
-static inline void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) { }
static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, u8 dcb_tc) { }
#endif /* CONFIG_DCB */
#endif /* _ICE_DCB_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
index a20c38446e76..6cb50653b18d 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.c
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
@@ -329,7 +329,7 @@ static void ice_eswitch_release_env(struct ice_pf *pf)
static struct ice_vsi *
ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
{
- return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, ICE_INVAL_VFID);
+ return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, ICE_INVAL_VFID, NULL);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 8b3eef6632e9..cfe96a127ed4 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3194,6 +3194,11 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
return -EIO;
}
+ if (ice_is_adq_active(pf)) {
+ netdev_err(netdev, "Cannot change RSS params with ADQ configured.\n");
+ return -EOPNOTSUPP;
+ }
+
if (key) {
if (!vsi->rss_hkey_user) {
vsi->rss_hkey_user =
@@ -3404,6 +3409,11 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
if (ch->other_count != (test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1U : 0U))
return -EINVAL;
+ if (ice_is_adq_active(pf)) {
+ netdev_err(dev, "Cannot set channels with ADQ configured.\n");
+ return -EOPNOTSUPP;
+ }
+
if (test_bit(ICE_FLAG_FD_ENA, pf->flags) && pf->hw.fdir_active_fltr) {
netdev_err(dev, "Cannot set channels when Flow Director filters are active\n");
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 231f8bea2519..4904ae088daa 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -22,6 +22,8 @@ const char *ice_vsi_type_str(enum ice_vsi_type vsi_type)
return "ICE_VSI_VF";
case ICE_VSI_CTRL:
return "ICE_VSI_CTRL";
+ case ICE_VSI_CHNL:
+ return "ICE_VSI_CHNL";
case ICE_VSI_LB:
return "ICE_VSI_LB";
case ICE_VSI_SWITCHDEV_CTRL:
@@ -73,6 +75,8 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
struct device *dev;
dev = ice_pf_to_dev(pf);
+ if (vsi->type == ICE_VSI_CHNL)
+ return 0;
/* allocate memory for both Tx and Rx ring pointers */
vsi->tx_rings = devm_kcalloc(dev, vsi->alloc_txq,
@@ -229,6 +233,10 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
vsi->alloc_rxq = 1;
vsi->num_q_vectors = 1;
break;
+ case ICE_VSI_CHNL:
+ vsi->alloc_txq = 0;
+ vsi->alloc_rxq = 0;
+ break;
case ICE_VSI_LB:
vsi->alloc_txq = 1;
vsi->alloc_rxq = 1;
@@ -274,7 +282,7 @@ static int ice_get_free_slot(void *array, int size, int curr)
* ice_vsi_delete - delete a VSI from the switch
* @vsi: pointer to VSI being removed
*/
-static void ice_vsi_delete(struct ice_vsi *vsi)
+void ice_vsi_delete(struct ice_vsi *vsi)
{
struct ice_pf *pf = vsi->back;
struct ice_vsi_ctx *ctxt;
@@ -345,7 +353,7 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi)
*
* Returns 0 on success, negative on failure
*/
-static int ice_vsi_clear(struct ice_vsi *vsi)
+int ice_vsi_clear(struct ice_vsi *vsi)
{
struct ice_pf *pf = NULL;
struct device *dev;
@@ -438,12 +446,14 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d
* ice_vsi_alloc - Allocates the next available struct VSI in the PF
* @pf: board private structure
* @vsi_type: type of VSI
+ * @ch: ptr to channel
* @vf_id: ID of the VF being configured
*
* returns a pointer to a VSI on success, NULL on failure.
*/
static struct ice_vsi *
-ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id)
+ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type,
+ struct ice_channel *ch, u16 vf_id)
{
struct device *dev = ice_pf_to_dev(pf);
struct ice_vsi *vsi = NULL;
@@ -470,7 +480,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id)
if (vsi_type == ICE_VSI_VF)
ice_vsi_set_num_qs(vsi, vf_id);
- else
+ else if (vsi_type != ICE_VSI_CHNL)
ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID);
switch (vsi->type) {
@@ -499,6 +509,13 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id)
if (ice_vsi_alloc_arrays(vsi))
goto err_rings;
break;
+ case ICE_VSI_CHNL:
+ if (!ch)
+ goto err_rings;
+ vsi->num_rxq = ch->num_rxq;
+ vsi->num_txq = ch->num_txq;
+ vsi->next_base_q = ch->base_q;
+ break;
case ICE_VSI_LB:
if (ice_vsi_alloc_arrays(vsi))
goto err_rings;
@@ -615,6 +632,9 @@ static int ice_vsi_get_qs(struct ice_vsi *vsi)
};
int ret;
+ if (vsi->type == ICE_VSI_CHNL)
+ return 0;
+
ret = __ice_vsi_get_qs(&tx_qs_cfg);
if (ret)
return ret;
@@ -733,11 +753,16 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
cap = &pf->hw.func_caps.common_cap;
switch (vsi->type) {
+ case ICE_VSI_CHNL:
case ICE_VSI_PF:
/* PF VSI will inherit RSS instance of PF */
vsi->rss_table_size = (u16)cap->rss_table_size;
- vsi->rss_size = min_t(u16, num_online_cpus(),
- BIT(cap->rss_table_entry_width));
+ if (vsi->type == ICE_VSI_CHNL)
+ vsi->rss_size = min_t(u16, vsi->num_rxq,
+ BIT(cap->rss_table_entry_width));
+ else
+ vsi->rss_size = min_t(u16, num_online_cpus(),
+ BIT(cap->rss_table_entry_width));
vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF;
break;
case ICE_VSI_SWITCHDEV_CTRL:
@@ -814,21 +839,13 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
u16 num_txq_per_tc, num_rxq_per_tc;
u16 qcount_tx = vsi->alloc_txq;
u16 qcount_rx = vsi->alloc_rxq;
- bool ena_tc0 = false;
u8 netdev_tc = 0;
int i;
- /* at least TC0 should be enabled by default */
- if (vsi->tc_cfg.numtc) {
- if (!(vsi->tc_cfg.ena_tc & BIT(0)))
- ena_tc0 = true;
- } else {
- ena_tc0 = true;
- }
-
- if (ena_tc0) {
- vsi->tc_cfg.numtc++;
- vsi->tc_cfg.ena_tc |= 1;
+ if (!vsi->tc_cfg.numtc) {
+ /* at least TC0 should be enabled by default */
+ vsi->tc_cfg.numtc = 1;
+ vsi->tc_cfg.ena_tc = 1;
}
num_rxq_per_tc = min_t(u16, qcount_rx / vsi->tc_cfg.numtc, ICE_MAX_RXQS_PER_TC);
@@ -970,6 +987,7 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
dev = ice_pf_to_dev(pf);
switch (vsi->type) {
+ case ICE_VSI_CHNL:
case ICE_VSI_PF:
/* PF VSI will inherit RSS instance of PF */
lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
@@ -992,6 +1010,28 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
}
+static void
+ice_chnl_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
+{
+ struct ice_pf *pf = vsi->back;
+ u16 qcount, qmap;
+ u8 offset = 0;
+ int pow;
+
+ qcount = min_t(int, vsi->num_rxq, pf->num_lan_msix);
+
+ pow = order_base_2(qcount);
+ qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
+ ICE_AQ_VSI_TC_Q_OFFSET_M) |
+ ((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
+ ICE_AQ_VSI_TC_Q_NUM_M);
+
+ ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+ ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG);
+ ctxt->info.q_mapping[0] = cpu_to_le16(vsi->next_base_q);
+ ctxt->info.q_mapping[1] = cpu_to_le16(qcount);
+}
+
/**
* ice_vsi_init - Create and initialize a VSI
* @vsi: the VSI being configured
@@ -1020,6 +1060,7 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
ctxt->flags = ICE_AQ_VSI_TYPE_PF;
break;
case ICE_VSI_SWITCHDEV_CTRL:
+ case ICE_VSI_CHNL:
ctxt->flags = ICE_AQ_VSI_TYPE_VMDQ2;
break;
case ICE_VSI_VF:
@@ -1032,6 +1073,21 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
goto out;
}
+ /* Handle VLAN pruning for channel VSI if main VSI has VLAN
+ * prune enabled
+ */
+ if (vsi->type == ICE_VSI_CHNL) {
+ struct ice_vsi *main_vsi;
+
+ main_vsi = ice_get_main_vsi(pf);
+ if (main_vsi && ice_vsi_is_vlan_pruning_ena(main_vsi))
+ ctxt->info.sw_flags2 |=
+ ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+ else
+ ctxt->info.sw_flags2 &=
+ ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+ }
+
ice_set_dflt_vsi_ctx(ctxt);
if (test_bit(ICE_FLAG_FD_ENA, pf->flags))
ice_set_fd_vsi_ctx(ctxt, vsi);
@@ -1052,13 +1108,17 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
}
ctxt->info.sw_id = vsi->port_info->sw_id;
- ice_vsi_setup_q_map(vsi, ctxt);
- if (!init_vsi) /* means VSI being updated */
- /* must to indicate which section of VSI context are
- * being modified
- */
- ctxt->info.valid_sections |=
- cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
+ if (vsi->type == ICE_VSI_CHNL) {
+ ice_chnl_vsi_setup_q_map(vsi, ctxt);
+ } else {
+ ice_vsi_setup_q_map(vsi, ctxt);
+ if (!init_vsi) /* means VSI being updated */
+ /* must to indicate which section of VSI context are
+ * being modified
+ */
+ ctxt->info.valid_sections |=
+ cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
+ }
/* enable/disable MAC and VLAN anti-spoof when spoofchk is on/off
* respectively
@@ -1237,6 +1297,8 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
/* SRIOV doesn't grab irq_tracker entries for each VSI */
if (vsi->type == ICE_VSI_VF)
return 0;
+ if (vsi->type == ICE_VSI_CHNL)
+ return 0;
if (vsi->base_vector) {
dev_dbg(dev, "VSI %d has non-zero base vector %d\n",
@@ -1403,7 +1465,7 @@ void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
* ice_vsi_cfg_rss_lut_key - Configure RSS params for a VSI
* @vsi: VSI to be configured
*/
-static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
+int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
{
struct ice_pf *pf = vsi->back;
struct device *dev;
@@ -1411,7 +1473,25 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
int err;
dev = ice_pf_to_dev(pf);
- vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq);
+ if (vsi->type == ICE_VSI_PF && vsi->ch_rss_size &&
+ (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))) {
+ vsi->rss_size = min_t(u16, vsi->rss_size, vsi->ch_rss_size);
+ } else {
+ vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq);
+
+ /* If orig_rss_size is valid and it is less than determined
+ * main VSI's rss_size, update main VSI's rss_size to be
+ * orig_rss_size so that when tc-qdisc is deleted, main VSI
+ * RSS table gets programmed to be correct (whatever it was
+ * to begin with (prior to setup-tc for ADQ config)
+ */
+ if (vsi->orig_rss_size && vsi->rss_size < vsi->orig_rss_size &&
+ vsi->orig_rss_size <= vsi->num_rxq) {
+ vsi->rss_size = vsi->orig_rss_size;
+ /* now orig_rss_size is used, reset it to zero */
+ vsi->orig_rss_size = 0;
+ }
+ }
lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
if (!lut)
@@ -2260,10 +2340,14 @@ err_out:
static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
{
- struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
+ if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) {
+ vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
+ vsi->tc_cfg.numtc = 1;
+ return;
+ }
- vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg);
- vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg);
+ /* set VSI TC information based on DCB config */
+ ice_vsi_set_dcb_tc_cfg(vsi);
}
/**
@@ -2376,6 +2460,7 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi)
switch (vsi->type) {
case ICE_VSI_CTRL:
+ case ICE_VSI_CHNL:
case ICE_VSI_LB:
case ICE_VSI_PF:
case ICE_VSI_SWITCHDEV_CTRL:
@@ -2475,6 +2560,7 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi)
* @vf_id: defines VF ID to which this VSI connects. This field is meant to be
* used only for ICE_VSI_VF VSI type. For other VSI types, should
* fill-in ICE_INVAL_VFID as input.
+ * @ch: ptr to channel
*
* This allocates the sw VSI structure and its queue resources.
*
@@ -2483,7 +2569,7 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi)
*/
struct ice_vsi *
ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
- enum ice_vsi_type vsi_type, u16 vf_id)
+ enum ice_vsi_type vsi_type, u16 vf_id, struct ice_channel *ch)
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
struct device *dev = ice_pf_to_dev(pf);
@@ -2491,10 +2577,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
struct ice_vsi *vsi;
int ret, i;
- if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL)
- vsi = ice_vsi_alloc(pf, vsi_type, vf_id);
+ if (vsi_type == ICE_VSI_CHNL)
+ vsi = ice_vsi_alloc(pf, vsi_type, ch, ICE_INVAL_VFID);
+ else if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL)
+ vsi = ice_vsi_alloc(pf, vsi_type, NULL, vf_id);
else
- vsi = ice_vsi_alloc(pf, vsi_type, ICE_INVAL_VFID);
+ vsi = ice_vsi_alloc(pf, vsi_type, NULL, ICE_INVAL_VFID);
if (!vsi) {
dev_err(dev, "could not allocate VSI\n");
@@ -2511,10 +2599,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
ice_alloc_fd_res(vsi);
- if (ice_vsi_get_qs(vsi)) {
- dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n",
- vsi->idx);
- goto unroll_vsi_alloc;
+ if (vsi_type != ICE_VSI_CHNL) {
+ if (ice_vsi_get_qs(vsi)) {
+ dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n",
+ vsi->idx);
+ goto unroll_vsi_alloc;
+ }
}
/* set RSS capabilities */
@@ -2573,6 +2663,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
}
ice_init_arfs(vsi);
break;
+ case ICE_VSI_CHNL:
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ ice_vsi_cfg_rss_lut_key(vsi);
+ ice_vsi_set_rss_flow_fld(vsi);
+ }
+ break;
case ICE_VSI_VF:
/* VF driver will take care of creating netdev for this type and
* map queues to vectors through Virtchnl, PF driver only
@@ -2611,9 +2707,21 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
}
/* configure VSI nodes based on number of queues and TC's */
- for (i = 0; i < vsi->tc_cfg.numtc; i++)
- max_txqs[i] = vsi->alloc_txq;
+ ice_for_each_traffic_class(i) {
+ if (!(vsi->tc_cfg.ena_tc & BIT(i)))
+ continue;
+
+ if (vsi->type == ICE_VSI_CHNL) {
+ if (!vsi->alloc_txq && vsi->num_txq)
+ max_txqs[i] = vsi->num_txq;
+ else
+ max_txqs[i] = pf->num_lan_tx;
+ } else {
+ max_txqs[i] = vsi->alloc_txq;
+ }
+ }
+ dev_dbg(dev, "vsi->tc_cfg.ena_tc = %d\n", vsi->tc_cfg.ena_tc);
status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
max_txqs);
if (status) {
@@ -3270,20 +3378,42 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
goto err_vectors;
break;
+ case ICE_VSI_CHNL:
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+ ice_vsi_cfg_rss_lut_key(vsi);
+ ice_vsi_set_rss_flow_fld(vsi);
+ }
+ break;
default:
break;
}
/* configure VSI nodes based on number of queues and TC's */
for (i = 0; i < vsi->tc_cfg.numtc; i++) {
- max_txqs[i] = vsi->alloc_txq;
+ /* configure VSI nodes based on number of queues and TC's.
+ * ADQ creates VSIs for each TC/Channel but doesn't
+ * allocate queues instead it reconfigures the PF queues
+ * as per the TC command. So max_txqs should point to the
+ * PF Tx queues.
+ */
+ if (vtype == ICE_VSI_CHNL)
+ max_txqs[i] = pf->num_lan_tx;
+ else
+ max_txqs[i] = vsi->alloc_txq;
if (ice_is_xdp_ena_vsi(vsi))
max_txqs[i] += vsi->num_xdp_txq;
}
- status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
- max_txqs);
+ if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ /* If MQPRIO is set, means channel code path, hence for main
+ * VSI's, use TC as 1
+ */
+ status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, 1, max_txqs);
+ else
+ status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx,
+ vsi->tc_cfg.ena_tc, max_txqs);
+
if (status) {
dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %s\n",
vsi->vsi_num, ice_stat_str(status));
@@ -3355,7 +3485,6 @@ int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout)
return 0;
}
-#ifdef CONFIG_DCB
/**
* ice_vsi_update_q_map - update our copy of the VSI info with new queue map
* @vsi: VSI being configured
@@ -3371,6 +3500,146 @@ static void ice_vsi_update_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
}
/**
+ * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration
+ * @vsi: the VSI being configured
+ * @ena_tc: TC map to be enabled
+ */
+void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc)
+{
+ struct net_device *netdev = vsi->netdev;
+ struct ice_pf *pf = vsi->back;
+ int numtc = vsi->tc_cfg.numtc;
+ struct ice_dcbx_cfg *dcbcfg;
+ u8 netdev_tc;
+ int i;
+
+ if (!netdev)
+ return;
+
+ /* CHNL VSI doesn't have it's own netdev, hence, no netdev_tc */
+ if (vsi->type == ICE_VSI_CHNL)
+ return;
+
+ if (!ena_tc) {
+ netdev_reset_tc(netdev);
+ return;
+ }
+
+ if (vsi->type == ICE_VSI_PF && ice_is_adq_active(pf))
+ numtc = vsi->all_numtc;
+
+ if (netdev_set_num_tc(netdev, numtc))
+ return;
+
+ dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+ ice_for_each_traffic_class(i)
+ if (vsi->tc_cfg.ena_tc & BIT(i))
+ netdev_set_tc_queue(netdev,
+ vsi->tc_cfg.tc_info[i].netdev_tc,
+ vsi->tc_cfg.tc_info[i].qcount_tx,
+ vsi->tc_cfg.tc_info[i].qoffset);
+ /* setup TC queue map for CHNL TCs */
+ ice_for_each_chnl_tc(i) {
+ if (!(vsi->all_enatc & BIT(i)))
+ break;
+ if (!vsi->mqprio_qopt.qopt.count[i])
+ break;
+ netdev_set_tc_queue(netdev, i,
+ vsi->mqprio_qopt.qopt.count[i],
+ vsi->mqprio_qopt.qopt.offset[i]);
+ }
+
+ if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ return;
+
+ for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+ u8 ets_tc = dcbcfg->etscfg.prio_table[i];
+
+ /* Get the mapped netdev TC# for the UP */
+ netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc;
+ netdev_set_prio_tc_map(netdev, i, netdev_tc);
+ }
+}
+
+/**
+ * ice_vsi_setup_q_map_mqprio - Prepares mqprio based tc_config
+ * @vsi: the VSI being configured,
+ * @ctxt: VSI context structure
+ * @ena_tc: number of traffic classes to enable
+ *
+ * Prepares VSI tc_config to have queue configurations based on MQPRIO options.
+ */
+static void
+ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt,
+ u8 ena_tc)
+{
+ u16 pow, offset = 0, qcount_tx = 0, qcount_rx = 0, qmap;
+ u16 tc0_offset = vsi->mqprio_qopt.qopt.offset[0];
+ int tc0_qcount = vsi->mqprio_qopt.qopt.count[0];
+ u8 netdev_tc = 0;
+ int i;
+
+ vsi->tc_cfg.ena_tc = ena_tc ? ena_tc : 1;
+
+ pow = order_base_2(tc0_qcount);
+ qmap = ((tc0_offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
+ ICE_AQ_VSI_TC_Q_OFFSET_M) |
+ ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M);
+
+ ice_for_each_traffic_class(i) {
+ if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
+ /* TC is not enabled */
+ vsi->tc_cfg.tc_info[i].qoffset = 0;
+ vsi->tc_cfg.tc_info[i].qcount_rx = 1;
+ vsi->tc_cfg.tc_info[i].qcount_tx = 1;
+ vsi->tc_cfg.tc_info[i].netdev_tc = 0;
+ ctxt->info.tc_mapping[i] = 0;
+ continue;
+ }
+
+ offset = vsi->mqprio_qopt.qopt.offset[i];
+ qcount_rx = vsi->mqprio_qopt.qopt.count[i];
+ qcount_tx = vsi->mqprio_qopt.qopt.count[i];
+ vsi->tc_cfg.tc_info[i].qoffset = offset;
+ vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx;
+ vsi->tc_cfg.tc_info[i].qcount_tx = qcount_tx;
+ vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++;
+ }
+
+ if (vsi->all_numtc && vsi->all_numtc != vsi->tc_cfg.numtc) {
+ ice_for_each_chnl_tc(i) {
+ if (!(vsi->all_enatc & BIT(i)))
+ continue;
+ offset = vsi->mqprio_qopt.qopt.offset[i];
+ qcount_rx = vsi->mqprio_qopt.qopt.count[i];
+ qcount_tx = vsi->mqprio_qopt.qopt.count[i];
+ }
+ }
+
+ /* Set actual Tx/Rx queue pairs */
+ vsi->num_txq = offset + qcount_tx;
+ vsi->num_rxq = offset + qcount_rx;
+
+ /* Setup queue TC[0].qmap for given VSI context */
+ ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+ ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
+ ctxt->info.q_mapping[1] = cpu_to_le16(tc0_qcount);
+
+ /* Find queue count available for channel VSIs and starting offset
+ * for channel VSIs
+ */
+ if (tc0_qcount && tc0_qcount < vsi->num_rxq) {
+ vsi->cnt_q_avail = vsi->num_rxq - tc0_qcount;
+ vsi->next_base_q = tc0_qcount;
+ }
+ dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_txq = %d\n", vsi->num_txq);
+ dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_rxq = %d\n", vsi->num_rxq);
+ dev_dbg(ice_pf_to_dev(vsi->back), "all_numtc %u, all_enatc: 0x%04x, tc_cfg.numtc %u\n",
+ vsi->all_numtc, vsi->all_enatc, vsi->tc_cfg.numtc);
+}
+
+/**
* ice_vsi_cfg_tc - Configure VSI Tx Sched for given TC map
* @vsi: VSI to be configured
* @ena_tc: TC bitmap
@@ -3388,6 +3657,9 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
u8 num_tc = 0;
dev = ice_pf_to_dev(pf);
+ if (vsi->tc_cfg.ena_tc == ena_tc &&
+ vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
+ return ret;
ice_for_each_traffic_class(i) {
/* build bitmap of enabled TCs */
@@ -3395,6 +3667,12 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
num_tc++;
/* populate max_txqs per TC */
max_txqs[i] = vsi->alloc_txq;
+ /* Update max_txqs if it is CHNL VSI, because alloc_t[r]xq are
+ * zero for CHNL VSI, hence use num_txq instead as max_txqs
+ */
+ if (vsi->type == ICE_VSI_CHNL &&
+ test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ max_txqs[i] = vsi->num_txq;
}
vsi->tc_cfg.ena_tc = ena_tc;
@@ -3407,7 +3685,11 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
ctx->vf_num = 0;
ctx->info = vsi->info;
- ice_vsi_setup_q_map(vsi, ctx);
+ if (vsi->type == ICE_VSI_PF &&
+ test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ ice_vsi_setup_q_map_mqprio(vsi, ctx, ena_tc);
+ else
+ ice_vsi_setup_q_map(vsi, ctx);
/* must to indicate which section of VSI context are being modified */
ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
@@ -3418,8 +3700,13 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
goto out;
}
- status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
- max_txqs);
+ if (vsi->type == ICE_VSI_PF &&
+ test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, 1,
+ max_txqs);
+ else
+ status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx,
+ vsi->tc_cfg.ena_tc, max_txqs);
if (status) {
dev_err(dev, "VSI %d failed TC config, error %s\n",
@@ -3435,7 +3722,6 @@ out:
kfree(ctx);
return ret;
}
-#endif /* CONFIG_DCB */
/**
* ice_update_ring_stats - Update ring statistics
@@ -3668,7 +3954,7 @@ int ice_get_link_speed_mbps(struct ice_vsi *vsi)
*
* Return current VSI link speed and 0 if the speed is unknown.
*/
-static int ice_get_link_speed_kbps(struct ice_vsi *vsi)
+int ice_get_link_speed_kbps(struct ice_vsi *vsi)
{
int speed_mbps;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index c79fcbf82d8f..e7f4ecbb8549 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -51,13 +51,18 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
int ice_set_link(struct ice_vsi *vsi, bool ena);
-#ifdef CONFIG_DCB
+void ice_vsi_delete(struct ice_vsi *vsi);
+int ice_vsi_clear(struct ice_vsi *vsi);
+
int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc);
-#endif /* CONFIG_DCB */
+
+int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi);
+
+void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc);
struct ice_vsi *
ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
- enum ice_vsi_type vsi_type, u16 vf_id);
+ enum ice_vsi_type vsi_type, u16 vf_id, struct ice_channel *ch);
void ice_napi_del(struct ice_vsi *vsi);
@@ -119,6 +124,7 @@ int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi);
int ice_clear_dflt_vsi(struct ice_sw *sw);
int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate);
int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate);
+int ice_get_link_speed_kbps(struct ice_vsi *vsi);
int ice_get_link_speed_mbps(struct ice_vsi *vsi);
int
ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *));
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 846623a97723..cb82abd08a40 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -55,6 +55,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
static void ice_vsi_release_all(struct ice_pf *pf);
+static int ice_rebuild_channels(struct ice_pf *pf);
+static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr);
+
bool netif_is_ice(struct net_device *dev)
{
return dev && (dev->netdev_ops == &ice_netdev_ops);
@@ -106,7 +109,12 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf)
ice_for_each_txq(vsi, i) {
struct ice_tx_ring *tx_ring = vsi->tx_rings[i];
- if (tx_ring && tx_ring->desc) {
+ if (!tx_ring)
+ continue;
+ if (ice_ring_ch_enabled(tx_ring))
+ continue;
+
+ if (tx_ring->desc) {
/* If packet counter has not changed the queue is
* likely stalled, so force an interrupt for this
* queue.
@@ -458,17 +466,21 @@ static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
}
/**
- * ice_prepare_for_reset - prep for the core to reset
+ * ice_prepare_for_reset - prep for reset
* @pf: board private structure
+ * @reset_type: reset type requested
*
* Inform or close all dependent features in prep for reset.
*/
static void
-ice_prepare_for_reset(struct ice_pf *pf)
+ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
{
struct ice_hw *hw = &pf->hw;
+ struct ice_vsi *vsi;
unsigned int i;
+ dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type);
+
/* already prepared for reset */
if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
return;
@@ -483,6 +495,38 @@ ice_prepare_for_reset(struct ice_pf *pf)
ice_for_each_vf(pf, i)
ice_set_vf_state_qs_dis(&pf->vf[i]);
+ /* release ADQ specific HW and SW resources */
+ vsi = ice_get_main_vsi(pf);
+ if (!vsi)
+ goto skip;
+
+ /* to be on safe side, reset orig_rss_size so that normal flow
+ * of deciding rss_size can take precedence
+ */
+ vsi->orig_rss_size = 0;
+
+ if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
+ if (reset_type == ICE_RESET_PFR) {
+ vsi->old_ena_tc = vsi->all_enatc;
+ vsi->old_numtc = vsi->all_numtc;
+ } else {
+ ice_remove_q_channels(vsi, true);
+
+ /* for other reset type, do not support channel rebuild
+ * hence reset needed info
+ */
+ vsi->old_ena_tc = 0;
+ vsi->all_enatc = 0;
+ vsi->old_numtc = 0;
+ vsi->all_numtc = 0;
+ vsi->req_txq = 0;
+ vsi->req_rxq = 0;
+ clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
+ memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt));
+ }
+ }
+skip:
+
/* clear SW filtering DB */
ice_clear_hw_tbls(hw);
/* disable the VSIs and their queues that are not already DOWN */
@@ -502,8 +546,7 @@ ice_prepare_for_reset(struct ice_pf *pf)
/**
* ice_do_reset - Initiate one of many types of resets
* @pf: board private structure
- * @reset_type: reset type requested
- * before this function was called.
+ * @reset_type: reset type requested before this function was called.
*/
static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
{
@@ -512,7 +555,7 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
- ice_prepare_for_reset(pf);
+ ice_prepare_for_reset(pf, reset_type);
/* trigger the reset */
if (ice_reset(hw, reset_type)) {
@@ -570,7 +613,7 @@ static void ice_reset_subtask(struct ice_pf *pf)
/* return if no valid reset type requested */
if (reset_type == ICE_RESET_INVAL)
return;
- ice_prepare_for_reset(pf);
+ ice_prepare_for_reset(pf, reset_type);
/* make sure we are ready to rebuild */
if (ice_check_reset(&pf->hw)) {
@@ -3240,7 +3283,14 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
static struct ice_vsi *
ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
{
- return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID);
+ return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID, NULL);
+}
+
+static struct ice_vsi *
+ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
+ struct ice_channel *ch)
+{
+ return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, ICE_INVAL_VFID, ch);
}
/**
@@ -3254,7 +3304,7 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
static struct ice_vsi *
ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
{
- return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID);
+ return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID, NULL);
}
/**
@@ -3268,7 +3318,7 @@ ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
struct ice_vsi *
ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
{
- return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID);
+ return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID, NULL);
}
/**
@@ -3361,6 +3411,9 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
if (!vsi)
return -ENOMEM;
+ /* init channel list */
+ INIT_LIST_HEAD(&vsi->ch_list);
+
status = ice_cfg_netdev(vsi);
if (status) {
status = -ENODEV;
@@ -4956,7 +5009,7 @@ ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err)
if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
set_bit(ICE_PFR_REQ, pf->state);
- ice_prepare_for_reset(pf);
+ ice_prepare_for_reset(pf, ICE_RESET_PFR);
}
}
@@ -5048,7 +5101,7 @@ static void ice_pci_err_reset_prepare(struct pci_dev *pdev)
if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
set_bit(ICE_PFR_REQ, pf->state);
- ice_prepare_for_reset(pf);
+ ice_prepare_for_reset(pf, ICE_RESET_PFR);
}
}
}
@@ -5202,6 +5255,12 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi)
return -EBUSY;
}
+ if (ice_chnl_dmac_fltr_cnt(pf)) {
+ netdev_err(netdev, "can't set mac %pM. Device has tc-flower filters, delete all of them and try again\n",
+ mac);
+ return -EAGAIN;
+ }
+
netif_addr_lock_bh(netdev);
ether_addr_copy(old_mac, netdev->dev_addr);
/* change the netdev's MAC address */
@@ -5453,6 +5512,18 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
ice_clear_arfs(vsi);
}
+ /* don't turn off hw_tc_offload when ADQ is already enabled */
+ if (!(features & NETIF_F_HW_TC) && ice_is_adq_active(pf)) {
+ dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n");
+ return -EACCES;
+ }
+
+ if ((features & NETIF_F_HW_TC) &&
+ !(netdev->features & NETIF_F_HW_TC))
+ set_bit(ICE_FLAG_CLS_FLOWER, pf->flags);
+ else
+ clear_bit(ICE_FLAG_CLS_FLOWER, pf->flags);
+
return ret;
}
@@ -6307,6 +6378,9 @@ static void ice_vsi_release_all(struct ice_pf *pf)
if (!pf->vsi[i])
continue;
+ if (pf->vsi[i]->type == ICE_VSI_CHNL)
+ continue;
+
err = ice_vsi_release(pf->vsi[i]);
if (err)
dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
@@ -6517,6 +6591,15 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
goto err_vsi_rebuild;
}
+ if (reset_type == ICE_RESET_PFR) {
+ err = ice_rebuild_channels(pf);
+ if (err) {
+ dev_err(dev, "failed to rebuild and replay ADQ VSIs, err %d\n",
+ err);
+ goto err_vsi_rebuild;
+ }
+ }
+
/* If Flow Director is active */
if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
err = ice_vsi_rebuild_by_type(pf, ICE_VSI_CTRL);
@@ -7183,6 +7266,861 @@ ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
}
}
+/**
+ * ice_validate_mqprio_qopt - Validate TCF input parameters
+ * @vsi: Pointer to VSI
+ * @mqprio_qopt: input parameters for mqprio queue configuration
+ *
+ * This function validates MQPRIO params, such as qcount (power of 2 wherever
+ * needed), and make sure user doesn't specify qcount and BW rate limit
+ * for TCs, which are more than "num_tc"
+ */
+static int
+ice_validate_mqprio_qopt(struct ice_vsi *vsi,
+ struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+ u64 sum_max_rate = 0, sum_min_rate = 0;
+ int non_power_of_2_qcount = 0;
+ struct ice_pf *pf = vsi->back;
+ int max_rss_q_cnt = 0;
+ struct device *dev;
+ int i, speed;
+ u8 num_tc;
+
+ if (vsi->type != ICE_VSI_PF)
+ return -EINVAL;
+
+ if (mqprio_qopt->qopt.offset[0] != 0 ||
+ mqprio_qopt->qopt.num_tc < 1 ||
+ mqprio_qopt->qopt.num_tc > ICE_CHNL_MAX_TC)
+ return -EINVAL;
+
+ dev = ice_pf_to_dev(pf);
+ vsi->ch_rss_size = 0;
+ num_tc = mqprio_qopt->qopt.num_tc;
+
+ for (i = 0; num_tc; i++) {
+ int qcount = mqprio_qopt->qopt.count[i];
+ u64 max_rate, min_rate, rem;
+
+ if (!qcount)
+ return -EINVAL;
+
+ if (is_power_of_2(qcount)) {
+ if (non_power_of_2_qcount &&
+ qcount > non_power_of_2_qcount) {
+ dev_err(dev, "qcount[%d] cannot be greater than non power of 2 qcount[%d]\n",
+ qcount, non_power_of_2_qcount);
+ return -EINVAL;
+ }
+ if (qcount > max_rss_q_cnt)
+ max_rss_q_cnt = qcount;
+ } else {
+ if (non_power_of_2_qcount &&
+ qcount != non_power_of_2_qcount) {
+ dev_err(dev, "Only one non power of 2 qcount allowed[%d,%d]\n",
+ qcount, non_power_of_2_qcount);
+ return -EINVAL;
+ }
+ if (qcount < max_rss_q_cnt) {
+ dev_err(dev, "non power of 2 qcount[%d] cannot be less than other qcount[%d]\n",
+ qcount, max_rss_q_cnt);
+ return -EINVAL;
+ }
+ max_rss_q_cnt = qcount;
+ non_power_of_2_qcount = qcount;
+ }
+
+ /* TC command takes input in K/N/Gbps or K/M/Gbit etc but
+ * converts the bandwidth rate limit into Bytes/s when
+ * passing it down to the driver. So convert input bandwidth
+ * from Bytes/s to Kbps
+ */
+ max_rate = mqprio_qopt->max_rate[i];
+ max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR);
+ sum_max_rate += max_rate;
+
+ /* min_rate is minimum guaranteed rate and it can't be zero */
+ min_rate = mqprio_qopt->min_rate[i];
+ min_rate = div_u64(min_rate, ICE_BW_KBPS_DIVISOR);
+ sum_min_rate += min_rate;
+
+ if (min_rate && min_rate < ICE_MIN_BW_LIMIT) {
+ dev_err(dev, "TC%d: min_rate(%llu Kbps) < %u Kbps\n", i,
+ min_rate, ICE_MIN_BW_LIMIT);
+ return -EINVAL;
+ }
+
+ iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem);
+ if (rem) {
+ dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps",
+ i, ICE_MIN_BW_LIMIT);
+ return -EINVAL;
+ }
+
+ iter_div_u64_rem(max_rate, ICE_MIN_BW_LIMIT, &rem);
+ if (rem) {
+ dev_err(dev, "TC%d: Max Rate not multiple of %u Kbps",
+ i, ICE_MIN_BW_LIMIT);
+ return -EINVAL;
+ }
+
+ /* min_rate can't be more than max_rate, except when max_rate
+ * is zero (implies max_rate sought is max line rate). In such
+ * a case min_rate can be more than max.
+ */
+ if (max_rate && min_rate > max_rate) {
+ dev_err(dev, "min_rate %llu Kbps can't be more than max_rate %llu Kbps\n",
+ min_rate, max_rate);
+ return -EINVAL;
+ }
+
+ if (i >= mqprio_qopt->qopt.num_tc - 1)
+ break;
+ if (mqprio_qopt->qopt.offset[i + 1] !=
+ (mqprio_qopt->qopt.offset[i] + qcount))
+ return -EINVAL;
+ }
+ if (vsi->num_rxq <
+ (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
+ return -EINVAL;
+ if (vsi->num_txq <
+ (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
+ return -EINVAL;
+
+ speed = ice_get_link_speed_kbps(vsi);
+ if (sum_max_rate && sum_max_rate > (u64)speed) {
+ dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n",
+ sum_max_rate, speed);
+ return -EINVAL;
+ }
+ if (sum_min_rate && sum_min_rate > (u64)speed) {
+ dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n",
+ sum_min_rate, speed);
+ return -EINVAL;
+ }
+
+ /* make sure vsi->ch_rss_size is set correctly based on TC's qcount */
+ vsi->ch_rss_size = max_rss_q_cnt;
+
+ return 0;
+}
+
+/**
+ * ice_add_channel - add a channel by adding VSI
+ * @pf: ptr to PF device
+ * @sw_id: underlying HW switching element ID
+ * @ch: ptr to channel structure
+ *
+ * Add a channel (VSI) using add_vsi and queue_map
+ */
+static int ice_add_channel(struct ice_pf *pf, u16 sw_id, struct ice_channel *ch)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_vsi *vsi;
+
+ if (ch->type != ICE_VSI_CHNL) {
+ dev_err(dev, "add new VSI failed, ch->type %d\n", ch->type);
+ return -EINVAL;
+ }
+
+ vsi = ice_chnl_vsi_setup(pf, pf->hw.port_info, ch);
+ if (!vsi || vsi->type != ICE_VSI_CHNL) {
+ dev_err(dev, "create chnl VSI failure\n");
+ return -EINVAL;
+ }
+
+ ch->sw_id = sw_id;
+ ch->vsi_num = vsi->vsi_num;
+ ch->info.mapping_flags = vsi->info.mapping_flags;
+ ch->ch_vsi = vsi;
+ /* set the back pointer of channel for newly created VSI */
+ vsi->ch = ch;
+
+ memcpy(&ch->info.q_mapping, &vsi->info.q_mapping,
+ sizeof(vsi->info.q_mapping));
+ memcpy(&ch->info.tc_mapping, vsi->info.tc_mapping,
+ sizeof(vsi->info.tc_mapping));
+
+ return 0;
+}
+
+/**
+ * ice_chnl_cfg_res
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ *
+ * Configure channel specific resources such as rings, vector.
+ */
+static void ice_chnl_cfg_res(struct ice_vsi *vsi, struct ice_channel *ch)
+{
+ int i;
+
+ for (i = 0; i < ch->num_txq; i++) {
+ struct ice_q_vector *tx_q_vector, *rx_q_vector;
+ struct ice_ring_container *rc;
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+
+ tx_ring = vsi->tx_rings[ch->base_q + i];
+ rx_ring = vsi->rx_rings[ch->base_q + i];
+ if (!tx_ring || !rx_ring)
+ continue;
+
+ /* setup ring being channel enabled */
+ tx_ring->ch = ch;
+ rx_ring->ch = ch;
+
+ /* following code block sets up vector specific attributes */
+ tx_q_vector = tx_ring->q_vector;
+ rx_q_vector = rx_ring->q_vector;
+ if (!tx_q_vector && !rx_q_vector)
+ continue;
+
+ if (tx_q_vector) {
+ tx_q_vector->ch = ch;
+ /* setup Tx and Rx ITR setting if DIM is off */
+ rc = &tx_q_vector->tx;
+ if (!ITR_IS_DYNAMIC(rc))
+ ice_write_itr(rc, rc->itr_setting);
+ }
+ if (rx_q_vector) {
+ rx_q_vector->ch = ch;
+ /* setup Tx and Rx ITR setting if DIM is off */
+ rc = &rx_q_vector->rx;
+ if (!ITR_IS_DYNAMIC(rc))
+ ice_write_itr(rc, rc->itr_setting);
+ }
+ }
+
+ /* it is safe to assume that, if channel has non-zero num_t[r]xq, then
+ * GLINT_ITR register would have written to perform in-context
+ * update, hence perform flush
+ */
+ if (ch->num_txq || ch->num_rxq)
+ ice_flush(&vsi->back->hw);
+}
+
+/**
+ * ice_cfg_chnl_all_res - configure channel resources
+ * @vsi: pte to main_vsi
+ * @ch: ptr to channel structure
+ *
+ * This function configures channel specific resources such as flow-director
+ * counter index, and other resources such as queues, vectors, ITR settings
+ */
+static void
+ice_cfg_chnl_all_res(struct ice_vsi *vsi, struct ice_channel *ch)
+{
+ /* configure channel (aka ADQ) resources such as queues, vectors,
+ * ITR settings for channel specific vectors and anything else
+ */
+ ice_chnl_cfg_res(vsi, ch);
+}
+
+/**
+ * ice_setup_hw_channel - setup new channel
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ * @sw_id: underlying HW switching element ID
+ * @type: type of channel to be created (VMDq2/VF)
+ *
+ * Setup new channel (VSI) based on specified type (VMDq2/VF)
+ * and configures Tx rings accordingly
+ */
+static int
+ice_setup_hw_channel(struct ice_pf *pf, struct ice_vsi *vsi,
+ struct ice_channel *ch, u16 sw_id, u8 type)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ int ret;
+
+ ch->base_q = vsi->next_base_q;
+ ch->type = type;
+
+ ret = ice_add_channel(pf, sw_id, ch);
+ if (ret) {
+ dev_err(dev, "failed to add_channel using sw_id %u\n", sw_id);
+ return ret;
+ }
+
+ /* configure/setup ADQ specific resources */
+ ice_cfg_chnl_all_res(vsi, ch);
+
+ /* make sure to update the next_base_q so that subsequent channel's
+ * (aka ADQ) VSI queue map is correct
+ */
+ vsi->next_base_q = vsi->next_base_q + ch->num_rxq;
+ dev_dbg(dev, "added channel: vsi_num %u, num_rxq %u\n", ch->vsi_num,
+ ch->num_rxq);
+
+ return 0;
+}
+
+/**
+ * ice_setup_channel - setup new channel using uplink element
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ *
+ * Setup new channel (VSI) based on specified type (VMDq2/VF)
+ * and uplink switching element
+ */
+static bool
+ice_setup_channel(struct ice_pf *pf, struct ice_vsi *vsi,
+ struct ice_channel *ch)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ u16 sw_id;
+ int ret;
+
+ if (vsi->type != ICE_VSI_PF) {
+ dev_err(dev, "unsupported parent VSI type(%d)\n", vsi->type);
+ return false;
+ }
+
+ sw_id = pf->first_sw->sw_id;
+
+ /* create channel (VSI) */
+ ret = ice_setup_hw_channel(pf, vsi, ch, sw_id, ICE_VSI_CHNL);
+ if (ret) {
+ dev_err(dev, "failed to setup hw_channel\n");
+ return false;
+ }
+ dev_dbg(dev, "successfully created channel()\n");
+
+ return ch->ch_vsi ? true : false;
+}
+
+/**
+ * ice_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
+ * @vsi: VSI to be configured
+ * @max_tx_rate: max Tx rate in Kbps to be configured as maximum BW limit
+ * @min_tx_rate: min Tx rate in Kbps to be configured as minimum BW limit
+ */
+static int
+ice_set_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate, u64 min_tx_rate)
+{
+ int err;
+
+ err = ice_set_min_bw_limit(vsi, min_tx_rate);
+ if (err)
+ return err;
+
+ return ice_set_max_bw_limit(vsi, max_tx_rate);
+}
+
+/**
+ * ice_create_q_channel - function to create channel
+ * @vsi: VSI to be configured
+ * @ch: ptr to channel (it contains channel specific params)
+ *
+ * This function creates channel (VSI) using num_queues specified by user,
+ * reconfigs RSS if needed.
+ */
+static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch)
+{
+ struct ice_pf *pf = vsi->back;
+ struct device *dev;
+
+ if (!ch)
+ return -EINVAL;
+
+ dev = ice_pf_to_dev(pf);
+ if (!ch->num_txq || !ch->num_rxq) {
+ dev_err(dev, "Invalid num_queues requested: %d\n", ch->num_rxq);
+ return -EINVAL;
+ }
+
+ if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_txq) {
+ dev_err(dev, "cnt_q_avail (%u) less than num_queues %d\n",
+ vsi->cnt_q_avail, ch->num_txq);
+ return -EINVAL;
+ }
+
+ if (!ice_setup_channel(pf, vsi, ch)) {
+ dev_info(dev, "Failed to setup channel\n");
+ return -EINVAL;
+ }
+ /* configure BW rate limit */
+ if (ch->ch_vsi && (ch->max_tx_rate || ch->min_tx_rate)) {
+ int ret;
+
+ ret = ice_set_bw_limit(ch->ch_vsi, ch->max_tx_rate,
+ ch->min_tx_rate);
+ if (ret)
+ dev_err(dev, "failed to set Tx rate of %llu Kbps for VSI(%u)\n",
+ ch->max_tx_rate, ch->ch_vsi->vsi_num);
+ else
+ dev_dbg(dev, "set Tx rate of %llu Kbps for VSI(%u)\n",
+ ch->max_tx_rate, ch->ch_vsi->vsi_num);
+ }
+
+ vsi->cnt_q_avail -= ch->num_txq;
+
+ return 0;
+}
+
+/**
+ * ice_rem_all_chnl_fltrs - removes all channel filters
+ * @pf: ptr to PF, TC-flower based filter are tracked at PF level
+ *
+ * Remove all advanced switch filters only if they are channel specific
+ * tc-flower based filter
+ */
+static void ice_rem_all_chnl_fltrs(struct ice_pf *pf)
+{
+ struct ice_tc_flower_fltr *fltr;
+ struct hlist_node *node;
+
+ /* to remove all channel filters, iterate an ordered list of filters */
+ hlist_for_each_entry_safe(fltr, node,
+ &pf->tc_flower_fltr_list,
+ tc_flower_node) {
+ struct ice_rule_query_data rule;
+ int status;
+
+ /* for now process only channel specific filters */
+ if (!ice_is_chnl_fltr(fltr))
+ continue;
+
+ rule.rid = fltr->rid;
+ rule.rule_id = fltr->rule_id;
+ rule.vsi_handle = fltr->dest_id;
+ status = ice_rem_adv_rule_by_id(&pf->hw, &rule);
+ if (status) {
+ if (status == -ENOENT)
+ dev_dbg(ice_pf_to_dev(pf), "TC flower filter (rule_id %u) does not exist\n",
+ rule.rule_id);
+ else
+ dev_err(ice_pf_to_dev(pf), "failed to delete TC flower filter, status %d\n",
+ status);
+ } else if (fltr->dest_vsi) {
+ /* update advanced switch filter count */
+ if (fltr->dest_vsi->type == ICE_VSI_CHNL) {
+ u32 flags = fltr->flags;
+
+ fltr->dest_vsi->num_chnl_fltr--;
+ if (flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+ ICE_TC_FLWR_FIELD_ENC_DST_MAC))
+ pf->num_dmac_chnl_fltrs--;
+ }
+ }
+
+ hlist_del(&fltr->tc_flower_node);
+ kfree(fltr);
+ }
+}
+
+/**
+ * ice_remove_q_channels - Remove queue channels for the TCs
+ * @vsi: VSI to be configured
+ * @rem_fltr: delete advanced switch filter or not
+ *
+ * Remove queue channels for the TCs
+ */
+static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_fltr)
+{
+ struct ice_channel *ch, *ch_tmp;
+ struct ice_pf *pf = vsi->back;
+ int i;
+
+ /* remove all tc-flower based filter if they are channel filters only */
+ if (rem_fltr)
+ ice_rem_all_chnl_fltrs(pf);
+
+ /* perform cleanup for channels if they exist */
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+ struct ice_vsi *ch_vsi;
+
+ list_del(&ch->list);
+ ch_vsi = ch->ch_vsi;
+ if (!ch_vsi) {
+ kfree(ch);
+ continue;
+ }
+
+ /* Reset queue contexts */
+ for (i = 0; i < ch->num_rxq; i++) {
+ struct ice_tx_ring *tx_ring;
+ struct ice_rx_ring *rx_ring;
+
+ tx_ring = vsi->tx_rings[ch->base_q + i];
+ rx_ring = vsi->rx_rings[ch->base_q + i];
+ if (tx_ring) {
+ tx_ring->ch = NULL;
+ if (tx_ring->q_vector)
+ tx_ring->q_vector->ch = NULL;
+ }
+ if (rx_ring) {
+ rx_ring->ch = NULL;
+ if (rx_ring->q_vector)
+ rx_ring->q_vector->ch = NULL;
+ }
+ }
+
+ /* clear the VSI from scheduler tree */
+ ice_rm_vsi_lan_cfg(ch->ch_vsi->port_info, ch->ch_vsi->idx);
+
+ /* Delete VSI from FW */
+ ice_vsi_delete(ch->ch_vsi);
+
+ /* Delete VSI from PF and HW VSI arrays */
+ ice_vsi_clear(ch->ch_vsi);
+
+ /* free the channel */
+ kfree(ch);
+ }
+
+ /* clear the channel VSI map which is stored in main VSI */
+ ice_for_each_chnl_tc(i)
+ vsi->tc_map_vsi[i] = NULL;
+
+ /* reset main VSI's all TC information */
+ vsi->all_enatc = 0;
+ vsi->all_numtc = 0;
+}
+
+/**
+ * ice_rebuild_channels - rebuild channel
+ * @pf: ptr to PF
+ *
+ * Recreate channel VSIs and replay filters
+ */
+static int ice_rebuild_channels(struct ice_pf *pf)
+{
+ struct device *dev = ice_pf_to_dev(pf);
+ struct ice_vsi *main_vsi;
+ bool rem_adv_fltr = true;
+ struct ice_channel *ch;
+ struct ice_vsi *vsi;
+ int tc_idx = 1;
+ int i, err;
+
+ main_vsi = ice_get_main_vsi(pf);
+ if (!main_vsi)
+ return 0;
+
+ if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) ||
+ main_vsi->old_numtc == 1)
+ return 0; /* nothing to be done */
+
+ /* reconfigure main VSI based on old value of TC and cached values
+ * for MQPRIO opts
+ */
+ err = ice_vsi_cfg_tc(main_vsi, main_vsi->old_ena_tc);
+ if (err) {
+ dev_err(dev, "failed configuring TC(ena_tc:0x%02x) for HW VSI=%u\n",
+ main_vsi->old_ena_tc, main_vsi->vsi_num);
+ return err;
+ }
+
+ /* rebuild ADQ VSIs */
+ ice_for_each_vsi(pf, i) {
+ enum ice_vsi_type type;
+
+ vsi = pf->vsi[i];
+ if (!vsi || vsi->type != ICE_VSI_CHNL)
+ continue;
+
+ type = vsi->type;
+
+ /* rebuild ADQ VSI */
+ err = ice_vsi_rebuild(vsi, true);
+ if (err) {
+ dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n",
+ ice_vsi_type_str(type), vsi->idx, err);
+ goto cleanup;
+ }
+
+ /* Re-map HW VSI number, using VSI handle that has been
+ * previously validated in ice_replay_vsi() call above
+ */
+ vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
+
+ /* replay filters for the VSI */
+ err = ice_replay_vsi(&pf->hw, vsi->idx);
+ if (err) {
+ dev_err(dev, "VSI (type:%s) replay failed, err %d, VSI index %d\n",
+ ice_vsi_type_str(type), err, vsi->idx);
+ rem_adv_fltr = false;
+ goto cleanup;
+ }
+ dev_info(dev, "VSI (type:%s) at index %d rebuilt successfully\n",
+ ice_vsi_type_str(type), vsi->idx);
+
+ /* store ADQ VSI at correct TC index in main VSI's
+ * map of TC to VSI
+ */
+ main_vsi->tc_map_vsi[tc_idx++] = vsi;
+ }
+
+ /* ADQ VSI(s) has been rebuilt successfully, so setup
+ * channel for main VSI's Tx and Rx rings
+ */
+ list_for_each_entry(ch, &main_vsi->ch_list, list) {
+ struct ice_vsi *ch_vsi;
+
+ ch_vsi = ch->ch_vsi;
+ if (!ch_vsi)
+ continue;
+
+ /* reconfig channel resources */
+ ice_cfg_chnl_all_res(main_vsi, ch);
+
+ /* replay BW rate limit if it is non-zero */
+ if (!ch->max_tx_rate && !ch->min_tx_rate)
+ continue;
+
+ err = ice_set_bw_limit(ch_vsi, ch->max_tx_rate,
+ ch->min_tx_rate);
+ if (err)
+ dev_err(dev, "failed (err:%d) to rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
+ err, ch->max_tx_rate, ch->min_tx_rate,
+ ch_vsi->vsi_num);
+ else
+ dev_dbg(dev, "successfully rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
+ ch->max_tx_rate, ch->min_tx_rate,
+ ch_vsi->vsi_num);
+ }
+
+ /* reconfig RSS for main VSI */
+ if (main_vsi->ch_rss_size)
+ ice_vsi_cfg_rss_lut_key(main_vsi);
+
+ return 0;
+
+cleanup:
+ ice_remove_q_channels(main_vsi, rem_adv_fltr);
+ return err;
+}
+
+/**
+ * ice_create_q_channels - Add queue channel for the given TCs
+ * @vsi: VSI to be configured
+ *
+ * Configures queue channel mapping to the given TCs
+ */
+static int ice_create_q_channels(struct ice_vsi *vsi)
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_channel *ch;
+ int ret = 0, i;
+
+ ice_for_each_chnl_tc(i) {
+ if (!(vsi->all_enatc & BIT(i)))
+ continue;
+
+ ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+ if (!ch) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+ INIT_LIST_HEAD(&ch->list);
+ ch->num_rxq = vsi->mqprio_qopt.qopt.count[i];
+ ch->num_txq = vsi->mqprio_qopt.qopt.count[i];
+ ch->base_q = vsi->mqprio_qopt.qopt.offset[i];
+ ch->max_tx_rate = vsi->mqprio_qopt.max_rate[i];
+ ch->min_tx_rate = vsi->mqprio_qopt.min_rate[i];
+
+ /* convert to Kbits/s */
+ if (ch->max_tx_rate)
+ ch->max_tx_rate = div_u64(ch->max_tx_rate,
+ ICE_BW_KBPS_DIVISOR);
+ if (ch->min_tx_rate)
+ ch->min_tx_rate = div_u64(ch->min_tx_rate,
+ ICE_BW_KBPS_DIVISOR);
+
+ ret = ice_create_q_channel(vsi, ch);
+ if (ret) {
+ dev_err(ice_pf_to_dev(pf),
+ "failed creating channel TC:%d\n", i);
+ kfree(ch);
+ goto err_free;
+ }
+ list_add_tail(&ch->list, &vsi->ch_list);
+ vsi->tc_map_vsi[i] = ch->ch_vsi;
+ dev_dbg(ice_pf_to_dev(pf),
+ "successfully created channel: VSI %pK\n", ch->ch_vsi);
+ }
+ return 0;
+
+err_free:
+ ice_remove_q_channels(vsi, false);
+
+ return ret;
+}
+
+/**
+ * ice_setup_tc_mqprio_qdisc - configure multiple traffic classes
+ * @netdev: net device to configure
+ * @type_data: TC offload data
+ */
+static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data)
+{
+ struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_pf *pf = vsi->back;
+ u16 mode, ena_tc_qdisc = 0;
+ int cur_txq, cur_rxq;
+ u8 hw = 0, num_tcf;
+ struct device *dev;
+ int ret, i;
+
+ dev = ice_pf_to_dev(pf);
+ num_tcf = mqprio_qopt->qopt.num_tc;
+ hw = mqprio_qopt->qopt.hw;
+ mode = mqprio_qopt->mode;
+ if (!hw) {
+ clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
+ vsi->ch_rss_size = 0;
+ memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
+ goto config_tcf;
+ }
+
+ /* Generate queue region map for number of TCF requested */
+ for (i = 0; i < num_tcf; i++)
+ ena_tc_qdisc |= BIT(i);
+
+ switch (mode) {
+ case TC_MQPRIO_MODE_CHANNEL:
+
+ ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt);
+ if (ret) {
+ netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n",
+ ret);
+ return ret;
+ }
+ memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
+ set_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
+ /* don't assume state of hw_tc_offload during driver load
+ * and set the flag for TC flower filter if hw_tc_offload
+ * already ON
+ */
+ if (vsi->netdev->features & NETIF_F_HW_TC)
+ set_bit(ICE_FLAG_CLS_FLOWER, pf->flags);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+config_tcf:
+
+ /* Requesting same TCF configuration as already enabled */
+ if (ena_tc_qdisc == vsi->tc_cfg.ena_tc &&
+ mode != TC_MQPRIO_MODE_CHANNEL)
+ return 0;
+
+ /* Pause VSI queues */
+ ice_dis_vsi(vsi, true);
+
+ if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+ ice_remove_q_channels(vsi, true);
+
+ if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
+ vsi->req_txq = min_t(int, ice_get_avail_txq_count(pf),
+ num_online_cpus());
+ vsi->req_rxq = min_t(int, ice_get_avail_rxq_count(pf),
+ num_online_cpus());
+ } else {
+ /* logic to rebuild VSI, same like ethtool -L */
+ u16 offset = 0, qcount_tx = 0, qcount_rx = 0;
+
+ for (i = 0; i < num_tcf; i++) {
+ if (!(ena_tc_qdisc & BIT(i)))
+ continue;
+
+ offset = vsi->mqprio_qopt.qopt.offset[i];
+ qcount_rx = vsi->mqprio_qopt.qopt.count[i];
+ qcount_tx = vsi->mqprio_qopt.qopt.count[i];
+ }
+ vsi->req_txq = offset + qcount_tx;
+ vsi->req_rxq = offset + qcount_rx;
+
+ /* store away original rss_size info, so that it gets reused
+ * form ice_vsi_rebuild during tc-qdisc delete stage - to
+ * determine, what should be the rss_sizefor main VSI
+ */
+ vsi->orig_rss_size = vsi->rss_size;
+ }
+
+ /* save current values of Tx and Rx queues before calling VSI rebuild
+ * for fallback option
+ */
+ cur_txq = vsi->num_txq;
+ cur_rxq = vsi->num_rxq;
+
+ /* proceed with rebuild main VSI using correct number of queues */
+ ret = ice_vsi_rebuild(vsi, false);
+ if (ret) {
+ /* fallback to current number of queues */
+ dev_info(dev, "Rebuild failed with new queues, try with current number of queues\n");
+ vsi->req_txq = cur_txq;
+ vsi->req_rxq = cur_rxq;
+ clear_bit(ICE_RESET_FAILED, pf->state);
+ if (ice_vsi_rebuild(vsi, false)) {
+ dev_err(dev, "Rebuild of main VSI failed again\n");
+ return ret;
+ }
+ }
+
+ vsi->all_numtc = num_tcf;
+ vsi->all_enatc = ena_tc_qdisc;
+ ret = ice_vsi_cfg_tc(vsi, ena_tc_qdisc);
+ if (ret) {
+ netdev_err(netdev, "failed configuring TC for VSI id=%d\n",
+ vsi->vsi_num);
+ goto exit;
+ }
+
+ if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
+ u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+ u64 min_tx_rate = vsi->mqprio_qopt.min_rate[0];
+
+ /* set TC0 rate limit if specified */
+ if (max_tx_rate || min_tx_rate) {
+ /* convert to Kbits/s */
+ if (max_tx_rate)
+ max_tx_rate = div_u64(max_tx_rate, ICE_BW_KBPS_DIVISOR);
+ if (min_tx_rate)
+ min_tx_rate = div_u64(min_tx_rate, ICE_BW_KBPS_DIVISOR);
+
+ ret = ice_set_bw_limit(vsi, max_tx_rate, min_tx_rate);
+ if (!ret) {
+ dev_dbg(dev, "set Tx rate max %llu min %llu for VSI(%u)\n",
+ max_tx_rate, min_tx_rate, vsi->vsi_num);
+ } else {
+ dev_err(dev, "failed to set Tx rate max %llu min %llu for VSI(%u)\n",
+ max_tx_rate, min_tx_rate, vsi->vsi_num);
+ goto exit;
+ }
+ }
+ ret = ice_create_q_channels(vsi);
+ if (ret) {
+ netdev_err(netdev, "failed configuring queue channels\n");
+ goto exit;
+ } else {
+ netdev_dbg(netdev, "successfully configured channels\n");
+ }
+ }
+
+ if (vsi->ch_rss_size)
+ ice_vsi_cfg_rss_lut_key(vsi);
+
+exit:
+ /* if error, reset the all_numtc and all_enatc */
+ if (ret) {
+ vsi->all_numtc = 0;
+ vsi->all_enatc = 0;
+ }
+ /* resume VSI */
+ ice_ena_vsi(vsi, true);
+
+ return ret;
+}
+
static LIST_HEAD(ice_block_cb_list);
static int
@@ -7190,6 +8128,8 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
void *type_data)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+ int err;
switch (type) {
case TC_SETUP_BLOCK:
@@ -7197,6 +8137,12 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
&ice_block_cb_list,
ice_setup_tc_block_cb,
np, np, true);
+ case TC_SETUP_QDISC_MQPRIO:
+ /* setup traffic classifier for receive side */
+ mutex_lock(&pf->tc_mutex);
+ err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
+ mutex_unlock(&pf->tc_mutex);
+ return err;
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 560e52b99f83..fe36c3b5eb4c 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -2999,6 +2999,43 @@ static void ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
}
/**
+ * ice_sched_save_vsi_bw - save VSI node's BW information
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save BW information of VSI type node for post replay use.
+ */
+static int
+ice_sched_save_vsi_bw(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ enum ice_rl_type rl_type, u32 bw)
+{
+ struct ice_vsi_ctx *vsi_ctx;
+
+ if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+ return -EINVAL;
+ vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+ if (!vsi_ctx)
+ return -EINVAL;
+ switch (rl_type) {
+ case ICE_MIN_BW:
+ ice_set_clear_cir_bw(&vsi_ctx->sched.bw_t_info[tc], bw);
+ break;
+ case ICE_MAX_BW:
+ ice_set_clear_eir_bw(&vsi_ctx->sched.bw_t_info[tc], bw);
+ break;
+ case ICE_SHARED_BW:
+ ice_set_clear_shared_bw(&vsi_ctx->sched.bw_t_info[tc], bw);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
* ice_sched_calc_wakeup - calculate RL profile wakeup parameter
* @hw: pointer to the HW struct
* @bw: bandwidth in Kbps
@@ -3875,9 +3912,17 @@ enum ice_status
ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
enum ice_rl_type rl_type, u32 bw)
{
- return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle,
- ICE_AGG_TYPE_VSI,
- tc, rl_type, bw);
+ int status;
+
+ status = ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle,
+ ICE_AGG_TYPE_VSI,
+ tc, rl_type, bw);
+ if (!status) {
+ mutex_lock(&pi->sched_lock);
+ status = ice_sched_save_vsi_bw(pi, vsi_handle, tc, rl_type, bw);
+ mutex_unlock(&pi->sched_lock);
+ }
+ return status;
}
/**
@@ -3894,10 +3939,19 @@ enum ice_status
ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
enum ice_rl_type rl_type)
{
- return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle,
- ICE_AGG_TYPE_VSI,
- tc, rl_type,
- ICE_SCHED_DFLT_BW);
+ int status;
+
+ status = ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle,
+ ICE_AGG_TYPE_VSI,
+ tc, rl_type,
+ ICE_SCHED_DFLT_BW);
+ if (!status) {
+ mutex_lock(&pi->sched_lock);
+ status = ice_sched_save_vsi_bw(pi, vsi_handle, tc, rl_type,
+ ICE_SCHED_DFLT_BW);
+ mutex_unlock(&pi->sched_lock);
+ }
+ return status;
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
index f89b80ba3499..551e13e46653 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.h
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -58,6 +58,8 @@ struct ice_sched_agg_info {
DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
u32 agg_id;
enum ice_agg_type agg_type;
+ /* bw_t_info saves aggregator BW information */
+ struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS];
/* save aggregator TC bitmap */
DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
};
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index a4a299012f9f..2742e1c1e337 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -2273,6 +2273,125 @@ exit:
}
/**
+ * ice_mac_fltr_exist - does this MAC filter exist for given VSI
+ * @hw: pointer to the hardware structure
+ * @mac: MAC address to be checked (for MAC filter)
+ * @vsi_handle: check MAC filter for this VSI
+ */
+bool ice_mac_fltr_exist(struct ice_hw *hw, u8 *mac, u16 vsi_handle)
+{
+ struct ice_fltr_mgmt_list_entry *entry;
+ struct list_head *rule_head;
+ struct ice_switch_info *sw;
+ struct mutex *rule_lock; /* Lock to protect filter rule list */
+ u16 hw_vsi_id;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return false;
+
+ hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+ sw = hw->switch_info;
+ rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
+ if (!rule_head)
+ return false;
+
+ rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
+ mutex_lock(rule_lock);
+ list_for_each_entry(entry, rule_head, list_entry) {
+ struct ice_fltr_info *f_info = &entry->fltr_info;
+ u8 *mac_addr = &f_info->l_data.mac.mac_addr[0];
+
+ if (is_zero_ether_addr(mac_addr))
+ continue;
+
+ if (f_info->flag != ICE_FLTR_TX ||
+ f_info->src_id != ICE_SRC_ID_VSI ||
+ f_info->lkup_type != ICE_SW_LKUP_MAC ||
+ f_info->fltr_act != ICE_FWD_TO_VSI ||
+ hw_vsi_id != f_info->fwd_id.hw_vsi_id)
+ continue;
+
+ if (ether_addr_equal(mac, mac_addr)) {
+ mutex_unlock(rule_lock);
+ return true;
+ }
+ }
+ mutex_unlock(rule_lock);
+ return false;
+}
+
+/**
+ * ice_vlan_fltr_exist - does this VLAN filter exist for given VSI
+ * @hw: pointer to the hardware structure
+ * @vlan_id: VLAN ID
+ * @vsi_handle: check MAC filter for this VSI
+ */
+bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle)
+{
+ struct ice_fltr_mgmt_list_entry *entry;
+ struct list_head *rule_head;
+ struct ice_switch_info *sw;
+ struct mutex *rule_lock; /* Lock to protect filter rule list */
+ u16 hw_vsi_id;
+
+ if (vlan_id > ICE_MAX_VLAN_ID)
+ return false;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return false;
+
+ hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+ sw = hw->switch_info;
+ rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
+ if (!rule_head)
+ return false;
+
+ rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
+ mutex_lock(rule_lock);
+ list_for_each_entry(entry, rule_head, list_entry) {
+ struct ice_fltr_info *f_info = &entry->fltr_info;
+ u16 entry_vlan_id = f_info->l_data.vlan.vlan_id;
+ struct ice_vsi_list_map_info *map_info;
+
+ if (entry_vlan_id > ICE_MAX_VLAN_ID)
+ continue;
+
+ if (f_info->flag != ICE_FLTR_TX ||
+ f_info->src_id != ICE_SRC_ID_VSI ||
+ f_info->lkup_type != ICE_SW_LKUP_VLAN)
+ continue;
+
+ /* Only allowed filter action are FWD_TO_VSI/_VSI_LIST */
+ if (f_info->fltr_act != ICE_FWD_TO_VSI &&
+ f_info->fltr_act != ICE_FWD_TO_VSI_LIST)
+ continue;
+
+ if (f_info->fltr_act == ICE_FWD_TO_VSI) {
+ if (hw_vsi_id != f_info->fwd_id.hw_vsi_id)
+ continue;
+ } else if (f_info->fltr_act == ICE_FWD_TO_VSI_LIST) {
+ /* If filter_action is FWD_TO_VSI_LIST, make sure
+ * that VSI being checked is part of VSI list
+ */
+ if (entry->vsi_count == 1 &&
+ entry->vsi_list_info) {
+ map_info = entry->vsi_list_info;
+ if (!test_bit(vsi_handle, map_info->vsi_map))
+ continue;
+ }
+ }
+
+ if (vlan_id == entry_vlan_id) {
+ mutex_unlock(rule_lock);
+ return true;
+ }
+ }
+ mutex_unlock(rule_lock);
+
+ return false;
+}
+
+/**
* ice_add_mac - Add a MAC address based filter rule
* @hw: pointer to the hardware structure
* @m_list: list of MAC addresses and forwarding information
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index d4c0a3b594af..c4dd2062c469 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -335,6 +335,8 @@ enum ice_status
ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list);
int
ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable);
+bool ice_mac_fltr_exist(struct ice_hw *hw, u8 *mac, u16 vsi_handle);
+bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle);
void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle);
enum ice_status
ice_add_vlan(struct ice_hw *hw, struct list_head *m_list);
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
index 1dccfd116bc9..725caa160b13 100644
--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
@@ -304,6 +304,136 @@ exit:
}
/**
+ * ice_add_tc_flower_adv_fltr - add appropriate filter rules
+ * @vsi: Pointer to VSI
+ * @tc_fltr: Pointer to TC flower filter structure
+ *
+ * based on filter parameters using Advance recipes supported
+ * by OS package.
+ */
+static int
+ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi,
+ struct ice_tc_flower_fltr *tc_fltr)
+{
+ struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers;
+ struct ice_adv_rule_info rule_info = {0};
+ struct ice_rule_query_data rule_added;
+ struct ice_adv_lkup_elem *list;
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 flags = tc_fltr->flags;
+ struct ice_vsi *ch_vsi;
+ struct device *dev;
+ u16 lkups_cnt = 0;
+ u16 l4_proto = 0;
+ int ret = 0;
+ u16 i = 0;
+
+ dev = ice_pf_to_dev(pf);
+ if (ice_is_safe_mode(pf)) {
+ NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter because driver is in safe mode");
+ return -EOPNOTSUPP;
+ }
+
+ if (!flags || (flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 |
+ ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 |
+ ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 |
+ ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 |
+ ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT))) {
+ NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unsupported encap field(s)");
+ return -EOPNOTSUPP;
+ }
+
+ /* get the channel (aka ADQ VSI) */
+ if (tc_fltr->dest_vsi)
+ ch_vsi = tc_fltr->dest_vsi;
+ else
+ ch_vsi = vsi->tc_map_vsi[tc_fltr->action.tc_class];
+
+ lkups_cnt = ice_tc_count_lkups(flags, headers, tc_fltr);
+ list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
+ if (!list)
+ return -ENOMEM;
+
+ i = ice_tc_fill_rules(hw, flags, tc_fltr, list, &rule_info, &l4_proto);
+ if (i != lkups_cnt) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ rule_info.sw_act.fltr_act = tc_fltr->action.fltr_act;
+ if (tc_fltr->action.tc_class >= ICE_CHNL_START_TC) {
+ if (!ch_vsi) {
+ NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter because specified destination doesn't exist");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI;
+ rule_info.sw_act.vsi_handle = ch_vsi->idx;
+ rule_info.priority = 7;
+ rule_info.sw_act.src = hw->pf_id;
+ rule_info.rx = true;
+ dev_dbg(dev, "add switch rule for TC:%u vsi_idx:%u, lkups_cnt:%u\n",
+ tc_fltr->action.tc_class,
+ rule_info.sw_act.vsi_handle, lkups_cnt);
+ } else {
+ rule_info.sw_act.flag |= ICE_FLTR_TX;
+ rule_info.sw_act.src = vsi->idx;
+ rule_info.rx = false;
+ }
+
+ /* specify the cookie as filter_rule_id */
+ rule_info.fltr_rule_id = tc_fltr->cookie;
+
+ ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added);
+ if (ret == -EEXIST) {
+ NL_SET_ERR_MSG_MOD(tc_fltr->extack,
+ "Unable to add filter because it already exist");
+ ret = -EINVAL;
+ goto exit;
+ } else if (ret) {
+ NL_SET_ERR_MSG_MOD(tc_fltr->extack,
+ "Unable to add filter due to error");
+ ret = -EIO;
+ goto exit;
+ }
+
+ /* store the output params, which are needed later for removing
+ * advanced switch filter
+ */
+ tc_fltr->rid = rule_added.rid;
+ tc_fltr->rule_id = rule_added.rule_id;
+ if (tc_fltr->action.tc_class > 0 && ch_vsi) {
+ /* For PF ADQ, VSI type is set as ICE_VSI_CHNL, and
+ * for PF ADQ filter, it is not yet set in tc_fltr,
+ * hence store the dest_vsi ptr in tc_fltr
+ */
+ if (ch_vsi->type == ICE_VSI_CHNL)
+ tc_fltr->dest_vsi = ch_vsi;
+ /* keep track of advanced switch filter for
+ * destination VSI (channel VSI)
+ */
+ ch_vsi->num_chnl_fltr++;
+ /* in this case, dest_id is VSI handle (sw handle) */
+ tc_fltr->dest_id = rule_added.vsi_handle;
+
+ /* keeps track of channel filters for PF VSI */
+ if (vsi->type == ICE_VSI_PF &&
+ (flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+ ICE_TC_FLWR_FIELD_ENC_DST_MAC)))
+ pf->num_dmac_chnl_fltrs++;
+ }
+ dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x) for TC %u, rid %u, rule_id %u, vsi_idx %u\n",
+ lkups_cnt, flags,
+ tc_fltr->action.tc_class, rule_added.rid,
+ rule_added.rule_id, rule_added.vsi_handle);
+exit:
+ kfree(list);
+ return ret;
+}
+
+/**
* ice_tc_set_ipv4 - Parse IPv4 addresses from TC flower filter
* @match: Pointer to flow match structure
* @fltr: Pointer to filter structure
@@ -561,10 +691,13 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
static int
ice_add_switch_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
{
+ if (fltr->action.fltr_act == ICE_FWD_TO_QGRP)
+ return -EOPNOTSUPP;
+
if (ice_is_eswitch_mode_switchdev(vsi->back))
return ice_eswitch_add_tc_fltr(vsi, fltr);
- return -EOPNOTSUPP;
+ return ice_add_tc_flower_adv_fltr(vsi, fltr);
}
/**
@@ -581,6 +714,7 @@ ice_handle_tclass_action(struct ice_vsi *vsi,
struct ice_tc_flower_fltr *fltr)
{
int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid);
+ struct ice_vsi *main_vsi;
if (tc < 0) {
NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because specified destination is invalid");
@@ -591,13 +725,69 @@ ice_handle_tclass_action(struct ice_vsi *vsi,
return -EINVAL;
}
- if (!(vsi->tc_cfg.ena_tc & BIT(tc))) {
+ if (!(vsi->all_enatc & BIT(tc))) {
NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because of non-existence destination");
return -EINVAL;
}
/* Redirect to a TC class or Queue Group */
- fltr->action.fltr_act = ICE_FWD_TO_QGRP;
+ main_vsi = ice_get_main_vsi(vsi->back);
+ if (!main_vsi || !main_vsi->netdev) {
+ NL_SET_ERR_MSG_MOD(fltr->extack,
+ "Unable to add filter because of invalid netdevice");
+ return -EINVAL;
+ }
+
+ if ((fltr->flags & ICE_TC_FLWR_FIELD_TENANT_ID) &&
+ (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+ ICE_TC_FLWR_FIELD_SRC_MAC))) {
+ NL_SET_ERR_MSG_MOD(fltr->extack,
+ "Unable to add filter because filter using tunnel key and inner MAC is unsupported combination");
+ return -EOPNOTSUPP;
+ }
+
+ /* For ADQ, filter must include dest MAC address, otherwise unwanted
+ * packets with unrelated MAC address get delivered to ADQ VSIs as long
+ * as remaining filter criteria is satisfied such as dest IP address
+ * and dest/src L4 port. Following code is trying to handle:
+ * 1. For non-tunnel, if user specify MAC addresses, use them (means
+ * this code won't do anything
+ * 2. For non-tunnel, if user didn't specify MAC address, add implicit
+ * dest MAC to be lower netdev's active unicast MAC address
+ */
+ if (!(fltr->flags & ICE_TC_FLWR_FIELD_DST_MAC)) {
+ ether_addr_copy(fltr->outer_headers.l2_key.dst_mac,
+ main_vsi->netdev->dev_addr);
+ eth_broadcast_addr(fltr->outer_headers.l2_mask.dst_mac);
+ fltr->flags |= ICE_TC_FLWR_FIELD_DST_MAC;
+ }
+
+ /* validate specified dest MAC address, make sure either it belongs to
+ * lower netdev or any of MACVLAN. MACVLANs MAC address are added as
+ * unicast MAC filter destined to main VSI.
+ */
+ if (!ice_mac_fltr_exist(&main_vsi->back->hw,
+ fltr->outer_headers.l2_key.dst_mac,
+ main_vsi->idx)) {
+ NL_SET_ERR_MSG_MOD(fltr->extack,
+ "Unable to add filter because legacy MAC filter for specified destination doesn't exist");
+ return -EINVAL;
+ }
+
+ /* Make sure VLAN is already added to main VSI, before allowing ADQ to
+ * add a VLAN based filter such as MAC + VLAN + L4 port.
+ */
+ if (fltr->flags & ICE_TC_FLWR_FIELD_VLAN) {
+ u16 vlan_id = be16_to_cpu(fltr->outer_headers.vlan_hdr.vlan_id);
+
+ if (!ice_vlan_fltr_exist(&main_vsi->back->hw, vlan_id,
+ main_vsi->idx)) {
+ NL_SET_ERR_MSG_MOD(fltr->extack,
+ "Unable to add filter because legacy VLAN filter for specified destination doesn't exist");
+ return -EINVAL;
+ }
+ }
+ fltr->action.fltr_act = ICE_FWD_TO_VSI;
fltr->action.tc_class = tc;
return 0;
@@ -639,8 +829,8 @@ ice_parse_tc_flower_actions(struct ice_vsi *vsi,
/* Drop action */
if (act->id == FLOW_ACTION_DROP) {
- fltr->action.fltr_act = ICE_DROP_PACKET;
- return 0;
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported action DROP");
+ return -EINVAL;
}
fltr->action.fltr_act = ICE_FWD_TO_VSI;
}
@@ -673,6 +863,20 @@ static int ice_del_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
return -EIO;
}
+ /* update advanced switch filter count for destination
+ * VSI if filter destination was VSI
+ */
+ if (fltr->dest_vsi) {
+ if (fltr->dest_vsi->type == ICE_VSI_CHNL) {
+ fltr->dest_vsi->num_chnl_fltr--;
+
+ /* keeps track of channel filters for PF VSI */
+ if (vsi->type == ICE_VSI_PF &&
+ (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+ ICE_TC_FLWR_FIELD_ENC_DST_MAC)))
+ pf->num_dmac_chnl_fltrs--;
+ }
+ }
return 0;
}
@@ -811,7 +1015,8 @@ ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower)
/* find filter */
fltr = ice_find_tc_flower_fltr(pf, cls_flower->cookie);
if (!fltr) {
- if (hlist_empty(&pf->tc_flower_fltr_list))
+ if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) &&
+ hlist_empty(&pf->tc_flower_fltr_list))
return 0;
NL_SET_ERR_MSG_MOD(cls_flower->common.extack, "failed to delete TC flower filter because unable to find it");
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h
index d90e9e37ae25..ee9b284fcc02 100644
--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h
@@ -120,6 +120,28 @@ struct ice_tc_flower_fltr {
struct netlink_ext_ack *extack;
};
+/**
+ * ice_is_chnl_fltr - is this a valid channel filter
+ * @f: Pointer to tc-flower filter
+ *
+ * Criteria to determine of given filter is valid channel filter
+ * or not is based on its "destination". If destination is hw_tc (aka tc_class)
+ * and it is non-zero, then it is valid channel (aka ADQ) filter
+ */
+static inline bool ice_is_chnl_fltr(struct ice_tc_flower_fltr *f)
+{
+ return !!f->action.tc_class;
+}
+
+/**
+ * ice_chnl_dmac_fltr_cnt - DMAC based CHNL filter count
+ * @pf: Pointer to PF
+ */
+static inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf)
+{
+ return pf->num_dmac_chnl_fltrs;
+}
+
int
ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi,
struct flow_cls_offload *cls_flower);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index c759a02bfce4..c56dd1749903 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -287,6 +287,7 @@ struct ice_rx_ring {
struct rcu_head rcu; /* to avoid race on free */
/* CL4 - 3rd cacheline starts here */
+ struct ice_channel *ch;
struct bpf_prog *xdp_prog;
struct ice_tx_ring *xdp_ring;
struct xsk_buff_pool *xsk_pool;
@@ -328,6 +329,7 @@ struct ice_tx_ring {
/* CL3 - 3rd cacheline starts here */
struct rcu_head rcu; /* to avoid race on free */
DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */
+ struct ice_channel *ch;
struct ice_ptp_tx *tx_tstamps;
spinlock_t tx_lock;
u32 txq_teid; /* Added Tx queue TEID */
@@ -352,6 +354,11 @@ static inline void ice_clear_ring_build_skb_ena(struct ice_rx_ring *ring)
ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB;
}
+static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring)
+{
+ return !!ring->ch;
+}
+
static inline bool ice_ring_is_xdp(struct ice_tx_ring *ring)
{
return !!(ring->flags & ICE_TX_FLAGS_RING_XDP);
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index d5cb1c5a89c0..9e0c2923c62e 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -138,6 +138,7 @@ enum ice_vsi_type {
ICE_VSI_PF = 0,
ICE_VSI_VF = 1,
ICE_VSI_CTRL = 3, /* equates to ICE_VSI_PF with 1 queue pair */
+ ICE_VSI_CHNL = 4,
ICE_VSI_LB = 6,
ICE_VSI_SWITCHDEV_CTRL = 7,
};
@@ -570,6 +571,8 @@ struct ice_sched_vsi_info {
struct list_head list_entry;
u16 max_lanq[ICE_MAX_TRAFFIC_CLASS];
u16 max_rdmaq[ICE_MAX_TRAFFIC_CLASS];
+ /* bw_t_info saves VSI BW information */
+ struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS];
};
/* driver defines the policy */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 8e3f9ec4e35b..a42eaf6f942e 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -832,7 +832,7 @@ static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf)
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
- vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf->vf_id);
+ vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf->vf_id, NULL);
if (!vsi) {
dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n");
@@ -859,7 +859,7 @@ struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf)
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
- vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf->vf_id);
+ vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf->vf_id, NULL);
if (!vsi) {
dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n");
ice_vf_ctrl_invalidate_vsi(vf);