summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-10-31 20:56:59 +0100
committerDavid S. Miller <davem@davemloft.net>2018-10-31 20:56:59 +0100
commite2acdddde01511c74e4f6f1d5951a0f5d9bea14a (patch)
treeb189bb2159a9655bb89282d5f730de19e899a24c
parentMerge branch 'hns3-fixes' (diff)
parentselftests: mlxsw: qos_mc_aware: Add a test for UC awareness (diff)
downloadlinux-e2acdddde01511c74e4f6f1d5951a0f5d9bea14a.tar.xz
linux-e2acdddde01511c74e4f6f1d5951a0f5d9bea14a.zip
Merge branch 'mlxsw-Enable-minimum-shaper-on-MC-TCs'
Ido Schimmel says: ==================== mlxsw: Enable minimum shaper on MC TCs Petr says: An MC-aware mode was introduced in commit 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports"). In MC-aware mode, BUM traffic gets special treatment by being assigned to a separate set of traffic classes 8..15. Pairs of TCs 0 and 8, 1 and 9, etc., are then configured to strictly prioritize the lower-numbered ones. The intention is to prevent BUM traffic from flooding the switch and pushing out all UC traffic, which would otherwise happen, and instead give UC traffic precedence. However, strictly prioritizing UC traffic has the effect that UC overload pushes out all BUM traffic, such as legitimate ARP queries. These packets are kept in queues for a while, but under sustained UC overload, their lifetime eventually expires and these packets are dropped. That is detrimental to network performance as well. In this patchset, MC TCs (8..15) are configured with a minimum shaper of 200Mbps (the minimum permitted value) to allow a trickle of necessary control traffic to get through. First in patch #1, the QEEC register is extended with fields necessary to configure the minimum shaper. In patch #2, the minimum shaper is enabled on TCs 8..15. In patches #3 and #4, first the MC-awareness test is tweaked to support the minimum shaper, and then a new test is introduced to test that MC traffic behaves well under UC overload. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h22
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c25
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh95
3 files changed, 117 insertions, 25 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 32cb6718bb17..db3d2790aeec 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3284,7 +3284,7 @@ static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port,
* Configures the ETS elements.
*/
#define MLXSW_REG_QEEC_ID 0x400D
-#define MLXSW_REG_QEEC_LEN 0x1C
+#define MLXSW_REG_QEEC_LEN 0x20
MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN);
@@ -3326,6 +3326,15 @@ MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8);
*/
MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8);
+/* reg_qeec_mise
+ * Min shaper configuration enable. Enables configuration of the min
+ * shaper on this ETS element
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1);
+
enum {
MLXSW_REG_QEEC_BYTES_MODE,
MLXSW_REG_QEEC_PACKETS_MODE,
@@ -3342,6 +3351,17 @@ enum {
*/
MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1);
+/* The smallest permitted min shaper rate. */
+#define MLXSW_REG_QEEC_MIS_MIN 200000 /* Kbps */
+
+/* reg_qeec_min_shaper_rate
+ * Min shaper information rate.
+ * For CPU port, can only be configured for port hierarchy.
+ * When in bytes mode, value is specified in units of 1000bps.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28);
+
/* reg_qeec_mase
* Max shaper configuration enable. Enables configuration of the max
* shaper on this ETS element.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 8a4983adae94..a2df12b79f8e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2740,6 +2740,21 @@ int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
}
+/* Enable the min shaper on a single ETS element of a port.
+ *
+ * Packs a QEEC payload for the port's local_port with the given hierarchy,
+ * element index and next element index, turns on the min shaper
+ * configuration enable bit (mise), stores minrate in the min_shaper_rate
+ * field and writes the register.
+ *
+ * minrate is presumably in Kbps, like MLXSW_REG_QEEC_MIS_MIN -- confirm
+ * against the QEEC register description.
+ *
+ * Returns the result of mlxsw_reg_write().
+ */
+static int mlxsw_sp_port_min_bw_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				    enum mlxsw_reg_qeec_hr hr, u8 index,
+				    u8 next_index, u32 minrate)
+{
+	char payload[MLXSW_REG_QEEC_LEN];
+
+	mlxsw_reg_qeec_pack(payload, mlxsw_sp_port->local_port, hr, index,
+			    next_index);
+	mlxsw_reg_qeec_mise_set(payload, true);
+	mlxsw_reg_qeec_min_shaper_rate_set(payload, minrate);
+
+	return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(qeec),
+			       payload);
+}
+
int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port,
u8 switch_prio, u8 tclass)
{
@@ -2817,6 +2832,16 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
return err;
}
+ /* Configure the min shaper for multicast TCs. */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_TC,
+ i + 8, i,
+ MLXSW_REG_QEEC_MIS_MIN);
+ if (err)
+ return err;
+ }
+
/* Map all priorities to traffic class 0. */
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0);
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index 0150bb2741eb..117f6f35d72f 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -25,24 +25,24 @@
# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
# multicast traffic uses 8K frames.
#
-# +-----------------------+ +----------------------------------+
-# | H1 | | H2 |
-# | | | unicast --> + $h2.111 |
-# | | | traffic | 192.0.2.129/28 |
-# | multicast | | | e-qos-map 0:1 |
-# | traffic | | | |
-# | $h1 + <----- | | + $h2 |
-# +-----|-----------------+ +--------------|-------------------+
-# | |
-# +-----|-------------------------------------------------|-------------------+
-# | + $swp1 + $swp2 |
-# | | >1Gbps | >1Gbps |
-# | +---|----------------+ +----------|----------------+ |
-# | | + $swp1.1 | | + $swp2.111 | |
+# +---------------------------+ +----------------------------------+
+# | H1 | | H2 |
+# | | | unicast --> + $h2.111 |
+# | multicast | | traffic | 192.0.2.129/28 |
+# | traffic | | | e-qos-map 0:1 |
+# | $h1 + <----- | | | |
+# | 192.0.2.65/28 | | | + $h2 |
+# +---------------|-----------+ +--------------|-------------------+
+# | |
+# +---------------|---------------------------------------|-------------------+
+# | $swp1 + + $swp2 |
+# | >1Gbps | | >1Gbps |
+# | +-------------|------+ +----------|----------------+ |
+# | | $swp1.1 + | | + $swp2.111 | |
# | | BR1 | SW | BR111 | |
-# | | + $swp3.1 | | + $swp3.111 | |
-# | +---|----------------+ +----------|----------------+ |
-# | \_________________________________________________/ |
+# | | $swp3.1 + | | + $swp3.111 | |
+# | +-------------|------+ +----------|----------------+ |
+# | \_______________________________________/ |
# | | |
# | + $swp3 |
# | | 1Gbps bottleneck |
@@ -51,6 +51,7 @@
# |
# +--|-----------------+
# | + $h3 H3 |
+# | | 192.0.2.66/28 |
# | | |
# | + $h3.111 |
# | 192.0.2.130/28 |
@@ -59,6 +60,7 @@
ALL_TESTS="
ping_ipv4
test_mc_aware
+ test_uc_aware
"
lib_dir=$(dirname $0)/../../../net/forwarding
@@ -68,14 +70,14 @@ source $lib_dir/lib.sh
h1_create()
{
- simple_if_init $h1
+ simple_if_init $h1 192.0.2.65/28
mtu_set $h1 10000
}
h1_destroy()
{
mtu_restore $h1
- simple_if_fini $h1
+ simple_if_fini $h1 192.0.2.65/28
}
h2_create()
@@ -97,7 +99,7 @@ h2_destroy()
h3_create()
{
- simple_if_init $h3
+ simple_if_init $h3 192.0.2.66/28
mtu_set $h3 10000
vlan_create $h3 111 v$h3 192.0.2.130/28
@@ -108,7 +110,7 @@ h3_destroy()
vlan_destroy $h3 111
mtu_restore $h3
- simple_if_fini $h3
+ simple_if_fini $h3 192.0.2.66/28
}
switch_create()
@@ -251,7 +253,7 @@ measure_uc_rate()
# average ingress rate to somewhat mitigate this.
local min_ingress=2147483648
- mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+ $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
-a own -b $h3mac -t udp -q &
sleep 1
@@ -291,7 +293,7 @@ test_mc_aware()
check_err $? "Could not get high enough UC-only ingress rate"
local ucth1=${uc_rate[1]}
- mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
+ $MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
local d0=$(date +%s)
local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
@@ -311,7 +313,7 @@ test_mc_aware()
ret = 100 * ($ucth1 - $ucth2) / $ucth1
if (ret > 0) { ret } else { 0 }
")
- check_err $(bc <<< "$deg > 10")
+ check_err $(bc <<< "$deg > 25")
local interval=$((d1 - d0))
local mc_ir=$(rate $u0 $u1 $interval)
@@ -335,6 +337,51 @@ test_mc_aware()
echo " egress UC throughput $(humanize ${uc_rate_2[1]})"
echo " ingress MC throughput $(humanize $mc_ir)"
echo " egress MC throughput $(humanize $mc_er)"
+ echo
+}
+
+# Check that broadcast traffic still gets through while unicast traffic is
+# being sent continuously in the background: start a UC stream from $h2.111
+# towards 192.0.2.130, send a series of broadcast ARP requests from $h1 for
+# 192.0.2.66 ($h3's address), and require every one of them to be answered.
+# The UC ingress ($swp2) and egress ($h3) rates over the test interval are
+# reported for reference only; they do not affect the verdict.
+test_uc_aware()
+{
+	RET=0
+
+	# Background unicast stream; it is killed at the end of the test.
+	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+		-a own -b $h3mac -t udp -q &
+
+	# Snapshot the time and the priority-1 octet counters before probing.
+	local d0=$(date +%s)
+	local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
+	local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+	sleep 1
+
+	local attempts=50
+	local passes=0
+	local i
+
+	# One ARP request per iteration; count the iterations where arping
+	# reports success.
+	for ((i = 0; i < attempts; ++i)); do
+		if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then
+			((passes++))
+		fi
+
+		sleep 0.1
+	done
+
+	local d1=$(date +%s)
+	local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
+	local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+
+	local interval=$((d1 - d0))
+	local uc_ir=$(rate $u0 $u1 $interval)
+	local uc_er=$(rate $t0 $t1 $interval)
+
+	# The test passes only if every single ARP request got a response.
+	((attempts == passes))
+	check_err $? "Not all BC ARPs got a response"
+
+	# Suppress noise from killing mausezahn.
+	{ kill %% && wait; } 2>/dev/null
+
+	log_test "MC performance under UC overload"
+	echo " ingress UC throughput $(humanize ${uc_ir})"
+	echo " egress UC throughput $(humanize ${uc_er})"
+	echo " sent $attempts BC ARPs, got $passes responses"
+}
trap cleanup EXIT