summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2020-10-15 21:43:21 +0200
committerJakub Kicinski <kuba@kernel.org>2020-10-15 21:43:21 +0200
commit2295cddf99e3f7c2be2b1160e2f5e53cc35b09be (patch)
tree2c5a619c8aee5ef68028a65a1c059736b005846b
parentnetfilter: nftables: allow re-computing sctp CRC-32C in 'payload' statements (diff)
parentRevert "bpfilter: Fix build error with CONFIG_BPFILTER_UMH" (diff)
downloadlinux-2295cddf99e3f7c2be2b1160e2f5e53cc35b09be.tar.xz
linux-2295cddf99e3f7c2be2b1160e2f5e53cc35b09be.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Minor conflicts in net/mptcp/protocol.h and tools/testing/selftests/net/Makefile. In both cases code was added on both sides in the same place so just keep both. Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--Documentation/networking/scaling.rst6
-rw-r--r--drivers/net/can/m_can/m_can_platform.c2
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c16
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c175
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h15
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c35
-rw-r--r--drivers/net/ethernet/ibm/ibmveth.c19
-rw-r--r--drivers/net/ethernet/korina.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c2
-rw-r--r--drivers/net/ethernet/ti/tlan.c2
-rw-r--r--drivers/net/ipa/ipa_endpoint.c6
-rw-r--r--include/net/netfilter/nf_log.h1
-rw-r--r--include/net/tc_act/tc_tunnel_key.h5
-rw-r--r--include/net/tls.h4
-rw-r--r--net/can/j1939/transport.c2
-rw-r--r--net/core/sock.c11
-rw-r--r--net/ipv4/icmp.c23
-rw-r--r--net/ipv4/ip_gre.c15
-rw-r--r--net/ipv4/netfilter/nf_log_arp.c19
-rw-r--r--net/ipv4/netfilter/nf_log_ipv4.c6
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv6/icmp.c7
-rw-r--r--net/ipv6/ip6_fib.c4
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/netfilter/nf_log_ipv6.c8
-rw-r--r--net/ipv6/route.c3
-rw-r--r--net/mptcp/options.c34
-rw-r--r--net/mptcp/protocol.c17
-rw-r--r--net/mptcp/protocol.h4
-rw-r--r--net/mptcp/subflow.c19
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c6
-rw-r--r--net/netfilter/nf_log_common.c12
-rw-r--r--net/netfilter/nf_tables_api.c19
-rw-r--r--net/smc/smc_core.c5
-rw-r--r--net/smc/smc_llc.c19
-rw-r--r--net/tipc/msg.c3
-rw-r--r--net/tipc/name_distr.c10
-rw-r--r--net/tipc/node.c2
-rw-r--r--net/tls/tls_device.c11
-rw-r--r--tools/testing/selftests/net/Makefile1
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh626
-rw-r--r--tools/testing/selftests/netfilter/nf-queue.c61
-rwxr-xr-xtools/testing/selftests/netfilter/nft_meta.sh32
-rwxr-xr-xtools/testing/selftests/netfilter/nft_queue.sh70
45 files changed, 1209 insertions, 140 deletions
diff --git a/Documentation/networking/scaling.rst b/Documentation/networking/scaling.rst
index 8f0347b9fb3d..3d435caa3ef2 100644
--- a/Documentation/networking/scaling.rst
+++ b/Documentation/networking/scaling.rst
@@ -465,9 +465,9 @@ XPS Configuration
-----------------
XPS is only available if the kconfig symbol CONFIG_XPS is enabled (on by
-default for SMP). The functionality remains disabled until explicitly
-configured. To enable XPS, the bitmap of CPUs/receive-queues that may
-use a transmit queue is configured using the sysfs file entry:
+default for SMP). If compiled in, it is driver dependent whether, and
+how, XPS is configured at device init. The mapping of CPUs/receive-queues
+to transmit queue can be inspected and configured using sysfs:
For selection based on CPUs map::
diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c
index 38ea5e600fb8..e6d0cb9ee02f 100644
--- a/drivers/net/can/m_can/m_can_platform.c
+++ b/drivers/net/can/m_can/m_can_platform.c
@@ -144,8 +144,6 @@ static int __maybe_unused m_can_runtime_suspend(struct device *dev)
struct net_device *ndev = dev_get_drvdata(dev);
struct m_can_classdev *mcan_class = netdev_priv(ndev);
- m_can_class_suspend(dev);
-
clk_disable_unprepare(mcan_class->cclk);
clk_disable_unprepare(mcan_class->hclk);
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 01f5784f69cb..0ef854911f21 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -103,14 +103,8 @@ void ksz_init_mib_timer(struct ksz_device *dev)
INIT_DELAYED_WORK(&dev->mib_read, ksz_mib_read_work);
- /* Read MIB counters every 30 seconds to avoid overflow. */
- dev->mib_read_interval = msecs_to_jiffies(30000);
-
for (i = 0; i < dev->mib_port_cnt; i++)
dev->dev_ops->port_init_cnt(dev, i);
-
- /* Start the timer 2 seconds later. */
- schedule_delayed_work(&dev->mib_read, msecs_to_jiffies(2000));
}
EXPORT_SYMBOL_GPL(ksz_init_mib_timer);
@@ -143,7 +137,9 @@ void ksz_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode,
/* Read all MIB counters when the link is going down. */
p->read = true;
- schedule_delayed_work(&dev->mib_read, 0);
+ /* timer started */
+ if (dev->mib_read_interval)
+ schedule_delayed_work(&dev->mib_read, 0);
}
EXPORT_SYMBOL_GPL(ksz_mac_link_down);
@@ -451,6 +447,12 @@ int ksz_switch_register(struct ksz_device *dev,
return ret;
}
+ /* Read MIB counters every 30 seconds to avoid overflow. */
+ dev->mib_read_interval = msecs_to_jiffies(30000);
+
+ /* Start the MIB timer. */
+ schedule_delayed_work(&dev->mib_read, 0);
+
return 0;
}
EXPORT_SYMBOL(ksz_switch_register);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index f642c1b475c4..1b88bd1c2dbe 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -60,6 +60,89 @@ static struct ch_tc_pedit_fields pedits[] = {
PEDIT_FIELDS(IP6_, DST_127_96, 4, nat_lip, 12),
};
+static const struct cxgb4_natmode_config cxgb4_natmode_config_array[] = {
+ /* Default supported NAT modes */
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_NONE,
+ .natmode = NAT_MODE_NONE,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_DIP,
+ .natmode = NAT_MODE_DIP,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT,
+ .natmode = NAT_MODE_DIP_DP,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT |
+ CXGB4_ACTION_NATMODE_SIP,
+ .natmode = NAT_MODE_DIP_DP_SIP,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT |
+ CXGB4_ACTION_NATMODE_SPORT,
+ .natmode = NAT_MODE_DIP_DP_SP,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_SIP | CXGB4_ACTION_NATMODE_SPORT,
+ .natmode = NAT_MODE_SIP_SP,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP |
+ CXGB4_ACTION_NATMODE_SPORT,
+ .natmode = NAT_MODE_DIP_SIP_SP,
+ },
+ {
+ .chip = CHELSIO_T5,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP |
+ CXGB4_ACTION_NATMODE_DPORT |
+ CXGB4_ACTION_NATMODE_SPORT,
+ .natmode = NAT_MODE_ALL,
+ },
+ /* T6+ can ignore L4 ports when they're disabled. */
+ {
+ .chip = CHELSIO_T6,
+ .flags = CXGB4_ACTION_NATMODE_SIP,
+ .natmode = NAT_MODE_SIP_SP,
+ },
+ {
+ .chip = CHELSIO_T6,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SPORT,
+ .natmode = NAT_MODE_DIP_DP_SP,
+ },
+ {
+ .chip = CHELSIO_T6,
+ .flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP,
+ .natmode = NAT_MODE_ALL,
+ },
+};
+
+static void cxgb4_action_natmode_tweak(struct ch_filter_specification *fs,
+ u8 natmode_flags)
+{
+ u8 i = 0;
+
+ /* Translate the enabled NAT 4-tuple fields to one of the
+ * hardware supported NAT mode configurations. This ensures
+ * that we pick a valid combination, where the disabled fields
+ * do not get overwritten to 0.
+ */
+ for (i = 0; i < ARRAY_SIZE(cxgb4_natmode_config_array); i++) {
+ if (cxgb4_natmode_config_array[i].flags == natmode_flags) {
+ fs->nat_mode = cxgb4_natmode_config_array[i].natmode;
+ return;
+ }
+ }
+}
+
static struct ch_tc_flower_entry *allocate_flower_entry(void)
{
struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL);
@@ -289,7 +372,8 @@ static void offload_pedit(struct ch_filter_specification *fs, u32 val, u32 mask,
}
static void process_pedit_field(struct ch_filter_specification *fs, u32 val,
- u32 mask, u32 offset, u8 htype)
+ u32 mask, u32 offset, u8 htype,
+ u8 *natmode_flags)
{
switch (htype) {
case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
@@ -314,60 +398,94 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val,
switch (offset) {
case PEDIT_IP4_SRC:
offload_pedit(fs, val, mask, IP4_SRC);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP4_DST:
offload_pedit(fs, val, mask, IP4_DST);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
}
- fs->nat_mode = NAT_MODE_ALL;
break;
case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
switch (offset) {
case PEDIT_IP6_SRC_31_0:
offload_pedit(fs, val, mask, IP6_SRC_31_0);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_SRC_63_32:
offload_pedit(fs, val, mask, IP6_SRC_63_32);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_SRC_95_64:
offload_pedit(fs, val, mask, IP6_SRC_95_64);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_SRC_127_96:
offload_pedit(fs, val, mask, IP6_SRC_127_96);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_DST_31_0:
offload_pedit(fs, val, mask, IP6_DST_31_0);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
case PEDIT_IP6_DST_63_32:
offload_pedit(fs, val, mask, IP6_DST_63_32);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
case PEDIT_IP6_DST_95_64:
offload_pedit(fs, val, mask, IP6_DST_95_64);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
case PEDIT_IP6_DST_127_96:
offload_pedit(fs, val, mask, IP6_DST_127_96);
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
}
- fs->nat_mode = NAT_MODE_ALL;
break;
case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
switch (offset) {
case PEDIT_TCP_SPORT_DPORT:
- if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
+ if (~mask & PEDIT_TCP_UDP_SPORT_MASK) {
fs->nat_fport = val;
- else
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
+ } else {
fs->nat_lport = val >> 16;
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
+ }
}
- fs->nat_mode = NAT_MODE_ALL;
break;
case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
switch (offset) {
case PEDIT_UDP_SPORT_DPORT:
- if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
+ if (~mask & PEDIT_TCP_UDP_SPORT_MASK) {
fs->nat_fport = val;
- else
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
+ } else {
fs->nat_lport = val >> 16;
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
+ }
}
- fs->nat_mode = NAT_MODE_ALL;
+ break;
+ }
+}
+
+static int cxgb4_action_natmode_validate(struct adapter *adap, u8 natmode_flags,
+ struct netlink_ext_ack *extack)
+{
+ u8 i = 0;
+
+ /* Extract the NAT mode to enable based on what 4-tuple fields
+ * are enabled to be overwritten. This ensures that the
+ * disabled fields don't get overwritten to 0.
+ */
+ for (i = 0; i < ARRAY_SIZE(cxgb4_natmode_config_array); i++) {
+ const struct cxgb4_natmode_config *c;
+
+ c = &cxgb4_natmode_config_array[i];
+ if (CHELSIO_CHIP_VERSION(adap->params.chip) >= c->chip &&
+ natmode_flags == c->flags)
+ return 0;
}
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported NAT mode 4-tuple combination");
+ return -EOPNOTSUPP;
}
void cxgb4_process_flow_actions(struct net_device *in,
@@ -375,6 +493,7 @@ void cxgb4_process_flow_actions(struct net_device *in,
struct ch_filter_specification *fs)
{
struct flow_action_entry *act;
+ u8 natmode_flags = 0;
int i;
flow_action_for_each(i, act, actions) {
@@ -426,7 +545,8 @@ void cxgb4_process_flow_actions(struct net_device *in,
val = act->mangle.val;
offset = act->mangle.offset;
- process_pedit_field(fs, val, mask, offset, htype);
+ process_pedit_field(fs, val, mask, offset, htype,
+ &natmode_flags);
}
break;
case FLOW_ACTION_QUEUE:
@@ -438,6 +558,9 @@ void cxgb4_process_flow_actions(struct net_device *in,
break;
}
}
+ if (natmode_flags)
+ cxgb4_action_natmode_tweak(fs, natmode_flags);
+
}
static bool valid_l4_mask(u32 mask)
@@ -454,7 +577,8 @@ static bool valid_l4_mask(u32 mask)
}
static bool valid_pedit_action(struct net_device *dev,
- const struct flow_action_entry *act)
+ const struct flow_action_entry *act,
+ u8 *natmode_flags)
{
u32 mask, offset;
u8 htype;
@@ -479,7 +603,10 @@ static bool valid_pedit_action(struct net_device *dev,
case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
switch (offset) {
case PEDIT_IP4_SRC:
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
+ break;
case PEDIT_IP4_DST:
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
default:
netdev_err(dev, "%s: Unsupported pedit field\n",
@@ -493,10 +620,13 @@ static bool valid_pedit_action(struct net_device *dev,
case PEDIT_IP6_SRC_63_32:
case PEDIT_IP6_SRC_95_64:
case PEDIT_IP6_SRC_127_96:
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
+ break;
case PEDIT_IP6_DST_31_0:
case PEDIT_IP6_DST_63_32:
case PEDIT_IP6_DST_95_64:
case PEDIT_IP6_DST_127_96:
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
default:
netdev_err(dev, "%s: Unsupported pedit field\n",
@@ -512,6 +642,10 @@ static bool valid_pedit_action(struct net_device *dev,
__func__);
return false;
}
+ if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
+ else
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
break;
default:
netdev_err(dev, "%s: Unsupported pedit field\n",
@@ -527,6 +661,10 @@ static bool valid_pedit_action(struct net_device *dev,
__func__);
return false;
}
+ if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
+ *natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
+ else
+ *natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
break;
default:
netdev_err(dev, "%s: Unsupported pedit field\n",
@@ -546,10 +684,12 @@ int cxgb4_validate_flow_actions(struct net_device *dev,
struct netlink_ext_ack *extack,
u8 matchall_filter)
{
+ struct adapter *adap = netdev2adap(dev);
struct flow_action_entry *act;
bool act_redir = false;
bool act_pedit = false;
bool act_vlan = false;
+ u8 natmode_flags = 0;
int i;
if (!flow_action_basic_hw_stats_check(actions, extack))
@@ -563,7 +703,6 @@ int cxgb4_validate_flow_actions(struct net_device *dev,
break;
case FLOW_ACTION_MIRRED:
case FLOW_ACTION_REDIRECT: {
- struct adapter *adap = netdev2adap(dev);
struct net_device *n_dev, *target_dev;
bool found = false;
unsigned int i;
@@ -620,7 +759,8 @@ int cxgb4_validate_flow_actions(struct net_device *dev,
}
break;
case FLOW_ACTION_MANGLE: {
- bool pedit_valid = valid_pedit_action(dev, act);
+ bool pedit_valid = valid_pedit_action(dev, act,
+ &natmode_flags);
if (!pedit_valid)
return -EOPNOTSUPP;
@@ -642,6 +782,15 @@ int cxgb4_validate_flow_actions(struct net_device *dev,
return -EINVAL;
}
+ if (act_pedit) {
+ int ret;
+
+ ret = cxgb4_action_natmode_validate(adap, natmode_flags,
+ extack);
+ if (ret)
+ return ret;
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
index 6296e1d5a12b..3a2fa00c8cde 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
@@ -108,6 +108,21 @@ struct ch_tc_pedit_fields {
#define PEDIT_TCP_SPORT_DPORT 0x0
#define PEDIT_UDP_SPORT_DPORT 0x0
+enum cxgb4_action_natmode_flags {
+ CXGB4_ACTION_NATMODE_NONE = 0,
+ CXGB4_ACTION_NATMODE_DIP = (1 << 0),
+ CXGB4_ACTION_NATMODE_SIP = (1 << 1),
+ CXGB4_ACTION_NATMODE_DPORT = (1 << 2),
+ CXGB4_ACTION_NATMODE_SPORT = (1 << 3),
+};
+
+/* TC PEDIT action to NATMODE translation entry */
+struct cxgb4_natmode_config {
+ enum chip_type chip;
+ u8 flags;
+ u8 natmode;
+};
+
void cxgb4_process_flow_actions(struct net_device *in,
struct flow_action *actions,
struct ch_filter_specification *fs);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index c043afb38b6e..8f7eca1e7716 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1912,6 +1912,27 @@ out:
return ret;
}
+static void fec_enet_phy_reset_after_clk_enable(struct net_device *ndev)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ struct phy_device *phy_dev = ndev->phydev;
+
+ if (phy_dev) {
+ phy_reset_after_clk_enable(phy_dev);
+ } else if (fep->phy_node) {
+ /*
+ * If the PHY still is not bound to the MAC, but there is
+ * OF PHY node and a matching PHY device instance already,
+ * use the OF PHY node to obtain the PHY device instance,
+ * and then use that PHY device instance when triggering
+ * the PHY reset.
+ */
+ phy_dev = of_phy_find_device(fep->phy_node);
+ phy_reset_after_clk_enable(phy_dev);
+ put_device(&phy_dev->mdio.dev);
+ }
+}
+
static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
{
struct fec_enet_private *fep = netdev_priv(ndev);
@@ -1938,7 +1959,7 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
if (ret)
goto failed_clk_ref;
- phy_reset_after_clk_enable(ndev->phydev);
+ fec_enet_phy_reset_after_clk_enable(ndev);
} else {
clk_disable_unprepare(fep->clk_enet_out);
if (fep->clk_ptp) {
@@ -2983,16 +3004,16 @@ fec_enet_open(struct net_device *ndev)
/* Init MAC prior to mii bus probe */
fec_restart(ndev);
- /* Probe and connect to PHY when open the interface */
- ret = fec_enet_mii_probe(ndev);
- if (ret)
- goto err_enet_mii_probe;
-
/* Call phy_reset_after_clk_enable() again if it failed during
* phy_reset_after_clk_enable() before because the PHY wasn't probed.
*/
if (reset_again)
- phy_reset_after_clk_enable(ndev->phydev);
+ fec_enet_phy_reset_after_clk_enable(ndev);
+
+ /* Probe and connect to PHY when open the interface */
+ ret = fec_enet_mii_probe(ndev);
+ if (ret)
+ goto err_enet_mii_probe;
if (fep->quirks & FEC_QUIRK_ERR006687)
imx6q_cpuidle_fec_irqs_used();
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index c5c732601e35..7ef3369953b6 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1349,6 +1349,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget)
int offset = ibmveth_rxq_frame_offset(adapter);
int csum_good = ibmveth_rxq_csum_good(adapter);
int lrg_pkt = ibmveth_rxq_large_packet(adapter);
+ __sum16 iph_check = 0;
skb = ibmveth_rxq_get_buffer(adapter);
@@ -1385,16 +1386,26 @@ static int ibmveth_poll(struct napi_struct *napi, int budget)
skb_put(skb, length);
skb->protocol = eth_type_trans(skb, netdev);
- if (csum_good) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- ibmveth_rx_csum_helper(skb, adapter);
+ /* PHYP without PLSO support places a -1 in the ip
+ * checksum for large send frames.
+ */
+ if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)skb->data;
+
+ iph_check = iph->check;
}
- if (length > netdev->mtu + ETH_HLEN) {
+ if ((length > netdev->mtu + ETH_HLEN) ||
+ lrg_pkt || iph_check == 0xffff) {
ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
adapter->rx_large_packets++;
}
+ if (csum_good) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ ibmveth_rx_csum_helper(skb, adapter);
+ }
+
napi_gro_receive(napi, skb); /* send it up */
netdev->stats.rx_packets++;
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 03e034918d14..af441d699a57 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1113,7 +1113,7 @@ out:
return rc;
probe_err_register:
- kfree(lp->td_ring);
+ kfree(KSEG0ADDR(lp->td_ring));
probe_err_td_ring:
iounmap(lp->tx_dma_regs);
probe_err_dma_tx:
@@ -1133,6 +1133,7 @@ static int korina_remove(struct platform_device *pdev)
iounmap(lp->eth_regs);
iounmap(lp->rx_dma_regs);
iounmap(lp->tx_dma_regs);
+ kfree(KSEG0ADDR(lp->td_ring));
unregister_netdev(bif->dev);
free_netdev(bif->dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 99d7737e8ad6..502d1b97855c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -943,6 +943,9 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
bool clean_complete = true;
int done;
+ if (!budget)
+ return 0;
+
if (priv->tx_ring_num[TX_XDP]) {
xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring];
if (xdp_tx_cq->xdp_busy) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 45869b29368f..3ddb7268e415 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -350,7 +350,7 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
.dma = tx_info->map0_dma,
};
- if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
+ if (!napi_mode || !mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
dma_unmap_page(priv->ddev, tx_info->map0_dma,
PAGE_SIZE, priv->dma_dir);
put_page(tx_info->page);
diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c
index 317fad787d78..267c080ee084 100644
--- a/drivers/net/ethernet/ti/tlan.c
+++ b/drivers/net/ethernet/ti/tlan.c
@@ -2504,7 +2504,7 @@ static void tlan_phy_power_down(struct net_device *dev)
}
/* Wait for 50 ms and powerup
- * This is abitrary. It is intended to make sure the
+ * This is arbitrary. It is intended to make sure the
* transceiver settles.
*/
tlan_set_timer(dev, msecs_to_jiffies(50), TLAN_TIMER_PHY_PUP);
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 24d688e3cd93..b40b711cf4bd 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1432,6 +1432,9 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
void ipa_endpoint_suspend(struct ipa *ipa)
{
+ if (!ipa->setup_complete)
+ return;
+
if (ipa->modem_netdev)
ipa_modem_suspend(ipa->modem_netdev);
@@ -1443,6 +1446,9 @@ void ipa_endpoint_suspend(struct ipa *ipa)
void ipa_endpoint_resume(struct ipa *ipa)
{
+ if (!ipa->setup_complete)
+ return;
+
ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]);
ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]);
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 0d3920896d50..716db4a0fed8 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -108,6 +108,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
unsigned int logflags);
void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
struct sock *sk);
+void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb);
void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
unsigned int hooknum, const struct sk_buff *skb,
const struct net_device *in,
diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h
index e1057b255f69..879fe8cff581 100644
--- a/include/net/tc_act/tc_tunnel_key.h
+++ b/include/net/tc_act/tc_tunnel_key.h
@@ -56,7 +56,10 @@ static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a)
{
#ifdef CONFIG_NET_CLS_ACT
struct tcf_tunnel_key *t = to_tunnel_key(a);
- struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
+ struct tcf_tunnel_key_params *params;
+
+ params = rcu_dereference_protected(t->params,
+ lockdep_is_held(&a->tcfa_lock));
return &params->tcft_enc_metadata->u.tun_info;
#else
diff --git a/include/net/tls.h b/include/net/tls.h
index e5dac7e74e79..baf1e99d8193 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -679,10 +679,6 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb,
struct scatterlist *sgout);
struct sk_buff *tls_encrypt_skb(struct sk_buff *skb);
-struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
- struct net_device *dev,
- struct sk_buff *skb);
-
int tls_sw_fallback_init(struct sock *sk,
struct tls_offload_context_tx *offload_ctx,
struct tls_crypto_info *crypto_info);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 0cec4152f979..e09d087ba240 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -580,6 +580,7 @@ sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv,
skb->dev = priv->ndev;
can_skb_reserve(skb);
can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
/* reserve CAN header */
skb_reserve(skb, offsetof(struct can_frame, data));
@@ -1487,6 +1488,7 @@ j1939_session *j1939_session_fresh_new(struct j1939_priv *priv,
skb->dev = priv->ndev;
can_skb_reserve(skb);
can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
skcb = j1939_skb_to_cb(skb);
memcpy(skcb, rel_skcb, sizeof(*skcb));
diff --git a/net/core/sock.c b/net/core/sock.c
index d9a537e6876a..4e8729357122 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -757,7 +757,6 @@ static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
} else {
sock_reset_flag(sk, SOCK_RCVTSTAMP);
sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
- sock_reset_flag(sk, SOCK_TSTAMP_NEW);
}
}
@@ -994,8 +993,6 @@ set_sndbuf:
__sock_set_timestamps(sk, valbool, true, true);
break;
case SO_TIMESTAMPING_NEW:
- sock_set_flag(sk, SOCK_TSTAMP_NEW);
- fallthrough;
case SO_TIMESTAMPING_OLD:
if (val & ~SOF_TIMESTAMPING_MASK) {
ret = -EINVAL;
@@ -1024,16 +1021,14 @@ set_sndbuf:
}
sk->sk_tsflags = val;
+ sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+
if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
sock_enable_timestamp(sk,
SOCK_TIMESTAMPING_RX_SOFTWARE);
- else {
- if (optname == SO_TIMESTAMPING_NEW)
- sock_reset_flag(sk, SOCK_TSTAMP_NEW);
-
+ else
sock_disable_timestamp(sk,
(1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
- }
break;
case SO_RCVLOWAT:
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 8f2e974a1e4d..e8d704de728c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -457,6 +457,23 @@ out_bh_enable:
local_bh_enable();
}
+/*
+ * The device used for looking up which routing table to use for sending an ICMP
+ * error is preferably the source whenever it is set, which should ensure the
+ * icmp error can be sent to the source host, else lookup using the routing
+ * table of the destination device, else use the main routing table (index 0).
+ */
+static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
+{
+ struct net_device *route_lookup_dev = NULL;
+
+ if (skb->dev)
+ route_lookup_dev = skb->dev;
+ else if (skb_dst(skb))
+ route_lookup_dev = skb_dst(skb)->dev;
+ return route_lookup_dev;
+}
+
static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4,
struct sk_buff *skb_in,
@@ -465,6 +482,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
int type, int code,
struct icmp_bxm *param)
{
+ struct net_device *route_lookup_dev;
struct rtable *rt, *rt2;
struct flowi4 fl4_dec;
int err;
@@ -479,7 +497,8 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
- fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
+ route_lookup_dev = icmp_get_route_lookup_dev(skb_in);
+ fl4->flowi4_oif = l3mdev_master_ifindex(route_lookup_dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
rt = ip_route_output_key_hash(net, fl4, skb_in);
@@ -503,7 +522,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
if (err)
goto relookup_failed;
- if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
+ if (inet_addr_type_dev_table(net, route_lookup_dev,
fl4_dec.saddr) == RTN_LOCAL) {
rt2 = __ip_route_output_key(net, &fl4_dec);
if (IS_ERR(rt2))
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 4e31f23e4117..e70291748889 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -625,9 +625,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
}
if (dev->header_ops) {
- /* Need space for new headers */
- if (skb_cow_head(skb, dev->needed_headroom -
- (tunnel->hlen + sizeof(struct iphdr))))
+ if (skb_cow_head(skb, 0))
goto free_skb;
tnl_params = (const struct iphdr *)skb->data;
@@ -748,7 +746,11 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
len = tunnel->tun_hlen - len;
tunnel->hlen = tunnel->hlen + len;
- dev->needed_headroom = dev->needed_headroom + len;
+ if (dev->header_ops)
+ dev->hard_header_len += len;
+ else
+ dev->needed_headroom += len;
+
if (set_mtu)
dev->mtu = max_t(int, dev->mtu - len, 68);
@@ -944,6 +946,7 @@ static void __gre_tunnel_init(struct net_device *dev)
tunnel->parms.iph.protocol = IPPROTO_GRE;
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+ dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
@@ -987,10 +990,14 @@ static int ipgre_tunnel_init(struct net_device *dev)
return -EINVAL;
dev->flags = IFF_BROADCAST;
dev->header_ops = &ipgre_header_ops;
+ dev->hard_header_len = tunnel->hlen + sizeof(*iph);
+ dev->needed_headroom = 0;
}
#endif
} else if (!tunnel->collect_md) {
dev->header_ops = &ipgre_header_ops;
+ dev->hard_header_len = tunnel->hlen + sizeof(*iph);
+ dev->needed_headroom = 0;
}
return ip_tunnel_init(dev);
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index 7a83f881efa9..136030ad2e54 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -43,16 +43,31 @@ static void dump_arp_packet(struct nf_log_buf *m,
const struct nf_loginfo *info,
const struct sk_buff *skb, unsigned int nhoff)
{
- const struct arphdr *ah;
- struct arphdr _arph;
const struct arppayload *ap;
struct arppayload _arpp;
+ const struct arphdr *ah;
+ unsigned int logflags;
+ struct arphdr _arph;
ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
if (ah == NULL) {
nf_log_buf_add(m, "TRUNCATED");
return;
}
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+ else
+ logflags = NF_LOG_DEFAULT_MASK;
+
+ if (logflags & NF_LOG_MACDECODE) {
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+ nf_log_dump_vlan(m, skb);
+ nf_log_buf_add(m, "MACPROTO=%04x ",
+ ntohs(eth_hdr(skb)->h_proto));
+ }
+
nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d",
ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op));
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 0c72156130b6..d07583fac8f8 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -284,8 +284,10 @@ static void dump_ipv4_mac_header(struct nf_log_buf *m,
switch (dev->type) {
case ARPHRD_ETHER:
- nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+ nf_log_dump_vlan(m, skb);
+ nf_log_buf_add(m, "MACPROTO=%04x ",
ntohs(eth_hdr(skb)->h_proto));
return;
default:
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d15a78b26dfa..dc2a399cd9f4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2770,10 +2770,12 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
if (IS_ERR(rt))
return rt;
- if (flp4->flowi4_proto)
+ if (flp4->flowi4_proto) {
+ flp4->flowi4_oif = rt->dst.dev->ifindex;
rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
flowi4_to_flowi(flp4),
sk, 0);
+ }
return rt;
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index a4e4912ad607..91209a2760aa 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -501,8 +501,11 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (__ipv6_addr_needs_scope_id(addr_type)) {
iif = icmp6_iif(skb);
} else {
- dst = skb_dst(skb);
- iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
+ /*
+ * The source device is used for looking up which routing table
+ * to use for sending an ICMP error.
+ */
+ iif = l3mdev_master_ifindex(skb->dev);
}
/*
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 141c0a4c569a..605cdd38a919 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2622,8 +2622,10 @@ static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
iter->skip = *pos;
if (iter->tbl) {
+ loff_t p = 0;
+
ipv6_route_seq_setup_walk(iter, net);
- return ipv6_route_seq_next(seq, NULL, pos);
+ return ipv6_route_seq_next(seq, NULL, &p);
} else {
return NULL;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a2a65e327f49..5941bfbc7c47 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -468,8 +468,6 @@ int ip6_forward(struct sk_buff *skb)
* check and decrement ttl
*/
if (hdr->hop_limit <= 1) {
- /* Force OUTPUT device used as source address */
- skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index da64550a5707..8210ff34ed9b 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -297,9 +297,11 @@ static void dump_ipv6_mac_header(struct nf_log_buf *m,
switch (dev->type) {
case ARPHRD_ETHER:
- nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
- ntohs(eth_hdr(skb)->h_proto));
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+ nf_log_dump_vlan(m, skb);
+ nf_log_buf_add(m, "MACPROTO=%04x ",
+ ntohs(eth_hdr(skb)->h_proto));
return;
default:
break;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index bde6d48a9942..7e0ce7af8234 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2745,7 +2745,8 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (confirm_neigh)
dst_confirm_neigh(dst, daddr);
- mtu = max_t(u32, mtu, IPV6_MIN_MTU);
+ if (mtu < IPV6_MIN_MTU)
+ return;
if (mtu >= dst_mtu(dst))
return;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 4d35fa998fcc..092a2d48bfd3 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -517,7 +517,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
return ret;
}
- if (subflow->use_64bit_ack) {
+ if (READ_ONCE(msk->use_64bit_ack)) {
ack_size = TCPOLEN_MPTCP_DSS_ACK64;
opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq);
opts->ext_copy.ack64 = 1;
@@ -657,6 +657,12 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
if (unlikely(mptcp_check_fallback(sk)))
return false;
+ /* prevent adding of any MPTCP related options on reset packet
+ * until we support MP_TCPRST/MP_FASTCLOSE
+ */
+ if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
+ return false;
+
if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
ret = true;
else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
@@ -711,7 +717,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
return false;
}
-static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
+static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
struct mptcp_subflow_context *subflow,
struct sk_buff *skb,
struct mptcp_options_received *mp_opt)
@@ -728,15 +734,20 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
subflow->mp_join && mp_opt->mp_join &&
READ_ONCE(msk->pm.server_side))
- tcp_send_ack(sk);
+ tcp_send_ack(ssk);
goto fully_established;
}
- /* we should process OoO packets before the first subflow is fully
- * established, but not expected for MP_JOIN subflows
+ /* we must process OoO packets before the first subflow is fully
+ * established. OoO packets are instead a protocol violation
+ * for MP_JOIN subflows as the peer must not send any data
+ * before receiving the forth ack - cfr. RFC 8684 section 3.2.
*/
- if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)
+ if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) {
+ if (subflow->mp_join)
+ goto reset;
return subflow->mp_capable;
+ }
if (mp_opt->dss && mp_opt->use_ack) {
/* subflows are fully established as soon as we get any
@@ -748,9 +759,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
}
/* If the first established packet does not contain MP_CAPABLE + data
- * then fallback to TCP
+ * then fallback to TCP. Fallback scenarios requires a reset for
+ * MP_JOIN subflows.
*/
if (!mp_opt->mp_capable) {
+ if (subflow->mp_join)
+ goto reset;
subflow->mp_capable = 0;
pr_fallback(msk);
__mptcp_do_fallback(msk);
@@ -767,12 +781,16 @@ fully_established:
subflow->pm_notified = 1;
if (subflow->mp_join) {
- clear_3rdack_retransmission(sk);
+ clear_3rdack_retransmission(ssk);
mptcp_pm_subflow_established(msk, subflow);
} else {
mptcp_pm_fully_established(msk);
}
return true;
+
+reset:
+ mptcp_subflow_reset(ssk);
+ return false;
}
static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 9816608da452..185dacb39781 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1710,6 +1710,20 @@ static void pm_work(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
}
+static void __mptcp_close_subflow(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow, *tmp;
+
+ list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ if (inet_sk_state_load(ssk) != TCP_CLOSE)
+ continue;
+
+ __mptcp_close_ssk((struct sock *)msk, ssk, subflow, 0);
+ }
+}
+
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
@@ -1727,6 +1741,9 @@ static void mptcp_worker(struct work_struct *work)
mptcp_clean_una(sk);
mptcp_check_data_fin_ack(sk);
__mptcp_flush_join_list(msk);
+ if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ __mptcp_close_subflow(msk);
+
__mptcp_move_skbs(msk);
if (msk->pm.status)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index aa0ab18d2e57..13ab89dc1914 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -90,6 +90,7 @@
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
#define MPTCP_FALLBACK_DONE 4
+#define MPTCP_WORK_CLOSE_SUBFLOW 5
struct mptcp_options_received {
u64 sndr_key;
@@ -211,6 +212,7 @@ struct mptcp_sock {
bool fully_established;
bool rcv_data_fin;
bool snd_data_fin_enable;
+ bool use_64bit_ack; /* Set when we received a 64-bit DSN */
spinlock_t join_list_lock;
struct work_struct work;
struct sk_buff *ooo_last_skb;
@@ -310,7 +312,6 @@ struct mptcp_subflow_context {
mpc_map : 1,
backup : 1,
rx_eof : 1,
- use_64bit_ack : 1, /* Set when we received a 64-bit DSN */
can_ack : 1; /* only after processing the remote a key */
enum mptcp_data_avail data_avail;
u32 remote_nonce;
@@ -369,6 +370,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow,
long timeout);
+void mptcp_subflow_reset(struct sock *ssk);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 5d91e3a2cd30..ac4a1fe3550b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -271,6 +271,19 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
return thmac == subflow->thmac;
}
+void mptcp_subflow_reset(struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = subflow->conn;
+
+ tcp_set_state(ssk, TCP_CLOSE);
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ tcp_done(ssk);
+ if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
+ schedule_work(&mptcp_sk(sk)->work))
+ sock_hold(sk);
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -343,8 +356,7 @@ fallback:
return;
do_reset:
- tcp_send_active_reset(sk, GFP_ATOMIC);
- tcp_done(sk);
+ mptcp_subflow_reset(sk);
}
struct request_sock_ops mptcp_subflow_request_sock_ops;
@@ -770,12 +782,11 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
if (!mpext->dsn64) {
map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
mpext->data_seq);
- subflow->use_64bit_ack = 0;
pr_debug("expanded seq=%llu", subflow->map_seq);
} else {
map_seq = mpext->data_seq;
- subflow->use_64bit_ack = 1;
}
+ WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);
if (subflow->map_valid) {
/* Allow replacing only with an identical map */
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index b00866d777fe..d2e5a8f644b8 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -609,6 +609,8 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
if (ret == NF_ACCEPT) {
nf_reset_ct(skb);
skb_forward_csum(skb);
+ if (skb->dev)
+ skb->tstamp = 0;
}
return ret;
}
@@ -649,6 +651,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
if (!local) {
skb_forward_csum(skb);
+ if (skb->dev)
+ skb->tstamp = 0;
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
@@ -669,6 +673,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
if (!local) {
ip_vs_drop_early_demux_sk(skb);
skb_forward_csum(skb);
+ if (skb->dev)
+ skb->tstamp = 0;
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index ae5628ddbe6d..fd7c5f0f5c25 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -171,6 +171,18 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
}
EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
+void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
+{
+ u16 vid;
+
+ if (!skb_vlan_tag_present(skb))
+ return;
+
+ vid = skb_vlan_tag_get(skb);
+ nf_log_buf_add(m, "VPROTO=%04x VID=%u ", ntohs(skb->vlan_proto), vid);
+}
+EXPORT_SYMBOL_GPL(nf_log_dump_vlan);
+
/* bridge and netdev logging families share this code. */
void nf_log_l2packet(struct net *net, u_int8_t pf,
__be16 protocol,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7f1c184c00d2..9957e0ed8658 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2138,7 +2138,8 @@ static bool nft_hook_list_equal(struct list_head *hook_list1,
}
static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
- u32 flags)
+ u32 flags, const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
@@ -2154,9 +2155,10 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
return -EOPNOTSUPP;
if (nla[NFTA_CHAIN_HOOK]) {
- if (!nft_is_base_chain(chain))
+ if (!nft_is_base_chain(chain)) {
+ NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
-
+ }
err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
false);
if (err < 0)
@@ -2165,6 +2167,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
basechain = nft_base_chain(chain);
if (basechain->type != hook.type) {
nft_chain_release_hook(&hook);
+ NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
@@ -2172,6 +2175,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
if (!nft_hook_list_equal(&basechain->hook_list,
&hook.list)) {
nft_chain_release_hook(&hook);
+ NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
} else {
@@ -2179,6 +2183,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
if (ops->hooknum != hook.num ||
ops->priority != hook.priority) {
nft_chain_release_hook(&hook);
+ NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
}
@@ -2191,8 +2196,10 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
chain2 = nft_chain_lookup(ctx->net, table,
nla[NFTA_CHAIN_NAME], genmask);
- if (!IS_ERR(chain2))
+ if (!IS_ERR(chain2)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
return -EEXIST;
+ }
}
if (nla[NFTA_CHAIN_COUNTERS]) {
@@ -2235,6 +2242,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
nft_trans_chain_update(tmp) &&
nft_trans_chain_name(tmp) &&
strcmp(name, nft_trans_chain_name(tmp)) == 0) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
kfree(name);
goto err;
}
@@ -2357,7 +2365,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return -EOPNOTSUPP;
flags |= chain->flags & NFT_CHAIN_BASE;
- return nf_tables_updchain(&ctx, genmask, policy, flags);
+ return nf_tables_updchain(&ctx, genmask, policy, flags, attr,
+ extack);
}
return nf_tables_addchain(&ctx, family, genmask, policy, flags);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index f1dbb5025c0b..d790c43c473f 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1596,7 +1596,7 @@ out:
return rc;
}
-#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
bool is_dmb, int bufsize)
@@ -1615,7 +1615,8 @@ static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
if (rc) {
kfree(buf_desc);
- return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) : ERR_PTR(rc);
+ return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) :
+ ERR_PTR(-EIO);
}
buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
/* CDC header stored in buf. So, pretend it was smaller */
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 2db967f2fb50..273eaf1bfe49 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -233,8 +233,6 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow,
default:
flow->type = SMC_LLC_FLOW_NONE;
}
- if (qentry == lgr->delayed_event)
- lgr->delayed_event = NULL;
smc_llc_flow_qentry_set(flow, qentry);
spin_unlock_bh(&lgr->llc_flow_lock);
return true;
@@ -1209,7 +1207,7 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
/* enqueue a local add_link req to trigger a new add_link flow */
void smc_llc_add_link_local(struct smc_link *link)
{
- struct smc_llc_msg_add_link add_llc = {0};
+ struct smc_llc_msg_add_link add_llc = {};
add_llc.hd.length = sizeof(add_llc);
add_llc.hd.common.type = SMC_LLC_ADD_LINK;
@@ -1242,7 +1240,7 @@ out:
*/
void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id)
{
- struct smc_llc_msg_del_link del_llc = {0};
+ struct smc_llc_msg_del_link del_llc = {};
del_llc.hd.length = sizeof(del_llc);
del_llc.hd.common.type = SMC_LLC_DELETE_LINK;
@@ -1314,7 +1312,7 @@ out:
*/
void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
{
- struct smc_llc_msg_del_link delllc = {0};
+ struct smc_llc_msg_del_link delllc = {};
int i;
delllc.hd.common.type = SMC_LLC_DELETE_LINK;
@@ -1603,13 +1601,12 @@ static void smc_llc_event_work(struct work_struct *work)
struct smc_llc_qentry *qentry;
if (!lgr->llc_flow_lcl.type && lgr->delayed_event) {
- if (smc_link_usable(lgr->delayed_event->link)) {
- smc_llc_event_handler(lgr->delayed_event);
- } else {
- qentry = lgr->delayed_event;
- lgr->delayed_event = NULL;
+ qentry = lgr->delayed_event;
+ lgr->delayed_event = NULL;
+ if (smc_link_usable(qentry->link))
+ smc_llc_event_handler(qentry);
+ else
kfree(qentry);
- }
}
again:
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 11a429f4c6cd..2a78aa701572 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -150,7 +150,8 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (fragid == FIRST_FRAGMENT) {
if (unlikely(head))
goto err;
- frag = skb_unshare(frag, GFP_ATOMIC);
+ if (skb_cloned(frag))
+ frag = skb_copy(frag, GFP_ATOMIC);
if (unlikely(!frag))
goto err;
head = *headbuf = frag;
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 2f9c148f17e2..fe4edce459ad 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -327,8 +327,13 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
struct tipc_msg *hdr;
u16 seqno;
+ spin_lock_bh(&namedq->lock);
skb_queue_walk_safe(namedq, skb, tmp) {
- skb_linearize(skb);
+ if (unlikely(skb_linearize(skb))) {
+ __skb_unlink(skb, namedq);
+ kfree_skb(skb);
+ continue;
+ }
hdr = buf_msg(skb);
seqno = msg_named_seqno(hdr);
if (msg_is_last_bulk(hdr)) {
@@ -338,12 +343,14 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) {
__skb_unlink(skb, namedq);
+ spin_unlock_bh(&namedq->lock);
return skb;
}
if (*open && (*rcv_nxt == seqno)) {
(*rcv_nxt)++;
__skb_unlink(skb, namedq);
+ spin_unlock_bh(&namedq->lock);
return skb;
}
@@ -353,6 +360,7 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
continue;
}
}
+ spin_unlock_bh(&namedq->lock);
return NULL;
}
diff --git a/net/tipc/node.c b/net/tipc/node.c
index cf4b239fc569..d269ebe382e1 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1496,7 +1496,7 @@ static void node_lost_contact(struct tipc_node *n,
/* Clean up broadcast state */
tipc_bcast_remove_peer(n->net, n->bc_entry.link);
- __skb_queue_purge(&n->bc_entry.namedq);
+ skb_queue_purge(&n->bc_entry.namedq);
/* Abort any ongoing link failover */
for (i = 0; i < MAX_BEARERS; i++) {
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index b74e2741f74f..cec86229a6a0 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -418,14 +418,14 @@ static int tls_push_data(struct sock *sk,
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
- int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
struct tls_record_info *record = ctx->open_record;
int tls_push_record_flags;
struct page_frag *pfrag;
size_t orig_size = size;
u32 max_open_record_len;
- int copy, rc = 0;
+ bool more = false;
bool done = false;
+ int copy, rc = 0;
long timeo;
if (flags &
@@ -492,9 +492,8 @@ handle_error:
if (!size) {
last_record:
tls_push_record_flags = flags;
- if (more) {
- tls_ctx->pending_open_record_frags =
- !!record->num_frags;
+ if (flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE)) {
+ more = true;
break;
}
@@ -526,6 +525,8 @@ last_record:
}
} while (!done);
+ tls_ctx->pending_open_record_frags = more;
+
if (orig_size - size > 0)
rc = orig_size - size;
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 4773ce72edbd..ef352477cac6 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -20,6 +20,7 @@ TEST_PROGS += vrf-xfrm-tests.sh
TEST_PROGS += rxtimestamp.sh
TEST_PROGS += devlink_port_split.py
TEST_PROGS += drop_monitor_tests.sh
+TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
new file mode 100755
index 000000000000..23cf924754a5
--- /dev/null
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -0,0 +1,626 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2019 David Ahern <dsahern@gmail.com>. All rights reserved.
+# Copyright (c) 2020 Michael Jeanson <mjeanson@efficios.com>. All rights reserved.
+#
+# Requires CONFIG_NET_VRF, CONFIG_VETH, CONFIG_BRIDGE and CONFIG_NET_NS.
+#
+#
+# Symmetric routing topology
+#
+# blue red
+# +----+ .253 +----+ .253 +----+
+# | h1 |-------------------| r1 |-------------------| h2 |
+# +----+ .1 +----+ .2 +----+
+# 172.16.1/24 172.16.2/24
+# 2001:db8:16:1/64 2001:db8:16:2/64
+#
+#
+# Route from h1 to h2 and back goes through r1, incoming vrf blue has a route
+# to the outgoing vrf red for the n2 network and red has a route back to n1.
+# The red VRF interface has a MTU of 1400.
+#
+# The first test sends a ping with a ttl of 1 from h1 to h2 and parses the
+# output of the command to check that a ttl expired error is received.
+#
+# The second test runs traceroute from h1 to h2 and parses the output to check
+# for a hop on r1.
+#
+# The third test sends a ping with a packet size of 1450 from h1 to h2 and
+# parses the output of the command to check that a fragmentation error is
+# received.
+#
+#
+# Asymmetric routing topology
+#
+# This topology represents a customer setup where the issue with icmp errors
+# and VRF route leaking was initialy reported. The MTU test isn't done here
+# because of the lack of a return route in the red VRF.
+#
+# blue red
+# .253 +----+ .253
+# +----| r1 |----+
+# | +----+ |
+# +----+ | | +----+
+# | h1 |--------------+ +--------------| h2 |
+# +----+ .1 | | .2 +----+
+# 172.16.1/24 | +----+ | 172.16.2/24
+# 2001:db8:16:1/64 +----| r2 |----+ 2001:db8:16:2/64
+# .254 +----+ .254
+#
+#
+# Route from h1 to h2 goes through r1, incoming vrf blue has a route to the
+# outgoing vrf red for the n2 network but red doesn't have a route back to n1.
+# Route from h2 to h1 goes through r2.
+#
+# The objective is to check that the incoming vrf routing table is selected
+# to send an ICMP error back to the source when the ttl of a packet reaches 1
+# while it is forwarded between different vrfs.
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+DEFAULT_TTYPE=sym
+
+H1_N1=172.16.1.0/24
+H1_N1_6=2001:db8:16:1::/64
+
+H1_N1_IP=172.16.1.1
+R1_N1_IP=172.16.1.253
+R2_N1_IP=172.16.1.254
+
+H1_N1_IP6=2001:db8:16:1::1
+R1_N1_IP6=2001:db8:16:1::253
+R2_N1_IP6=2001:db8:16:1::254
+
+H2_N2=172.16.2.0/24
+H2_N2_6=2001:db8:16:2::/64
+
+H2_N2_IP=172.16.2.2
+R1_N2_IP=172.16.2.253
+R2_N2_IP=172.16.2.254
+
+H2_N2_IP6=2001:db8:16:2::2
+R1_N2_IP6=2001:db8:16:2::253
+R2_N2_IP6=2001:db8:16:2::254
+
+################################################################################
+# helpers
+
+log_section()
+{
+ echo
+ echo "###########################################################################"
+ echo "$*"
+ echo "###########################################################################"
+ echo
+}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ "${rc}" -eq "${expected}" ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read -r a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+run_cmd()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ # shellcheck disable=SC2086
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+ echo "$out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+run_cmd_grep()
+{
+ local grep_pattern="$1"
+ shift
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ # shellcheck disable=SC2086
+ out=$(eval $cmd 2>&1)
+ if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+ echo "$out"
+ fi
+
+ echo "$out" | grep -q "$grep_pattern"
+ rc=$?
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# setup and teardown
+
+cleanup()
+{
+ local ns
+
+ for ns in h1 h2 r1 r2; do
+ ip netns del $ns 2>/dev/null
+ done
+}
+
+setup_vrf()
+{
+ local ns=$1
+
+ ip -netns "${ns}" rule del pref 0
+ ip -netns "${ns}" rule add pref 32765 from all lookup local
+ ip -netns "${ns}" -6 rule del pref 0
+ ip -netns "${ns}" -6 rule add pref 32765 from all lookup local
+}
+
+create_vrf()
+{
+ local ns=$1
+ local vrf=$2
+ local table=$3
+
+ ip -netns "${ns}" link add "${vrf}" type vrf table "${table}"
+ ip -netns "${ns}" link set "${vrf}" up
+ ip -netns "${ns}" route add vrf "${vrf}" unreachable default metric 8192
+ ip -netns "${ns}" -6 route add vrf "${vrf}" unreachable default metric 8192
+
+ ip -netns "${ns}" addr add 127.0.0.1/8 dev "${vrf}"
+ ip -netns "${ns}" -6 addr add ::1 dev "${vrf}" nodad
+}
+
+setup_sym()
+{
+ local ns
+
+ # make sure we are starting with a clean slate
+ cleanup
+
+ #
+ # create nodes as namespaces
+ #
+ for ns in h1 h2 r1; do
+ ip netns add $ns
+ ip -netns $ns link set lo up
+
+ case "${ns}" in
+ h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ ;;
+ r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ esac
+ done
+
+ #
+ # create interconnects
+ #
+ ip -netns h1 link add eth0 type veth peer name r1h1
+ ip -netns h1 link set r1h1 netns r1 name eth0 up
+
+ ip -netns h2 link add eth0 type veth peer name r1h2
+ ip -netns h2 link set r1h2 netns r1 name eth1 up
+
+ #
+ # h1
+ #
+ ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24
+ ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad
+ ip -netns h1 link set eth0 up
+
+ # h1 to h2 via r1
+ ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0
+ ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0
+
+ #
+ # h2
+ #
+ ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
+ ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+ ip -netns h2 link set eth0 up
+
+ # h2 to h1 via r1
+ ip -netns h2 route add default via ${R1_N2_IP} dev eth0
+ ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0
+
+ #
+ # r1
+ #
+ setup_vrf r1
+ create_vrf r1 blue 1101
+ create_vrf r1 red 1102
+ ip -netns r1 link set mtu 1400 dev eth1
+ ip -netns r1 link set eth0 vrf blue up
+ ip -netns r1 link set eth1 vrf red up
+ ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+ # Route leak from blue to red
+ ip -netns r1 route add vrf blue ${H2_N2} dev red
+ ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+ # Route leak from red to blue
+ ip -netns r1 route add vrf red ${H1_N1} dev blue
+ ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue
+
+
+ # Wait for ip config to settle
+ sleep 2
+}
+
+setup_asym()
+{
+ local ns
+
+ # make sure we are starting with a clean slate
+ cleanup
+
+ #
+ # create nodes as namespaces
+ #
+ for ns in h1 h2 r1 r2; do
+ ip netns add $ns
+ ip -netns $ns link set lo up
+
+ case "${ns}" in
+ h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ ;;
+ r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ esac
+ done
+
+ #
+ # create interconnects
+ #
+ ip -netns h1 link add eth0 type veth peer name r1h1
+ ip -netns h1 link set r1h1 netns r1 name eth0 up
+
+ ip -netns h1 link add eth1 type veth peer name r2h1
+ ip -netns h1 link set r2h1 netns r2 name eth0 up
+
+ ip -netns h2 link add eth0 type veth peer name r1h2
+ ip -netns h2 link set r1h2 netns r1 name eth1 up
+
+ ip -netns h2 link add eth1 type veth peer name r2h2
+ ip -netns h2 link set r2h2 netns r2 name eth1 up
+
+ #
+ # h1
+ #
+ ip -netns h1 link add br0 type bridge
+ ip -netns h1 link set br0 up
+ ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
+ ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+ ip -netns h1 link set eth0 master br0 up
+ ip -netns h1 link set eth1 master br0 up
+
+ # h1 to h2 via r1
+ ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0
+ ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0
+
+ #
+ # h2
+ #
+ ip -netns h2 link add br0 type bridge
+ ip -netns h2 link set br0 up
+ ip -netns h2 addr add dev br0 ${H2_N2_IP}/24
+ ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad
+ ip -netns h2 link set eth0 master br0 up
+ ip -netns h2 link set eth1 master br0 up
+
+ # h2 to h1 via r2
+ ip -netns h2 route add default via ${R2_N2_IP} dev br0
+ ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0
+
+ #
+ # r1
+ #
+ setup_vrf r1
+ create_vrf r1 blue 1101
+ create_vrf r1 red 1102
+ ip -netns r1 link set mtu 1400 dev eth1
+ ip -netns r1 link set eth0 vrf blue up
+ ip -netns r1 link set eth1 vrf red up
+ ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+ # Route leak from blue to red
+ ip -netns r1 route add vrf blue ${H2_N2} dev red
+ ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+ # No route leak from red to blue
+
+ #
+ # r2
+ #
+ ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
+ ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+ ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24
+ ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
+
+ # Wait for ip config to settle
+ sleep 2
+}
+
+check_connectivity()
+{
+ ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1
+ log_test $? 0 "Basic IPv4 connectivity"
+ return $?
+}
+
+check_connectivity6()
+{
+ ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
+ log_test $? 0 "Basic IPv6 connectivity"
+ return $?
+}
+
+check_traceroute()
+{
+ if [ ! -x "$(command -v traceroute)" ]; then
+ echo "SKIP: Could not run IPV4 test without traceroute"
+ return 1
+ fi
+}
+
+check_traceroute6()
+{
+ if [ ! -x "$(command -v traceroute6)" ]; then
+ echo "SKIP: Could not run IPV6 test without traceroute6"
+ return 1
+ fi
+}
+
+ipv4_traceroute()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP error route lookup traceroute"
+
+ check_traceroute || return
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP}
+ log_test $? 0 "Traceroute reports a hop on r1"
+}
+
+ipv4_traceroute_asym()
+{
+ ipv4_traceroute asym
+}
+
+ipv6_traceroute()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP error route lookup traceroute"
+
+ check_traceroute6 || return
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6}
+ log_test $? 0 "Traceroute6 reports a hop on r1"
+}
+
+ipv6_traceroute_asym()
+{
+ ipv6_traceroute asym
+}
+
+ipv4_ping_ttl()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP}
+ log_test $? 0 "Ping received ICMP ttl exceeded"
+}
+
+ipv4_ping_ttl_asym()
+{
+ ipv4_ping_ttl asym
+}
+
+ipv4_ping_frag()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP}
+ log_test $? 0 "Ping received ICMP Frag needed"
+}
+
+ipv4_ping_frag_asym()
+{
+ ipv4_ping_frag asym
+}
+
+ipv6_ping_ttl()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6}
+ log_test $? 0 "Ping received ICMP Hop limit"
+}
+
+ipv6_ping_ttl_asym()
+{
+ ipv6_ping_ttl asym
+}
+
+ipv6_ping_frag()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6}
+ log_test $? 0 "Ping received ICMP Packet too big"
+}
+
+ipv6_ping_frag_asym()
+{
+ ipv6_ping_frag asym
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -4 Run IPv4 tests only
+ -6 Run IPv6 tests only
+ -t TEST Run only TEST
+ -p Pause on fail
+ -v verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+# Some systems don't have a ping6 binary anymore
+command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
+
+TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym"
+TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym"
+
+ret=0
+nsuccess=0
+nfail=0
+
+while getopts :46t:pvh o
+do
+ case $o in
+ 4) TESTS=ipv4;;
+ 6) TESTS=ipv6;;
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+#
+# show user test config
+#
+if [ -z "$TESTS" ]; then
+ TESTS="$TESTS_IPV4 $TESTS_IPV6"
+elif [ "$TESTS" = "ipv4" ]; then
+ TESTS="$TESTS_IPV4"
+elif [ "$TESTS" = "ipv6" ]; then
+ TESTS="$TESTS_IPV6"
+fi
+
+for t in $TESTS
+do
+ case $t in
+ ipv4_ping_ttl|ping) ipv4_ping_ttl;;&
+ ipv4_ping_ttl_asym|ping) ipv4_ping_ttl_asym;;&
+ ipv4_traceroute|traceroute) ipv4_traceroute;;&
+ ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;&
+ ipv4_ping_frag|ping) ipv4_ping_frag;;&
+
+ ipv6_ping_ttl|ping) ipv6_ping_ttl;;&
+ ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;&
+ ipv6_traceroute|traceroute) ipv6_traceroute;;&
+ ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;&
+ ipv6_ping_frag|ping) ipv6_ping_frag;;&
+
+ # setup namespaces and config, but do not run any tests
+ setup_sym|setup) setup_sym; exit 0;;
+ setup_asym) setup_asym; exit 0;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+cleanup
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c
index 29c73bce38fa..9e56b9d47037 100644
--- a/tools/testing/selftests/netfilter/nf-queue.c
+++ b/tools/testing/selftests/netfilter/nf-queue.c
@@ -17,9 +17,12 @@
struct options {
bool count_packets;
+ bool gso_enabled;
int verbose;
unsigned int queue_num;
unsigned int timeout;
+ uint32_t verdict;
+ uint32_t delay_ms;
};
static unsigned int queue_stats[5];
@@ -27,7 +30,7 @@ static struct options opts;
static void help(const char *p)
{
- printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p);
+ printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
}
static int parse_attr_cb(const struct nlattr *attr, void *data)
@@ -162,7 +165,7 @@ nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
}
static struct nlmsghdr *
-nfq_build_verdict(char *buf, int id, int queue_num, int verd)
+nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd)
{
struct nfqnl_msg_verdict_hdr vh = {
.verdict = htonl(verd),
@@ -189,9 +192,6 @@ static void print_stats(void)
unsigned int last, total;
int i;
- if (!opts.count_packets)
- return;
-
total = 0;
last = queue_stats[0];
@@ -234,7 +234,8 @@ struct mnl_socket *open_queue(void)
nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
- flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID;
+ flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
+ flags |= NFQA_CFG_F_UID_GID;
mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
@@ -255,6 +256,17 @@ struct mnl_socket *open_queue(void)
return nl;
}
+static void sleep_ms(uint32_t delay)
+{
+ struct timespec ts = { .tv_sec = delay / 1000 };
+
+ delay %= 1000;
+
+ ts.tv_nsec = delay * 1000llu * 1000llu;
+
+ nanosleep(&ts, NULL);
+}
+
static int mainloop(void)
{
unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
@@ -278,7 +290,7 @@ static int mainloop(void)
ret = mnl_socket_recvfrom(nl, buf, buflen);
if (ret == -1) {
- if (errno == ENOBUFS)
+ if (errno == ENOBUFS || errno == EINTR)
continue;
if (errno == EAGAIN) {
@@ -298,7 +310,10 @@ static int mainloop(void)
}
id = ret - MNL_CB_OK;
- nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT);
+ if (opts.delay_ms)
+ sleep_ms(opts.delay_ms);
+
+ nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
perror("mnl_socket_sendto");
exit(EXIT_FAILURE);
@@ -314,7 +329,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "chvt:q:")) != -1) {
+ while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
switch (c) {
case 'c':
opts.count_packets = true;
@@ -328,20 +343,48 @@ static void parse_opts(int argc, char **argv)
if (opts.queue_num > 0xffff)
opts.queue_num = 0;
break;
+ case 'Q':
+ opts.verdict = atoi(optarg);
+ if (opts.verdict > 0xffff) {
+ fprintf(stderr, "Expected destination queue number\n");
+ exit(1);
+ }
+
+ opts.verdict <<= 16;
+ opts.verdict |= NF_QUEUE;
+ break;
+ case 'd':
+ opts.delay_ms = atoi(optarg);
+ if (opts.delay_ms == 0) {
+ fprintf(stderr, "Expected nonzero delay (in milliseconds)\n");
+ exit(1);
+ }
+ break;
case 't':
opts.timeout = atoi(optarg);
break;
+ case 'G':
+ opts.gso_enabled = false;
+ break;
case 'v':
opts.verbose++;
break;
}
}
+
+ if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) {
+ fprintf(stderr, "Cannot use same destination and source queue\n");
+ exit(1);
+ }
}
int main(int argc, char *argv[])
{
int ret;
+ opts.verdict = NF_ACCEPT;
+ opts.gso_enabled = true;
+
parse_opts(argc, argv);
ret = mainloop();
diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh
index d250b84dd5bc..087f0e6e71ce 100755
--- a/tools/testing/selftests/netfilter/nft_meta.sh
+++ b/tools/testing/selftests/netfilter/nft_meta.sh
@@ -7,8 +7,7 @@ ksft_skip=4
sfx=$(mktemp -u "XXXXXXXX")
ns0="ns0-$sfx"
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
+if ! nft --version > /dev/null 2>&1; then
echo "SKIP: Could not run test without nft tool"
exit $ksft_skip
fi
@@ -24,6 +23,8 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo
trap cleanup EXIT
+currentyear=$(date +%G)
+lastyear=$((currentyear-1))
ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table inet filter {
counter iifcount {}
@@ -33,6 +34,9 @@ table inet filter {
counter infproto4count {}
counter il4protocounter {}
counter imarkcounter {}
+ counter icpu0counter {}
+ counter ilastyearcounter {}
+ counter icurrentyearcounter {}
counter oifcount {}
counter oifnamecount {}
@@ -54,6 +58,9 @@ table inet filter {
meta nfproto ipv4 counter name "infproto4count"
meta l4proto icmp counter name "il4protocounter"
meta mark 42 counter name "imarkcounter"
+ meta cpu 0 counter name "icpu0counter"
+ meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter
+ meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter
}
chain output {
@@ -84,11 +91,10 @@ check_one_counter()
local want="packets $2"
local verbose="$3"
- cnt=$(ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want")
- if [ $? -ne 0 ];then
+ if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then
echo "FAIL: $cname, want \"$want\", got"
ret=1
- ip netns exec "$ns0" nft list counter inet filter $counter
+ ip netns exec "$ns0" nft list counter inet filter $cname
fi
}
@@ -100,8 +106,7 @@ check_lo_counters()
for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
- il4protocounter \
- ol4protocounter \
+ il4protocounter icurrentyearcounter ol4protocounter \
; do
check_one_counter "$counter" "$want" "$verbose"
done
@@ -116,9 +121,22 @@ check_one_counter oskuidcounter "1" true
check_one_counter oskgidcounter "1" true
check_one_counter imarkcounter "1" true
check_one_counter omarkcounter "1" true
+check_one_counter ilastyearcounter "0" true
if [ $ret -eq 0 ];then
echo "OK: nftables meta iif/oif counters at expected values"
+else
+ exit $ret
+fi
+
+#First CPU execution and counter
+taskset -p 01 $$ > /dev/null
+ip netns exec "$ns0" nft reset counters > /dev/null
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null
+check_one_counter icpu0counter "2" true
+
+if [ $ret -eq 0 ];then
+ echo "OK: nftables meta cpu counter at expected values"
fi
exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
index 6898448b4266..3d202b90b33d 100755
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -12,6 +12,7 @@ sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
nsrouter="nsrouter-$sfx"
+timeout=4
cleanup()
{
@@ -20,6 +21,7 @@ cleanup()
ip netns del ${nsrouter}
rm -f "$TMPFILE0"
rm -f "$TMPFILE1"
+ rm -f "$TMPFILE2" "$TMPFILE3"
}
nft --version > /dev/null 2>&1
@@ -42,6 +44,8 @@ fi
TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp)
+TMPFILE2=$(mktemp)
+TMPFILE3=$(mktemp)
trap cleanup EXIT
ip netns add ${ns1}
@@ -83,7 +87,7 @@ load_ruleset() {
local name=$1
local prio=$2
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet $name {
chain nfq {
ip protocol icmp queue bypass
@@ -118,7 +122,7 @@ EOF
load_counter_ruleset() {
local prio=$1
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet countrules {
chain pre {
type filter hook prerouting priority $prio; policy accept;
@@ -175,7 +179,7 @@ test_ping_router() {
test_queue_blackhole() {
local proto=$1
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table $proto blackh {
chain forward {
type filter hook forward priority 0; policy accept;
@@ -184,10 +188,10 @@ table $proto blackh {
}
EOF
if [ $proto = "ip" ] ;then
- ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
+ ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
lret=$?
elif [ $proto = "ip6" ]; then
- ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
+ ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null
lret=$?
else
lret=111
@@ -214,8 +218,8 @@ test_queue()
local last=""
# spawn nf-queue listeners
- ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" &
- ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" &
sleep 1
test_ping
ret=$?
@@ -250,11 +254,11 @@ test_queue()
test_tcp_forward()
{
- ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 &
+ ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout &
local nfqpid=$!
tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
@@ -270,15 +274,13 @@ test_tcp_forward()
test_tcp_localhost()
{
- tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
-
tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
- ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 &
+ ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
local nfqpid=$!
sleep 1
@@ -287,6 +289,47 @@ test_tcp_localhost()
wait $rpid
[ $? -eq 0 ] && echo "PASS: tcp via loopback"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_requeue()
+{
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+}
+EOF
+ tmpfile=$(mktemp) || exit 1
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
+ ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
+ local rpid=$!
+
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" &
+
+ # nfqueue 1 will be called via output hook. But this time,
+ # re-queue the packet to nfqueue program on queue 2.
+ ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" &
+
+ sleep 1
+ ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
+ rm -f "$tmpfile"
+
+ wait
+
+ if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
+ echo "FAIL: lost packets during requeue?!" 1>&2
+ return
+ fi
+
+ echo "PASS: tcp via loopback and re-queueing"
}
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -328,5 +371,6 @@ test_queue 20
test_tcp_forward
test_tcp_localhost
+test_tcp_localhost_requeue
exit $ret