diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-31 06:11:22 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-31 06:11:22 +0200 |
commit | 036e34310931e64ce4f1edead435708cd517db10 (patch) | |
tree | 7bd50541ef391bf0699b5d016a7a6fd697a5fdfa /net | |
parent | Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm6... (diff) | |
parent | net: correct zerocopy refcnt with udp MSG_MORE (diff) | |
download | linux-036e34310931e64ce4f1edead435708cd517db10.tar.xz linux-036e34310931e64ce4f1edead435708cd517db10.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
1) Fix OOPS during nf_tables rule dump, from Florian Westphal.
2) Use after free in ip_vs_in, from Yue Haibing.
3) Fix various kTLS bugs (NULL deref during device removal resync,
netdev notification ignoring, etc.) From Jakub Kicinski.
4) Fix ipv6 redirects with VRF, from David Ahern.
5) Memory leak fix in igmpv3_del_delrec(), from Eric Dumazet.
6) Missing memory allocation failure check in ip6_ra_control(), from
Gen Zhang. And likewise fix ip_ra_control().
7) TX clean budget logic error in aquantia, from Igor Russkikh.
8) SKB leak in llc_build_and_send_ui_pkt(), from Eric Dumazet.
9) Double frees in mlx5, from Parav Pandit.
10) Fix lost MAC address in r8169 during PCI D3, from Heiner Kallweit.
11) Fix botched register access in mvpp2, from Antoine Tenart.
12) Use after free in napi_gro_frags(), from Eric Dumazet.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (89 commits)
net: correct zerocopy refcnt with udp MSG_MORE
ethtool: Check for vlan etype or vlan tci when parsing flow_rule
net: don't clear sock->sk early to avoid trouble in strparser
net-gro: fix use-after-free read in napi_gro_frags()
net: dsa: tag_8021q: Create a stable binary format
net: dsa: tag_8021q: Change order of rx_vid setup
net: mvpp2: fix bad MVPP2_TXQ_SCHED_TOKEN_CNTR_REG queue value
ipv4: tcp_input: fix stack out of bounds when parsing TCP options.
mlxsw: spectrum: Prevent force of 56G
mlxsw: spectrum_acl: Avoid warning after identical rules insertion
net: dsa: mv88e6xxx: fix handling of upper half of STATS_TYPE_PORT
r8169: fix MAC address being lost in PCI D3
net: core: support XDP generic on stacked devices.
netvsc: unshare skb in VF rx handler
udp: Avoid post-GRO UDP checksum recalculation
net: phy: dp83867: Set up RGMII TX delay
net: phy: dp83867: do not call config_init twice
net: phy: dp83867: increase SGMII autoneg timer duration
net: phy: dp83867: fix speed 10 in sgmii mode
net: phy: marvell10g: report if the PHY fails to boot firmware
...
Diffstat (limited to 'net')
-rw-r--r-- | net/core/dev.c | 60 | ||||
-rw-r--r-- | net/core/ethtool.c | 8 | ||||
-rw-r--r-- | net/core/skbuff.c | 6 | ||||
-rw-r--r-- | net/dsa/tag_8021q.c | 79 | ||||
-rw-r--r-- | net/hsr/hsr_framereg.c | 8 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 2 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 47 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/nft_fib_ipv4.c | 23 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 2 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 57 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 4 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 2 | ||||
-rw-r--r-- | net/ipv6/netfilter/nft_fib_ipv6.c | 16 | ||||
-rw-r--r-- | net/ipv6/route.c | 6 | ||||
-rw-r--r-- | net/llc/llc_output.c | 2 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 2 | ||||
-rw-r--r-- | net/netfilter/nf_flow_table_ip.c | 3 | ||||
-rw-r--r-- | net/netfilter/nf_nat_helper.c | 2 | ||||
-rw-r--r-- | net/netfilter/nf_queue.c | 1 | ||||
-rw-r--r-- | net/netfilter/nf_tables_api.c | 20 | ||||
-rw-r--r-- | net/netfilter/nft_fib.c | 6 | ||||
-rw-r--r-- | net/netfilter/nft_flow_offload.c | 31 | ||||
-rw-r--r-- | net/sched/act_api.c | 3 | ||||
-rw-r--r-- | net/tls/tls_device.c | 24 | ||||
-rw-r--r-- | net/tls/tls_sw.c | 19 |
27 files changed, 245 insertions, 194 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index b6b8505cfb3e..66f7508825bd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4502,23 +4502,6 @@ static int netif_rx_internal(struct sk_buff *skb) trace_netif_rx(skb); - if (static_branch_unlikely(&generic_xdp_needed_key)) { - int ret; - - preempt_disable(); - rcu_read_lock(); - ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb); - rcu_read_unlock(); - preempt_enable(); - - /* Consider XDP consuming the packet a success from - * the netdev point of view we do not want to count - * this as an error. - */ - if (ret != XDP_PASS) - return NET_RX_SUCCESS; - } - #ifdef CONFIG_RPS if (static_branch_unlikely(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -4858,6 +4841,18 @@ another_round: __this_cpu_inc(softnet_data.processed); + if (static_branch_unlikely(&generic_xdp_needed_key)) { + int ret2; + + preempt_disable(); + ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb); + preempt_enable(); + + if (ret2 != XDP_PASS) + return NET_RX_DROP; + skb_reset_mac_len(skb); + } + if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = skb_vlan_untag(skb); @@ -5178,19 +5173,6 @@ static int netif_receive_skb_internal(struct sk_buff *skb) if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; - if (static_branch_unlikely(&generic_xdp_needed_key)) { - int ret; - - preempt_disable(); - rcu_read_lock(); - ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb); - rcu_read_unlock(); - preempt_enable(); - - if (ret != XDP_PASS) - return NET_RX_DROP; - } - rcu_read_lock(); #ifdef CONFIG_RPS if (static_branch_unlikely(&rps_needed)) { @@ -5211,7 +5193,6 @@ static int netif_receive_skb_internal(struct sk_buff *skb) static void netif_receive_skb_list_internal(struct list_head *head) { - struct bpf_prog *xdp_prog = NULL; struct sk_buff *skb, *next; struct list_head sublist; @@ -5224,21 +5205,6 @@ static void netif_receive_skb_list_internal(struct list_head *head) } list_splice_init(&sublist, head); - if (static_branch_unlikely(&generic_xdp_needed_key)) { - preempt_disable(); - rcu_read_lock(); - list_for_each_entry_safe(skb, next, head, list) { - xdp_prog = rcu_dereference(skb->dev->xdp_prog); - skb_list_del_init(skb); - if (do_xdp_generic(xdp_prog, skb) == XDP_PASS) - list_add_tail(&skb->list, &sublist); - } - rcu_read_unlock(); - preempt_enable(); - /* Put passed packets back on main list */ - list_splice_init(&sublist, head); - } - rcu_read_lock(); #ifdef CONFIG_RPS if (static_branch_unlikely(&rps_needed)) { @@ -5809,7 +5775,6 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) skb_reset_mac_header(skb); skb_gro_reset_offset(skb); - eth = skb_gro_header_fast(skb, 0); if (unlikely(skb_gro_header_hard(skb, hlen))) { eth = skb_gro_header_slow(skb, hlen, 0); if (unlikely(!eth)) { @@ -5819,6 +5784,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) return NULL; } } else { + eth = (const struct ethhdr *)skb->data; gro_pull_from_frag0(skb, hlen); NAPI_GRO_CB(skb)->frag0 += hlen; NAPI_GRO_CB(skb)->frag0_len -= hlen; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4a593853cbf2..43e9add58340 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -3010,11 +3010,12 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; - if (ext_m_spec->vlan_etype && - ext_m_spec->vlan_tci) { + if (ext_m_spec->vlan_etype) { match->key.vlan.vlan_tpid = ext_h_spec->vlan_etype; match->mask.vlan.vlan_tpid = ext_m_spec->vlan_etype; + } + if (ext_m_spec->vlan_tci) { match->key.vlan.vlan_id = ntohs(ext_h_spec->vlan_tci) & 0x0fff; match->mask.vlan.vlan_id = @@ -3024,7 +3025,10 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) (ntohs(ext_h_spec->vlan_tci) & 0xe000) >> 13; match->mask.vlan.vlan_priority = (ntohs(ext_m_spec->vlan_tci) & 0xe000) >> 13; + } + if (ext_m_spec->vlan_etype || + ext_m_spec->vlan_tci) { match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_VLAN); match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] = diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e89be6282693..eaad23f9c7b5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1036,7 +1036,11 @@ struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, uarg->len++; uarg->bytelen = bytelen; atomic_set(&sk->sk_zckey, ++next); - sock_zerocopy_get(uarg); + + /* no extra ref when appending to datagram (MSG_MORE) */ + if (sk->sk_type == SOCK_STREAM) + sock_zerocopy_get(uarg); + return uarg; } } diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 8ae48c7e1e76..65a35e976d7b 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -11,20 +11,59 @@ #include "dsa_priv.h" -/* Allocating two VLAN tags per port - one for the RX VID and - * the other for the TX VID - see below +/* Binary structure of the fake 12-bit VID field (when the TPID is + * ETH_P_DSA_8021Q): + * + * | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + * +-----------+-----+-----------------+-----------+-----------------------+ + * | DIR | RSV | SWITCH_ID | RSV | PORT | + * +-----------+-----+-----------------+-----------+-----------------------+ + * + * DIR - VID[11:10]: + * Direction flags. + * * 1 (0b01) for RX VLAN, + * * 2 (0b10) for TX VLAN. + * These values make the special VIDs of 0, 1 and 4095 to be left + * unused by this coding scheme. + * + * RSV - VID[9]: + * To be used for further expansion of SWITCH_ID or for other purposes. + * + * SWITCH_ID - VID[8:6]: + * Index of switch within DSA tree. Must be between 0 and + * DSA_MAX_SWITCHES - 1. + * + * RSV - VID[5:4]: + * To be used for further expansion of PORT or for other purposes. + * + * PORT - VID[3:0]: + * Index of switch port. Must be between 0 and DSA_MAX_PORTS - 1. */ -#define DSA_8021Q_VID_RANGE (DSA_MAX_SWITCHES * DSA_MAX_PORTS) -#define DSA_8021Q_VID_BASE (VLAN_N_VID - 2 * DSA_8021Q_VID_RANGE - 1) -#define DSA_8021Q_RX_VID_BASE (DSA_8021Q_VID_BASE) -#define DSA_8021Q_TX_VID_BASE (DSA_8021Q_VID_BASE + DSA_8021Q_VID_RANGE) + +#define DSA_8021Q_DIR_SHIFT 10 +#define DSA_8021Q_DIR_MASK GENMASK(11, 10) +#define DSA_8021Q_DIR(x) (((x) << DSA_8021Q_DIR_SHIFT) & \ + DSA_8021Q_DIR_MASK) +#define DSA_8021Q_DIR_RX DSA_8021Q_DIR(1) +#define DSA_8021Q_DIR_TX DSA_8021Q_DIR(2) + +#define DSA_8021Q_SWITCH_ID_SHIFT 6 +#define DSA_8021Q_SWITCH_ID_MASK GENMASK(8, 6) +#define DSA_8021Q_SWITCH_ID(x) (((x) << DSA_8021Q_SWITCH_ID_SHIFT) & \ + DSA_8021Q_SWITCH_ID_MASK) + +#define DSA_8021Q_PORT_SHIFT 0 +#define DSA_8021Q_PORT_MASK GENMASK(3, 0) +#define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \ + DSA_8021Q_PORT_MASK) /* Returns the VID to be inserted into the frame from xmit for switch steering * instructions on egress. Encodes switch ID and port ID. */ u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port) { - return DSA_8021Q_TX_VID_BASE + (DSA_MAX_PORTS * ds->index) + port; + return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(ds->index) | + DSA_8021Q_PORT(port); } EXPORT_SYMBOL_GPL(dsa_8021q_tx_vid); @@ -33,21 +72,22 @@ EXPORT_SYMBOL_GPL(dsa_8021q_tx_vid); */ u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port) { - return DSA_8021Q_RX_VID_BASE + (DSA_MAX_PORTS * ds->index) + port; + return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) | + DSA_8021Q_PORT(port); } EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid); /* Returns the decoded switch ID from the RX VID. */ int dsa_8021q_rx_switch_id(u16 vid) { - return ((vid - DSA_8021Q_RX_VID_BASE) / DSA_MAX_PORTS); + return (vid & DSA_8021Q_SWITCH_ID_MASK) >> DSA_8021Q_SWITCH_ID_SHIFT; } EXPORT_SYMBOL_GPL(dsa_8021q_rx_switch_id); /* Returns the decoded port ID from the RX VID. */ int dsa_8021q_rx_source_port(u16 vid) { - return ((vid - DSA_8021Q_RX_VID_BASE) % DSA_MAX_PORTS); + return (vid & DSA_8021Q_PORT_MASK) >> DSA_8021Q_PORT_SHIFT; } EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port); @@ -128,10 +168,7 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) u16 flags; if (i == upstream) - /* CPU port needs to see this port's RX VID - * as tagged egress. - */ - flags = 0; + continue; else if (i == port) /* The RX VID is pvid on this port */ flags = BRIDGE_VLAN_INFO_UNTAGGED | @@ -150,6 +187,20 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) return err; } } + + /* CPU port needs to see this port's RX VID + * as tagged egress. + */ + if (enabled) + err = dsa_port_vid_add(upstream_dp, rx_vid, 0); + else + err = dsa_port_vid_del(upstream_dp, rx_vid); + if (err) { + dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %d\n", + rx_vid, port, err); + return err; + } + /* Finally apply the TX VID on this port and on the CPU port */ if (enabled) err = dsa_port_vid_add(dp, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED); diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 9fa9abd83018..2d7a19750436 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -365,6 +365,14 @@ void hsr_prune_nodes(struct timer_list *t) rcu_read_lock(); list_for_each_entry_rcu(node, &hsr->node_db, mac_list) { + /* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A] + * nor time_in[HSR_PT_SLAVE_B], will ever be updated for + * the master port. Thus the master node will be repeatedly + * pruned leading to packet loss. + */ + if (hsr_addr_is_self(hsr, node->macaddress_A)) + continue; + /* Shorthand */ time_a = node->time_in[HSR_PT_SLAVE_A]; time_b = node->time_in[HSR_PT_SLAVE_B]; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5183a2daba64..aff93e7cdb31 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -428,8 +428,8 @@ int inet_release(struct socket *sock) if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING)) timeout = sk->sk_lingertime; - sock->sk = NULL; sk->sk_prot->close(sk, timeout); + sock->sk = NULL; } return 0; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6c2febc39dca..eb03153dfe12 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -188,6 +188,17 @@ static void ip_ma_put(struct ip_mc_list *im) pmc != NULL; \ pmc = rtnl_dereference(pmc->next_rcu)) +static void ip_sf_list_clear_all(struct ip_sf_list *psf) +{ + struct ip_sf_list *next; + + while (psf) { + next = psf->sf_next; + kfree(psf); + psf = next; + } +} + #ifdef CONFIG_IP_MULTICAST /* @@ -633,6 +644,13 @@ static void igmpv3_clear_zeros(struct ip_sf_list **ppsf) } } +static void kfree_pmc(struct ip_mc_list *pmc) +{ + ip_sf_list_clear_all(pmc->sources); + ip_sf_list_clear_all(pmc->tomb); + kfree(pmc); +} + static void igmpv3_send_cr(struct in_device *in_dev) { struct ip_mc_list *pmc, *pmc_prev, *pmc_next; @@ -669,7 +687,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) else in_dev->mc_tomb = pmc_next; in_dev_put(pmc->interface); - kfree(pmc); + kfree_pmc(pmc); } else pmc_prev = pmc; } @@ -1215,14 +1233,18 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) im->interface = pmc->interface; if (im->sfmode == MCAST_INCLUDE) { im->tomb = pmc->tomb; + pmc->tomb = NULL; + im->sources = pmc->sources; + pmc->sources = NULL; + for (psf = im->sources; psf; psf = psf->sf_next) psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; } else { im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; } in_dev_put(pmc->interface); - kfree(pmc); + kfree_pmc(pmc); } spin_unlock_bh(&im->lock); } @@ -1243,21 +1265,18 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) nextpmc = pmc->next; ip_mc_clear_src(pmc); in_dev_put(pmc->interface); - kfree(pmc); + kfree_pmc(pmc); } /* clear dead sources, too */ rcu_read_lock(); for_each_pmc_rcu(in_dev, pmc) { - struct ip_sf_list *psf, *psf_next; + struct ip_sf_list *psf; spin_lock_bh(&pmc->lock); psf = pmc->tomb; pmc->tomb = NULL; spin_unlock_bh(&pmc->lock); - for (; psf; psf = psf_next) { - psf_next = psf->sf_next; - kfree(psf); - } + ip_sf_list_clear_all(psf); } rcu_read_unlock(); } @@ -2123,7 +2142,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, static void ip_mc_clear_src(struct ip_mc_list *pmc) { - struct ip_sf_list *psf, *nextpsf, *tomb, *sources; + struct ip_sf_list *tomb, *sources; spin_lock_bh(&pmc->lock); tomb = pmc->tomb; @@ -2135,14 +2154,8 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) pmc->sfcount[MCAST_EXCLUDE] = 1; spin_unlock_bh(&pmc->lock); - for (psf = tomb; psf; psf = nextpsf) { - nextpsf = psf->sf_next; - kfree(psf); - } - for (psf = sources; psf; psf = nextpsf) { - nextpsf = psf->sf_next; - kfree(psf); - } + ip_sf_list_clear_all(tomb); + ip_sf_list_clear_all(sources); } /* Join a multicast group diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index bfd0ca554977..8c9189a41b13 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -878,7 +878,7 @@ static int __ip_append_data(struct sock *sk, int csummode = CHECKSUM_NONE; struct rtable *rt = (struct rtable *)cork->dst; unsigned int wmem_alloc_delta = 0; - bool paged, extra_uref; + bool paged, extra_uref = false; u32 tskey = 0; skb = skb_peek_tail(queue); @@ -918,7 +918,7 @@ static int __ip_append_data(struct sock *sk, uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); if (!uarg) return -ENOBUFS; - extra_uref = true; + extra_uref = !skb; /* only extra ref if !MSG_MORE */ if (rt->dst.dev->features & NETIF_F_SG && csummode == CHECKSUM_PARTIAL) { paged = true; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 82f341e84fae..aa3fd61818c4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -343,6 +343,8 @@ int ip_ra_control(struct sock *sk, unsigned char on, return -EINVAL; new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; + if (on && !new_ra) + return -ENOMEM; mutex_lock(&net->ipv4.ra_mutex); for (rap = &net->ipv4.ra_chain; diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 94eb25bc8d7e..c8888e52591f 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -58,11 +58,6 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, } EXPORT_SYMBOL_GPL(nft_fib4_eval_type); -static int get_ifindex(const struct net_device *dev) -{ - return dev ? dev->ifindex : 0; -} - void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { @@ -94,8 +89,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv, pkt, - nft_in(pkt)->ifindex); + nft_fib_store_result(dest, priv, nft_in(pkt)); return; } @@ -108,8 +102,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, if (ipv4_is_zeronet(iph->saddr)) { if (ipv4_is_lbcast(iph->daddr) || ipv4_is_local_multicast(iph->daddr)) { - nft_fib_store_result(dest, priv, pkt, - get_ifindex(pkt->skb->dev)); + nft_fib_store_result(dest, priv, pkt->skb->dev); return; } } @@ -150,17 +143,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, found = oif; } - switch (priv->result) { - case NFT_FIB_RESULT_OIF: - *dest = found->ifindex; - break; - case NFT_FIB_RESULT_OIFNAME: - strncpy((char *)dest, found->name, IFNAMSIZ); - break; - default: - WARN_ON_ONCE(1); - break; - } + nft_fib_store_result(dest, priv, found); } EXPORT_SYMBOL_GPL(nft_fib4_eval); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c61edd023b35..08a477e74cf3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3791,6 +3791,8 @@ void tcp_parse_options(const struct net *net, length--; continue; default: + if (length < 2) + return; opsize = *ptr++; if (opsize < 2) /* "silly options" */ return; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f96d1de79509..b51630ddb728 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5661,18 +5661,6 @@ static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = { [IFLA_INET6_TOKEN] = { .len = sizeof(struct in6_addr) }, }; -static int inet6_validate_link_af(const struct net_device *dev, - const struct nlattr *nla) -{ - struct nlattr *tb[IFLA_INET6_MAX + 1]; - - if (dev && !__in6_dev_get(dev)) - return -EAFNOSUPPORT; - - return nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, - inet6_af_policy, NULL); -} - static int check_addr_gen_mode(int mode) { if (mode != IN6_ADDR_GEN_MODE_EUI64 && @@ -5693,14 +5681,44 @@ static int check_stable_privacy(struct inet6_dev *idev, struct net *net, return 1; } +static int inet6_validate_link_af(const struct net_device *dev, + const struct nlattr *nla) +{ + struct nlattr *tb[IFLA_INET6_MAX + 1]; + struct inet6_dev *idev = NULL; + int err; + + if (dev) { + idev = __in6_dev_get(dev); + if (!idev) + return -EAFNOSUPPORT; + } + + err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, + inet6_af_policy, NULL); + if (err) + return err; + + if (!tb[IFLA_INET6_TOKEN] && !tb[IFLA_INET6_ADDR_GEN_MODE]) + return -EINVAL; + + if (tb[IFLA_INET6_ADDR_GEN_MODE]) { + u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); + + if (check_addr_gen_mode(mode) < 0) + return -EINVAL; + if (dev && check_stable_privacy(idev, dev_net(dev), mode) < 0) + return -EINVAL; + } + + return 0; +} + static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) { - int err = -EINVAL; struct inet6_dev *idev = __in6_dev_get(dev); struct nlattr *tb[IFLA_INET6_MAX + 1]; - - if (!idev) - return -EAFNOSUPPORT; + int err; if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) BUG(); @@ -5714,15 +5732,10 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) if (tb[IFLA_INET6_ADDR_GEN_MODE]) { u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); - if (check_addr_gen_mode(mode) < 0 || - check_stable_privacy(idev, dev_net(dev), mode) < 0) - return -EINVAL; - idev->cnf.addr_gen_mode = mode; - err = 0; } - return err; + return 0; } static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index adef2236abe2..f9e43323e667 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1275,7 +1275,7 @@ static int __ip6_append_data(struct sock *sk, int csummode = CHECKSUM_NONE; unsigned int maxnonfragsize, headersize; unsigned int wmem_alloc_delta = 0; - bool paged, extra_uref; + bool paged, extra_uref = false; skb = skb_peek_tail(queue); if (!skb) { @@ -1344,7 +1344,7 @@ emsgsize: uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); if (!uarg) return -ENOBUFS; - extra_uref = true; + extra_uref = !skb; /* only extra ref if !MSG_MORE */ if (rt->dst.dev->features & NETIF_F_SG && csummode == CHECKSUM_PARTIAL) { paged = true; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 40f21fef25ff..0a3d035feb61 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -68,6 +68,8 @@ int ip6_ra_control(struct sock *sk, int sel) return -ENOPROTOOPT; new_ra = (sel >= 0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; + if (sel >= 0 && !new_ra) + return -ENOMEM; write_lock_bh(&ip6_ra_lock); for (rap = &ip6_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 73cdc0bc63f7..ec068b0cffca 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -169,8 +169,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv, pkt, - nft_in(pkt)->ifindex); + nft_fib_store_result(dest, priv, nft_in(pkt)); return; } @@ -187,18 +186,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, if (oif && oif != rt->rt6i_idev->dev) goto put_rt_err; - switch (priv->result) { - case NFT_FIB_RESULT_OIF: - *dest = rt->rt6i_idev->dev->ifindex; - break; - case NFT_FIB_RESULT_OIFNAME: - strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ); - break; - default: - WARN_ON_ONCE(1); - break; - } - + nft_fib_store_result(dest, priv, rt->rt6i_idev->dev); put_rt_err: ip6_rt_put(rt); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7a014ca877ed..848e944f07df 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2512,6 +2512,12 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_info *rt; struct fib6_node *fn; + /* l3mdev_update_flow overrides oif if the device is enslaved; in + * this case we must match on the real ingress device, so reset it + */ + if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) + fl6->flowi6_oif = skb->dev->ifindex; + /* Get the "current" route for this destination and * check if the redirect has come from appropriate router. * diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c index 94425e421213..9e4b6bcf6920 100644 --- a/net/llc/llc_output.c +++ b/net/llc/llc_output.c @@ -72,6 +72,8 @@ int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb, rc = llc_mac_hdr_init(skb, skb->dev->dev_addr, dmac); if (likely(!rc)) rc = dev_queue_xmit(skb); + else + kfree_skb(skb); return rc; } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 14457551bcb4..8ebf21149ec3 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2312,7 +2312,6 @@ static void __net_exit __ip_vs_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */ ip_vs_conn_net_cleanup(ipvs); ip_vs_app_net_cleanup(ipvs); @@ -2327,6 +2326,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); EnterFunction(2); + nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); ipvs->enable = 0; /* Disable packet reception */ smp_wmb(); ip_vs_sync_net_cleanup(ipvs); diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 96825e20368f..241317473114 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -244,8 +244,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache; outdev = rt->dst.dev; - if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) && - (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0) + if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) return NF_ACCEPT; if (skb_try_make_writable(skb, sizeof(*iph))) diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index ccc06f7539d7..53aeb12b70fb 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -170,7 +170,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) return true; - nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_TCP, + nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_UDP, udph, &udph->check, datalen, oldlen); return true; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 9dc1d6e04946..b5b2be55ca82 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -255,6 +255,7 @@ static unsigned int nf_iterate(struct sk_buff *skb, repeat: verdict = nf_hook_entry_hookfn(hook, skb, state); if (verdict != NF_ACCEPT) { + *index = i; if (verdict != NF_REPEAT) return verdict; goto repeat; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 28241e82fd15..4b5159936034 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2270,13 +2270,13 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, u32 flags, int family, const struct nft_table *table, const struct nft_chain *chain, - const struct nft_rule *rule) + const struct nft_rule *rule, + const struct nft_rule *prule) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; const struct nft_expr *expr, *next; struct nlattr *list; - const struct nft_rule *prule; u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); @@ -2296,8 +2296,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, NFTA_RULE_PAD)) goto nla_put_failure; - if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) { - prule = list_prev_entry(rule, list); + if (event != NFT_MSG_DELRULE && prule) { if (nla_put_be64(skb, NFTA_RULE_POSITION, cpu_to_be64(prule->handle), NFTA_RULE_PAD)) @@ -2344,7 +2343,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, event, 0, ctx->family, ctx->table, - ctx->chain, rule); + ctx->chain, rule, NULL); if (err < 0) { kfree_skb(skb); goto err; @@ -2369,12 +2368,13 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, const struct nft_chain *chain) { struct net *net = sock_net(skb->sk); + const struct nft_rule *rule, *prule; unsigned int s_idx = cb->args[0]; - const struct nft_rule *rule; + prule = NULL; list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_is_active(net, rule)) - goto cont; + goto cont_skip; if (*idx < s_idx) goto cont; if (*idx > s_idx) { @@ -2386,11 +2386,13 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, NFT_MSG_NEWRULE, NLM_F_MULTI | NLM_F_APPEND, table->family, - table, chain, rule) < 0) + table, chain, rule, prule) < 0) return 1; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: + prule = rule; +cont_skip: (*idx)++; } return 0; @@ -2546,7 +2548,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, - family, table, chain, rule); + family, table, chain, rule, NULL); if (err < 0) goto err; diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 21df8cccea65..77f00a99dfab 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -135,17 +135,17 @@ int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr) EXPORT_SYMBOL_GPL(nft_fib_dump); void nft_fib_store_result(void *reg, const struct nft_fib *priv, - const struct nft_pktinfo *pkt, int index) + const struct net_device *dev) { - struct net_device *dev; u32 *dreg = reg; + int index; switch (priv->result) { case NFT_FIB_RESULT_OIF: + index = dev ? dev->ifindex : 0; *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index; break; case NFT_FIB_RESULT_OIFNAME: - dev = dev_get_by_index_rcu(nft_net(pkt), index); if (priv->flags & NFTA_FIB_F_PRESENT) *dreg = !!dev; else diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index ffb25d5e8dbe..aa5f571d4361 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -13,7 +13,6 @@ #include <net/netfilter/nf_conntrack_core.h> #include <linux/netfilter/nf_conntrack_common.h> #include <net/netfilter/nf_flow_table.h> -#include <net/netfilter/nf_conntrack_helper.h> struct nft_flow_offload { struct nft_flowtable *flowtable; @@ -50,15 +49,20 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, return 0; } -static bool nft_flow_offload_skip(struct sk_buff *skb) +static bool nft_flow_offload_skip(struct sk_buff *skb, int family) { - struct ip_options *opt = &(IPCB(skb)->opt); - - if (unlikely(opt->optlen)) - return true; if (skb_sec_path(skb)) return true; + if (family == NFPROTO_IPV4) { + const struct ip_options *opt; + + opt = &(IPCB(skb)->opt); + + if (unlikely(opt->optlen)) + return true; + } + return false; } @@ -68,15 +72,15 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, { struct nft_flow_offload *priv = nft_expr_priv(expr); struct nf_flowtable *flowtable = &priv->flowtable->data; - const struct nf_conn_help *help; enum ip_conntrack_info ctinfo; struct nf_flow_route route; struct flow_offload *flow; enum ip_conntrack_dir dir; + bool is_tcp = false; struct nf_conn *ct; int ret; - if (nft_flow_offload_skip(pkt->skb)) + if (nft_flow_offload_skip(pkt->skb, nft_pf(pkt))) goto out; ct = nf_ct_get(pkt->skb, &ctinfo); @@ -85,14 +89,16 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { case IPPROTO_TCP: + is_tcp = true; + break; case IPPROTO_UDP: break; default: goto out; } - help = nfct_help(ct); - if (help) + if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) || + ct->status & IPS_SEQ_ADJUST) goto out; if (!nf_ct_is_confirmed(ct)) @@ -109,6 +115,11 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (!flow) goto err_flow_alloc; + if (is_tcp) { + ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + } + ret = flow_offload_add(flowtable, flow); if (ret < 0) goto err_flow_add; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 683fcc00da49..c42ecf4b3c10 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -800,7 +800,7 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { a = actions[i]; - nest = nla_nest_start_noflag(skb, a->order); + nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure; err = tcf_action_dump_1(skb, a, bind, ref); @@ -1303,7 +1303,6 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, ret = PTR_ERR(act); goto err; } - act->order = i; attr_size += tcf_action_fill_size(act); actions[i - 1] = act; } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index ca54a7c7ec81..b95c408fd771 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -553,8 +553,8 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx) void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct net_device *netdev = tls_ctx->netdev; struct tls_offload_context_rx *rx_ctx; + struct net_device *netdev; u32 is_req_pending; s64 resync_req; u32 req_seq; @@ -568,10 +568,15 @@ void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) is_req_pending = resync_req; if (unlikely(is_req_pending) && req_seq == seq && - atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0)) - netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk, - seq + TLS_HEADER_SIZE - 1, - rcd_sn); + atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0)) { + seq += TLS_HEADER_SIZE - 1; + down_read(&device_offload_lock); + netdev = tls_ctx->netdev; + if (netdev) + netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk, seq, + rcd_sn); + up_read(&device_offload_lock); + } } static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) @@ -934,12 +939,6 @@ void tls_device_offload_cleanup_rx(struct sock *sk) if (!netdev) goto out; - if (!(netdev->features & NETIF_F_HW_TLS_RX)) { - pr_err_ratelimited("%s: device is missing NETIF_F_HW_TLS_RX cap\n", - __func__); - goto out; - } - netdev->tlsdev_ops->tls_dev_del(netdev, tls_ctx, TLS_OFFLOAD_CTX_DIR_RX); @@ -998,7 +997,8 @@ static int tls_dev_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - if (!(dev->features & (NETIF_F_HW_TLS_RX | NETIF_F_HW_TLS_TX))) + if (!dev->tlsdev_ops && + !(dev->features & (NETIF_F_HW_TLS_RX | NETIF_F_HW_TLS_TX))) return NOTIFY_DONE; switch (event) { diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d93f83f77864..960494f437ac 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1712,15 +1712,14 @@ int tls_sw_recvmsg(struct sock *sk, copied = err; } - len = len - copied; - if (len) { - target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - } else { + if (len <= copied) goto recv_end; - } - do { + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + len = len - copied; + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + while (len && (decrypted + copied < target || ctx->recv_pkt)) { bool retain_skb = false; bool zc = false; int to_decrypt; @@ -1851,11 +1850,7 @@ pick_next_record: } else { break; } - - /* If we have a new message from strparser, continue now. */ - if (decrypted >= target && !ctx->recv_pkt) - break; - } while (len); + } recv_end: if (num_async) { |