diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-24 08:18:46 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-24 08:18:46 +0100 |
commit | 1d3b78bbc6e983fabb3fbf91b76339bf66e4a12c (patch) | |
tree | 6fe02db448834fd114727b0685a19a61ef0367b4 /net | |
parent | Merge tag 'platform-drivers-x86-v4.15-2' of git://git.infradead.org/linux-pla... (diff) | |
parent | Merge branch 'ipvlan-Fix-insufficient-skb-linear-check' (diff) | |
download | linux-1d3b78bbc6e983fabb3fbf91b76339bf66e4a12c.tar.xz linux-1d3b78bbc6e983fabb3fbf91b76339bf66e4a12c.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
1) Fix PCI IDs of 9000 series iwlwifi devices, from Luca Coelho.
2) bpf offload bug fixes from Jakub Kicinski.
3) Fix bpf verifier to NOP out code which is dead at run time because
due to branch pruning the verifier will not explore such
instructions. From Alexei Starovoitov.
4) Fix crash when deleting secondary chains in packet scheduler
classifier. From Roman Kapl.
5) Fix buffer management bugs in smc, from Ursula Braun.
6) Fix regression in anycast route handling, from David Ahern.
7) Fix link settings regression in r8169, from Tobias Jakobi.
8) Add back enough UFO support so that live migration still works, from
Willem de Bruijn.
9) Linearize enough packet data for the full extent to which the ipvlan
code will inspect the packet headers, from Gao Feng.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (46 commits)
ipvlan: Fix insufficient skb linear check for ipv6 icmp
ipvlan: Fix insufficient skb linear check for arp
geneve: only configure or fill UDP_ZERO_CSUM6_RX/TX info when CONFIG_IPV6
net: dsa: bcm_sf2: Clear IDDQ_GLOBAL_PWR bit for PHY
net: accept UFO datagrams from tuntap and packet
net: realtek: r8169: implement set_link_ksettings()
net: ipv6: Fixup device for anycast routes during copy
net/smc: Fix preinitialization of buf_desc in __smc_buf_create()
net/smc: use sk_rcvbuf as start for rmb creation
ipv6: Do not consider linkdown nexthops during multipath
net: sched: fix crash when deleting secondary chains
net: phy: cortina: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE
bpf: fix branch pruning logic
bpf: change bpf_perf_event_output arg5 type to ARG_CONST_SIZE_OR_ZERO
bpf: change bpf_probe_read_str arg2 type to ARG_CONST_SIZE_OR_ZERO
bpf: remove explicit handling of 0 for arg2 in bpf_probe_read
bpf: introduce ARG_PTR_TO_MEM_OR_NULL
i40evf: Use smp_rmb rather than read_barrier_depends
fm10k: Use smp_rmb rather than read_barrier_depends
igb: Use smp_rmb rather than read_barrier_depends
...
Diffstat (limited to 'net')
-rw-r--r-- | net/core/dev.c | 17 | ||||
-rw-r--r-- | net/core/filter.c | 4 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 12 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 49 | ||||
-rw-r--r-- | net/ipv6/output_core.c | 31 | ||||
-rw-r--r-- | net/ipv6/route.c | 7 | ||||
-rw-r--r-- | net/ipv6/udp_offload.c | 85 | ||||
-rw-r--r-- | net/openvswitch/datapath.c | 14 | ||||
-rw-r--r-- | net/openvswitch/flow.c | 6 | ||||
-rw-r--r-- | net/sched/act_csum.c | 6 | ||||
-rw-r--r-- | net/sched/cls_api.c | 7 | ||||
-rw-r--r-- | net/sched/cls_bpf.c | 8 | ||||
-rw-r--r-- | net/smc/smc_core.c | 4 |
13 files changed, 221 insertions, 29 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 8ee29f4f5fa9..07ed21d64f92 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2746,7 +2746,8 @@ EXPORT_SYMBOL(skb_mac_gso_segment); static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) { if (tx_path) - return skb->ip_summed != CHECKSUM_PARTIAL; + return skb->ip_summed != CHECKSUM_PARTIAL && + skb->ip_summed != CHECKSUM_UNNECESSARY; return skb->ip_summed == CHECKSUM_NONE; } @@ -7139,13 +7140,17 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, __dev_xdp_attached(dev, bpf_op, NULL)) return -EBUSY; - if (bpf_op == ops->ndo_bpf) - prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, - dev); - else - prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); + prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, + bpf_op == ops->ndo_bpf); if (IS_ERR(prog)) return PTR_ERR(prog); + + if (!(flags & XDP_FLAGS_HW_MODE) && + bpf_prog_is_dev_bound(prog->aux)) { + NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported"); + bpf_prog_put(prog); + return -EINVAL; + } } err = dev_xdp_install(dev, bpf_op, extack, flags, prog); diff --git a/net/core/filter.c b/net/core/filter.c index 1afa17935954..6a85e67fafce 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1646,9 +1646,9 @@ static const struct bpf_func_proto bpf_csum_diff_proto = { .gpl_only = false, .pkt_access = true, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM, + .arg1_type = ARG_PTR_TO_MEM_OR_NULL, .arg2_type = ARG_CONST_SIZE_OR_ZERO, - .arg3_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_PTR_TO_MEM_OR_NULL, .arg4_type = ARG_CONST_SIZE_OR_ZERO, .arg5_type = ARG_ANYTHING, }; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ce4aa827be05..f00499a46927 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1223,9 +1223,10 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { - bool fixedid = false, gso_partial, encap; + bool udpfrag = false, fixedid = false, gso_partial, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; + unsigned int offset = 0; struct iphdr *iph; int proto, tot_len; int nhoff; @@ -1260,6 +1261,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); if (!skb->encapsulation || encap) { + udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); /* fixed ID is invalid if DF bit is not set */ @@ -1279,7 +1281,13 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); - if (skb_is_gso(skb)) { + if (udpfrag) { + iph->frag_off = htons(offset >> 3); + if (skb->next) + iph->frag_off |= htons(IP_MF); + offset += skb->len - nhoff - ihl; + tot_len = skb->len - nhoff; + } else if (skb_is_gso(skb)) { if (!fixedid) { iph->id = htons(id); id += skb_shinfo(skb)->gso_segs; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index e360d55be555..01801b77bd0d 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -187,16 +187,57 @@ out_unlock: } EXPORT_SYMBOL(skb_udp_tunnel_segment); -static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int mss; + __wsum csum; + struct udphdr *uh; + struct iphdr *iph; if (skb->encapsulation && (skb_shinfo(skb)->gso_type & - (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { segs = skb_udp_tunnel_segment(skb, features, false); + goto out; + } + + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + uh = udp_hdr(skb); + iph = ip_hdr(skb); + + uh->check = 0; + csum = skb_checksum(skb, 0, skb->len, 0); + uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* If there is no outer header we can fake a checksum offload + * due to the fact that we have already done the checksum in + * software prior to segmenting the frame. + */ + if (!skb->encap_hdr_csum) + features |= NETIF_F_HW_CSUM; + + /* Fragment the skb. IP headers of the fragments are updated in + * inet_gso_segment() + */ + segs = skb_segment(skb, features); +out: return segs; } @@ -330,7 +371,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) static const struct net_offload udpv4_offload = { .callbacks = { - .gso_segment = udp4_tunnel_segment, + .gso_segment = udp4_ufo_fragment, .gro_receive = udp4_gro_receive, .gro_complete = udp4_gro_complete, }, diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 4a7e5ffa5108..4fe7c90962dd 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -31,6 +31,37 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, return id; } +/* This function exists only for tap drivers that must support broken + * clients requesting UFO without specifying an IPv6 fragment ID. + * + * This is similar to ipv6_select_ident() but we use an independent hash + * seed to limit information leakage. + * + * The network header must be set before calling this. + */ +__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) +{ + static u32 ip6_proxy_idents_hashrnd __read_mostly; + struct in6_addr buf[2]; + struct in6_addr *addrs; + u32 id; + + addrs = skb_header_pointer(skb, + skb_network_offset(skb) + + offsetof(struct ipv6hdr, saddr), + sizeof(buf), buf); + if (!addrs) + return 0; + + net_get_random_once(&ip6_proxy_idents_hashrnd, + sizeof(ip6_proxy_idents_hashrnd)); + + id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, + &addrs[1], &addrs[0]); + return htonl(id); +} +EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); + __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 05eb7bc36156..7a8d1500d374 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -472,6 +472,11 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, &match->rt6i_siblings, rt6i_siblings) { route_choosen--; if (route_choosen == 0) { + struct inet6_dev *idev = sibling->rt6i_idev; + + if (!netif_carrier_ok(sibling->dst.dev) && + idev->cnf.ignore_routes_with_linkdown) + break; if (rt6_score_route(sibling, oif, strict) < 0) break; match = sibling; @@ -1019,7 +1024,7 @@ static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt) { struct net_device *dev = rt->dst.dev; - if (rt->rt6i_flags & RTF_LOCAL) { + if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) { /* for copies of local routes, dst->dev needs to be the * device if it is a master device, the master device if * device is enslaved, and the loopback as the default diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 455fd4e39333..a0f89ad76f9d 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -17,15 +17,94 @@ #include <net/ip6_checksum.h> #include "ip6_offload.h" -static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int mss; + unsigned int unfrag_ip6hlen, unfrag_len; + struct frag_hdr *fptr; + u8 *packet_start, *prevhdr; + u8 nexthdr; + u8 frag_hdr_sz = sizeof(struct frag_hdr); + __wsum csum; + int tnl_hlen; + int err; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features, true); + else { + const struct ipv6hdr *ipv6h; + struct udphdr *uh; + + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot + * do checksum of UDP packets sent as multiple IP fragments. + */ + + uh = udp_hdr(skb); + ipv6h = ipv6_hdr(skb); + + uh->check = 0; + csum = skb_checksum(skb, 0, skb->len, 0); + uh->check = udp_v6_check(skb->len, &ipv6h->saddr, + &ipv6h->daddr, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* If there is no outer header we can fake a checksum offload + * due to the fact that we have already done the checksum in + * software prior to segmenting the frame. + */ + if (!skb->encap_hdr_csum) + features |= NETIF_F_HW_CSUM; + + /* Check if there is enough headroom to insert fragment header. */ + tnl_hlen = skb_tnl_header_len(skb); + if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) { + if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) + goto out; + } + + /* Find the unfragmentable header and shift it left by frag_hdr_sz + * bytes to insert fragment header. + */ + err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) + return ERR_PTR(err); + unfrag_ip6hlen = err; + nexthdr = *prevhdr; + *prevhdr = NEXTHDR_FRAGMENT; + unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + + unfrag_ip6hlen + tnl_hlen; + packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset; + memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len); + + SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz; + skb->mac_header -= frag_hdr_sz; + skb->network_header -= frag_hdr_sz; + + fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); + fptr->nexthdr = nexthdr; + fptr->reserved = 0; + fptr->identification = ipv6_proxy_select_ident(dev_net(skb->dev), skb); + + /* Fragment the skb. ipv6 header and the remaining fields of the + * fragment header are updated in ipv6_gso_segment() + */ + segs = skb_segment(skb, features); + } +out: return segs; } @@ -75,7 +154,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) static const struct net_offload udpv6_offload = { .callbacks = { - .gso_segment = udp6_tunnel_segment, + .gso_segment = udp6_ufo_fragment, .gro_receive = udp6_gro_receive, .gro_complete = udp6_gro_complete, }, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 0dab33fb9844..99cfafc2a139 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -308,6 +308,8 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info, uint32_t cutlen) { + unsigned short gso_type = skb_shinfo(skb)->gso_type; + struct sw_flow_key later_key; struct sk_buff *segs, *nskb; int err; @@ -318,9 +320,21 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, if (segs == NULL) return -EINVAL; + if (gso_type & SKB_GSO_UDP) { + /* The initial flow key extracted by ovs_flow_key_extract() + * in this case is for a first fragment, so we need to + * properly mark later fragments. + */ + later_key = *key; + later_key.ip.frag = OVS_FRAG_TYPE_LATER; + } + /* Queue all of the segments. */ skb = segs; do { + if (gso_type & SKB_GSO_UDP && skb != segs) + key = &later_key; + err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); if (err) break; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 864ddb1e3642..dbe2379329c5 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -631,7 +631,8 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) key->ip.frag = OVS_FRAG_TYPE_LATER; return 0; } - if (nh->frag_off & htons(IP_MF)) + if (nh->frag_off & htons(IP_MF) || + skb_shinfo(skb)->gso_type & SKB_GSO_UDP) key->ip.frag = OVS_FRAG_TYPE_FIRST; else key->ip.frag = OVS_FRAG_TYPE_NONE; @@ -747,6 +748,9 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) if (key->ip.frag == OVS_FRAG_TYPE_LATER) return 0; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + key->ip.frag = OVS_FRAG_TYPE_FIRST; + /* Transport layer. */ if (key->ip.proto == NEXTHDR_TCP) { if (tcphdr_ok(skb)) { diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 1c40caadcff9..d836f998117b 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -229,6 +229,9 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, const struct iphdr *iph; u16 ul; + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + return 1; + /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, @@ -282,6 +285,9 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, const struct ipv6hdr *ip6h; u16 ul; + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + return 1; + /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ab255b421781..7d97f612c9b9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -205,13 +205,14 @@ static void tcf_chain_head_change(struct tcf_chain *chain, static void tcf_chain_flush(struct tcf_chain *chain) { - struct tcf_proto *tp; + struct tcf_proto *tp = rtnl_dereference(chain->filter_chain); tcf_chain_head_change(chain, NULL); - while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { + while (tp) { RCU_INIT_POINTER(chain->filter_chain, tp->next); - tcf_chain_put(chain); tcf_proto_destroy(tp); + tp = rtnl_dereference(chain->filter_chain); + tcf_chain_put(chain); } } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index fb680dafac5a..a9f3e317055c 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -382,15 +382,13 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, { struct bpf_prog *fp; char *name = NULL; + bool skip_sw; u32 bpf_fd; bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); + skip_sw = gen_flags & TCA_CLS_FLAGS_SKIP_SW; - if (gen_flags & TCA_CLS_FLAGS_SKIP_SW) - fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, - qdisc_dev(tp->q)); - else - fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); + fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, skip_sw); if (IS_ERR(fp)) return PTR_ERR(fp); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2578fbd95664..94f21116dac5 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -562,7 +562,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) { struct smc_connection *conn = &smc->conn; struct smc_link_group *lgr = conn->lgr; - struct smc_buf_desc *buf_desc = NULL; + struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); struct list_head *buf_list; int bufsize, bufsize_short; int sk_buf_size; @@ -575,7 +575,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) /* use socket send buffer size (w/o overhead) as start value */ sk_buf_size = smc->sk.sk_sndbuf / 2; - for (bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2); + for (bufsize_short = smc_compress_bufsize(sk_buf_size); bufsize_short >= 0; bufsize_short--) { if (is_rmb) { |