diff options
Diffstat (limited to 'net/ipv6')
36 files changed, 406 insertions, 371 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c8380f1876f1..d72fa90d6feb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -81,6 +81,7 @@ #include <net/ip.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/l3mdev.h> #include <linux/if_tunnel.h> #include <linux/rtnetlink.h> #include <linux/netconf.h> @@ -2146,7 +2147,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, u32 flags) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_PREFIX, + .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX, .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_expires = expires, @@ -2179,8 +2180,9 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; + u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX; - table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX); + table = fib6_get_table(dev_net(dev), tb_id); if (!table) return NULL; @@ -2211,7 +2213,7 @@ out: static void addrconf_add_mroute(struct net_device *dev) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_LOCAL, + .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_LOCAL, .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_dst_len = 8, @@ -3029,6 +3031,10 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) { struct in6_addr addr; + /* no link local addresses on L3 master devices */ + if (netif_is_l3_master(idev->dev)) + return; + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) { @@ -3119,6 +3125,8 @@ static void addrconf_gre_config(struct net_device *dev) } addrconf_addr_gen(idev, true); + if (dev->flags & IFF_POINTOPOINT) + addrconf_add_mroute(dev); } #endif @@ -3139,6 +3147,32 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } break; + case NETDEV_CHANGEMTU: + /* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */ + if (dev->mtu < IPV6_MIN_MTU) { + addrconf_ifdown(dev, 1); + break; + } + + if (idev) { + rt6_mtu_change(dev, dev->mtu); + idev->cnf.mtu6 = dev->mtu; + break; + } + + /* allocate new idev */ + idev = ipv6_add_dev(dev); + if (IS_ERR(idev)) + break; + + /* device is still not ready */ + if (!(idev->if_flags & IF_READY)) + break; + + run_pending = 1; + + /* fall through */ + case NETDEV_UP: case NETDEV_CHANGE: if (dev->flags & IFF_SLAVE) @@ -3162,7 +3196,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, idev->if_flags |= IF_READY; run_pending = 1; } - } else { + } else if (event == NETDEV_CHANGE) { if (!addrconf_qdisc_ok(dev)) { /* device is still not ready. */ break; @@ -3227,24 +3261,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } break; - case NETDEV_CHANGEMTU: - if (idev && dev->mtu >= IPV6_MIN_MTU) { - rt6_mtu_change(dev, dev->mtu); - idev->cnf.mtu6 = dev->mtu; - break; - } - - if (!idev && dev->mtu >= IPV6_MIN_MTU) { - idev = ipv6_add_dev(dev); - if (!IS_ERR(idev)) - break; - } - - /* - * if MTU under IPV6_MIN_MTU. - * Stop IPv6 on this interface. - */ - case NETDEV_DOWN: case NETDEV_UNREGISTER: /* @@ -4780,7 +4796,8 @@ nla_put_failure: return -EMSGSIZE; } -static size_t inet6_get_link_af_size(const struct net_device *dev) +static size_t inet6_get_link_af_size(const struct net_device *dev, + u32 ext_filter_mask) { if (!__in6_dev_get(dev)) return 0; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 9f777ec59a59..ed33abf57abd 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -32,6 +32,7 @@ struct fib6_rule { struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { + struct rt6_info *rt; struct fib_lookup_arg arg = { .lookup_ptr = lookup, .flags = FIB_LOOKUP_NOREF, @@ -40,11 +41,21 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); - if (arg.result) - return arg.result; + rt = arg.result; - dst_hold(&net->ipv6.ip6_null_entry->dst); - return &net->ipv6.ip6_null_entry->dst; + if (!rt) { + dst_hold(&net->ipv6.ip6_null_entry->dst); + return &net->ipv6.ip6_null_entry->dst; + } + + if (rt->rt6i_flags & RTF_REJECT && + rt->dst.error == -EAGAIN) { + ip6_rt_put(rt); + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } + + return &rt->dst; } static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6c2b2132c8d3..36c5a98b0472 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -68,6 +68,7 @@ #include <net/xfrm.h> #include <net/inet_common.h> #include <net/dsfield.h> +#include <net/l3mdev.h> #include <asm/uaccess.h> @@ -452,7 +453,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * and anycast addresses will be checked later. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { - net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n"); + net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n", + &hdr->saddr, &hdr->daddr); return; } @@ -460,7 +462,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Never answer to a ICMP packet. */ if (is_ineligible(skb)) { - net_dbg_ratelimited("icmp6_send: no reply to icmp error\n"); + net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n", + &hdr->saddr, &hdr->daddr); return; } @@ -496,6 +499,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev); + dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; @@ -509,7 +515,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) len = skb->len - msg.offset; len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); if (len < 0) { - net_dbg_ratelimited("icmp: len problem\n"); + net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n", + &hdr->saddr, &hdr->daddr); goto out_dst_release; } @@ -575,7 +582,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.daddr = ipv6_hdr(skb)->saddr; if (saddr) fl6.saddr = *saddr; - fl6.flowi6_oif = skb->dev->ifindex; + fl6.flowi6_oif = l3mdev_fib_oif(skb->dev); fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); @@ -781,7 +788,8 @@ static int icmpv6_rcv(struct sk_buff *skb) if (type & ICMPV6_INFOMSG_MASK) break; - net_dbg_ratelimited("icmpv6: msg of unknown type\n"); + net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n", + saddr, daddr); /* * error of unknown type. diff --git a/net/ipv6/ila.c b/net/ipv6/ila.c index 678d2df4b8d9..1a6852e1ac69 100644 --- a/net/ipv6/ila.c +++ b/net/ipv6/ila.c @@ -91,7 +91,7 @@ static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) *(__be64 *)&ip6h->daddr = p->locator; } -static int ila_output(struct sock *sk, struct sk_buff *skb) +static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -100,7 +100,7 @@ static int ila_output(struct sock *sk, struct sk_buff *skb) update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); - return dst->lwtstate->orig_output(sk, skb); + return dst->lwtstate->orig_output(net, sk, skb); drop: kfree_skb(skb); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 91b7d33f508b..5d1c7cee2cb2 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -67,15 +67,16 @@ EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, - const struct request_sock *req) + const struct request_sock *req, + u8 proto) { struct inet_request_sock *ireq = inet_rsk(req); - struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; memset(fl6, 0, sizeof(*fl6)); - fl6->flowi6_proto = IPPROTO_TCP; + fl6->flowi6_proto = proto; fl6->daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(fl6, np->opt, &final); fl6->saddr = ireq->ir_v6_loc_addr; @@ -91,73 +92,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, return dst; } - -/* - * request_sock (formerly open request) hash tables. - */ -static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, - const u32 rnd, const u32 synq_hsize) -{ - u32 c; - - c = jhash_3words((__force u32)raddr->s6_addr32[0], - (__force u32)raddr->s6_addr32[1], - (__force u32)raddr->s6_addr32[2], - rnd); - - c = jhash_2words((__force u32)raddr->s6_addr32[3], - (__force u32)rport, - c); - - return c & (synq_hsize - 1); -} - -struct request_sock *inet6_csk_search_req(struct sock *sk, - const __be16 rport, - const struct in6_addr *raddr, - const struct in6_addr *laddr, - const int iif) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req; - u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd, - lopt->nr_table_entries); - - spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); - for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { - const struct inet_request_sock *ireq = inet_rsk(req); - - if (ireq->ir_rmt_port == rport && - req->rsk_ops->family == AF_INET6 && - ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && - ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && - (!ireq->ir_iif || ireq->ir_iif == iif)) { - atomic_inc(&req->rsk_refcnt); - WARN_ON(req->sk != NULL); - break; - } - } - spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); - - return req; -} -EXPORT_SYMBOL_GPL(inet6_csk_search_req); - -void inet6_csk_reqsk_queue_hash_add(struct sock *sk, - struct request_sock *req, - const unsigned long timeout) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr, - inet_rsk(req)->ir_rmt_port, - lopt->hash_rnd, lopt->nr_table_entries); - - reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); - inet_csk_reqsk_queue_added(sk, timeout); -} -EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); +EXPORT_SYMBOL(inet6_csk_route_req); void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 6ac8dad0138a..21ace5a2bf7c 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -114,6 +114,8 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score++; } + if (sk->sk_incoming_cpu == raw_smp_processor_id()) + score++; } return score; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7d2e0023c72d..0c7e276c230e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -264,6 +264,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) return NULL; } +EXPORT_SYMBOL_GPL(fib6_get_table); static void __net_init fib6_tables_init(struct net *net) { @@ -285,7 +286,17 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { - return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + struct rt6_info *rt; + + rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + if (rt->rt6i_flags & RTF_REJECT && + rt->dst.error == -EAGAIN) { + ip6_rt_put(rt); + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } + + return &rt->dst; } static void __net_init fib6_tables_init(struct net *net) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 08b62047c67f..eeca943f12dc 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -264,6 +264,9 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff); int err = -ENOSYS; + if (skb->encapsulation) + skb_set_inner_network_header(skb, nhoff); + iph->payload_len = htons(skb->len - nhoff - sizeof(*iph)); rcu_read_lock(); @@ -280,6 +283,13 @@ out_unlock: return err; } +static int sit_gro_complete(struct sk_buff *skb, int nhoff) +{ + skb->encapsulation = 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_SIT; + return ipv6_gro_complete(skb, nhoff); +} + static struct packet_offload ipv6_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .callbacks = { @@ -292,6 +302,8 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, + .gro_receive = ipv6_gro_receive, + .gro_complete = sit_gro_complete, }, }; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a598fe2c0849..c2650688aca7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -28,6 +28,7 @@ #include <linux/errno.h> #include <linux/kernel.h> +#include <linux/overflow-arith.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/net.h> @@ -55,12 +56,12 @@ #include <net/xfrm.h> #include <net/checksum.h> #include <linux/mroute6.h> +#include <net/l3mdev.h> -static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) +static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; - struct net *net = dev_net(dev); struct neighbour *neigh; struct in6_addr *nexthop; int ret; @@ -126,16 +127,15 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) - return ip6_fragment(sk, skb, ip6_finish_output2); + return ip6_fragment(net, sk, skb, ip6_finish_output2); else - return ip6_finish_output2(sk, skb); + return ip6_finish_output2(net, sk, skb); } -int ip6_output(struct sock *sk, struct sk_buff *skb) +int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - struct net *net = dev_net(dev); if (unlikely(idev->cnf.disable_ipv6)) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); @@ -234,7 +234,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, */ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, (struct sock *)sk, skb, NULL, dst->dev, - dst_output_okfn); + dst_output); } skb->dev = dst->dev; @@ -334,7 +334,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { skb_sender_cpu_clear(skb); - return dst_output(sk, skb); + return dst_output(net, sk, skb); } static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) @@ -390,6 +390,9 @@ int ip6_forward(struct sk_buff *skb) if (skb->pkt_type != PACKET_HOST) goto drop; + if (unlikely(skb->sk)) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; @@ -554,8 +557,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } -int ip6_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)) +int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); @@ -568,7 +571,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, __be32 frag_id; int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; - struct net *net = dev_net(skb_dst(skb)->dev); hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; @@ -595,7 +597,10 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, if (np->frag_size) mtu = np->frag_size; } - mtu -= hlen + sizeof(struct frag_hdr); + + if (overflow_usub(mtu, hlen + sizeof(struct frag_hdr), &mtu) || + mtu <= 7) + goto fail_toobig; frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); @@ -688,7 +693,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, ip6_copy_metadata(frag, skb); } - err = output(sk, skb); + err = output(net, sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); @@ -816,7 +821,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - err = output(sk, frag); + err = output(net, sk, frag); if (err) goto fail; @@ -888,7 +893,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { + (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && + (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { dst_release(dst); dst = NULL; } @@ -1040,7 +1046,7 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, if (final_dst) fl6->daddr = *final_dst; if (!fl6->flowi6_oif) - fl6->flowi6_oif = dst->dev->ifindex; + fl6->flowi6_oif = l3mdev_fib_oif(dst->dev); return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } @@ -1694,7 +1700,7 @@ int ip6_send_skb(struct sk_buff *skb) struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); int err; - err = ip6_local_out(skb); + err = ip6_local_out(net, skb->sk, skb); if (err) { if (err > 0) err = net_xmit_errno(err); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index f96f1c19b4a8..0a8610b33d79 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -482,7 +482,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) return -EMSGSIZE; } - err = dst_output(skb->sk, skb); + err = dst_output(t->net, skb->sk, skb); if (net_xmit_eval(err) == 0) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 5e5d16e7ce85..ad19136086dd 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1991,7 +1991,7 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTOCTETS, skb->len); - return dst_output(sk, skb); + return dst_output(net, sk, skb); } /* diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index a8bf57ca74d3..124338a39e29 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1646,7 +1646,7 @@ static void mld_sendpack(struct sk_buff *skb) err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, net->ipv6.igmp_sk, skb, NULL, skb->dev, - dst_output_okfn); + dst_output); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); @@ -2010,7 +2010,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) skb_dst_set(skb, dst); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb->dev, - dst_output_okfn); + dst_output); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index b9779d441b12..60c79a08e14a 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -118,7 +118,7 @@ static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) struct mip6_report_rate_limiter { spinlock_t lock; - struct timeval stamp; + ktime_t stamp; int iif; struct in6_addr src; struct in6_addr dst; @@ -184,20 +184,18 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } -static inline int mip6_report_rl_allow(struct timeval *stamp, +static inline int mip6_report_rl_allow(ktime_t stamp, const struct in6_addr *dst, const struct in6_addr *src, int iif) { int allow = 0; spin_lock_bh(&mip6_report_rl.lock); - if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec || - mip6_report_rl.stamp.tv_usec != stamp->tv_usec || + if (!ktime_equal(mip6_report_rl.stamp, stamp) || mip6_report_rl.iif != iif || !ipv6_addr_equal(&mip6_report_rl.src, src) || !ipv6_addr_equal(&mip6_report_rl.dst, dst)) { - mip6_report_rl.stamp.tv_sec = stamp->tv_sec; - mip6_report_rl.stamp.tv_usec = stamp->tv_usec; + mip6_report_rl.stamp = stamp; mip6_report_rl.iif = iif; mip6_report_rl.src = *src; mip6_report_rl.dst = *dst; @@ -216,7 +214,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_destopt_hao *hao = NULL; struct xfrm_selector sel; int offset; - struct timeval stamp; + ktime_t stamp; int err = 0; if (unlikely(fl6->flowi6_proto == IPPROTO_MH && @@ -230,9 +228,9 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, (skb_network_header(skb) + offset); } - skb_get_timestamp(skb, &stamp); + stamp = skb_get_ktime(skb); - if (!mip6_report_rl_allow(&stamp, &ipv6_hdr(skb)->daddr, + if (!mip6_report_rl_allow(stamp, &ipv6_hdr(skb)->daddr, hao ? &hao->addr : &ipv6_hdr(skb)->saddr, opt->iif)) goto out; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7089c305245c..3e0f855e1bea 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -67,6 +67,7 @@ #include <net/flow.h> #include <net/ip6_checksum.h> #include <net/inet_common.h> +#include <net/l3mdev.h> #include <linux/proc_fs.h> #include <linux/netfilter.h> @@ -147,6 +148,7 @@ struct neigh_table nd_tbl = { .gc_thresh2 = 512, .gc_thresh3 = 1024, }; +EXPORT_SYMBOL_GPL(nd_tbl); static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) { @@ -441,8 +443,11 @@ static void ndisc_send_skb(struct sk_buff *skb, if (!dst) { struct flowi6 fl6; + int oif = l3mdev_fib_oif(skb->dev); - icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex); + icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); + if (oif != skb->dev->ifindex) + fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { kfree_skb(skb); @@ -465,7 +470,7 @@ static void ndisc_send_skb(struct sk_buff *skb, err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, dst->dev, - dst_output_okfn); + dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); @@ -766,7 +771,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { - +have_ifp: if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { /* @@ -792,6 +797,18 @@ static void ndisc_recv_ns(struct sk_buff *skb) } else { struct net *net = dev_net(dev); + /* perhaps an address on the master device */ + if (netif_is_l3_slave(dev)) { + struct net_device *mdev; + + mdev = netdev_master_upper_dev_get_rcu(dev); + if (mdev) { + ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1); + if (ifp) + goto have_ifp; + } + } + idev = in6_dev_get(dev); if (!idev) { /* XXX: count this drop? */ @@ -1483,6 +1500,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) struct flowi6 fl6; int rd_len; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; + int oif = l3mdev_fib_oif(dev); bool ret; if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { @@ -1499,7 +1517,10 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, - &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); + &saddr_buf, &ipv6_hdr(skb)->saddr, oif); + + if (oif != skb->dev->ifindex) + fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index b4de08a83e0b..d11c46833d61 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -18,9 +18,8 @@ #include <net/ip6_checksum.h> #include <net/netfilter/nf_queue.h> -int ip6_route_me_harder(struct sk_buff *skb) +int ip6_route_me_harder(struct net *net, struct sk_buff *skb) { - struct net *net = dev_net(skb_dst(skb)->dev); const struct ipv6hdr *iph = ipv6_hdr(skb); unsigned int hh_len; struct dst_entry *dst; @@ -93,7 +92,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, } } -static int nf_ip6_reroute(struct sk_buff *skb, +static int nf_ip6_reroute(struct net *net, struct sk_buff *skb, const struct nf_queue_entry *entry) { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); @@ -103,7 +102,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || skb->mark != rt_info->mark) - return ip6_route_me_harder(skb); + return ip6_route_me_harder(net, skb); } return 0; } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 96833e4b3193..f6a024e141e5 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -58,6 +58,7 @@ endif # NF_TABLES config NF_DUP_IPV6 tristate "Netfilter IPv6 packet duplication to alternate destination" + depends on !NF_CONNTRACK || NF_CONNTRACK help This option enables the nf_dup_ipv6 core, which duplicates an IPv6 packet to be rerouted to another destination. diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 80e3bd72b715..99425cf2819b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -117,7 +117,7 @@ ip6_packet_match(const struct sk_buff *skb, if (FWINV(ret != 0, IP6T_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", indev, ip6info->iniface, - ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":""); + ip6info->invflags & IP6T_INV_VIA_IN ? " (INV)" : ""); return false; } @@ -126,14 +126,14 @@ ip6_packet_match(const struct sk_buff *skb, if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", outdev, ip6info->outiface, - ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":""); + ip6info->invflags & IP6T_INV_VIA_OUT ? " (INV)" : ""); return false; } /* ... might want to do something with class and flowlabel here ... */ /* look for the desired protocol header */ - if((ip6info->flags & IP6T_F_PROTO)) { + if (ip6info->flags & IP6T_F_PROTO) { int protohdr; unsigned short _frag_off; @@ -151,9 +151,9 @@ ip6_packet_match(const struct sk_buff *skb, ip6info->proto); if (ip6info->proto == protohdr) { - if(ip6info->invflags & IP6T_INV_PROTO) { + if (ip6info->invflags & IP6T_INV_PROTO) return false; - } + return true; } @@ -443,8 +443,8 @@ ip6t_do_table(struct sk_buff *skb, break; } while (!acpar.hotdrop); - xt_write_recseq_end(addend); - local_bh_enable(); + xt_write_recseq_end(addend); + local_bh_enable(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -561,7 +561,7 @@ mark_source_chains(const struct xt_table_info *newinfo, pos = newpos; } } - next: +next: duprintf("Finished chain %u\n", hook); } return 1; @@ -816,7 +816,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net) newinfo) */ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, - const struct ip6t_replace *repl) + const struct ip6t_replace *repl) { struct ip6t_entry *iter; unsigned int i; @@ -1090,7 +1090,7 @@ static int compat_table_info(const struct xt_table_info *info, #endif static int get_info(struct net *net, void __user *user, - const int *len, int compat) + const int *len, int compat) { char name[XT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1152,7 +1152,7 @@ static int get_info(struct net *net, void __user *user, static int get_entries(struct net *net, struct ip6t_get_entries __user *uptr, - const int *len) + const int *len) { int ret; struct ip6t_get_entries get; diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index c2356602158a..3deed5860a42 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -76,7 +76,7 @@ synproxy_send_tcp(const struct synproxy_net *snet, nf_conntrack_get(nfct); } - ip6_local_out(nskb); + ip6_local_out(net, nskb->sk, nskb); return; free_nskb: @@ -244,7 +244,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + niph, nth, tcp_hdr_size); } static bool @@ -458,14 +458,12 @@ static struct xt_target synproxy_tg6_reg __read_mostly = { static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = { { .hook = ipv6_synproxy_hook, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, }, { .hook = ipv6_synproxy_hook, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 8745b592b2f6..abe278b07932 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -65,7 +65,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(skb); + err = ip6_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index abea175d5853..de2a10a565f5 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -70,7 +70,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { /* Before packet filtering, change destination */ { .hook = ip6table_nat_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_NAT_DST, @@ -78,7 +77,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { /* After packet filtering, change source */ { .hook = ip6table_nat_out, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_NAT_SRC, @@ -86,7 +84,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { /* Before packet filtering, change destination */ { .hook = ip6table_nat_local_fn, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST, @@ -94,7 +91,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { /* After packet filtering, change source */ { .hook = ip6table_nat_fn, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index dd83ad42f8f6..1aa5848764a7 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -187,42 +187,36 @@ static unsigned int ipv6_conntrack_local(void *priv, static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { { .hook = ipv6_conntrack_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_conntrack_local, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_helper, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_CONNTRACK_HELPER, }, { .hook = ipv6_confirm, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_LAST, }, { .hook = ipv6_helper, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_CONNTRACK_HELPER, }, { .hook = ipv6_confirm, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_LAST-1, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index d3b797446cea..660bc10c7a9c 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -57,12 +57,12 @@ static const u_int8_t invmap[] = { [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1, - [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY +1 + [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY + 1 }; static const u_int8_t noct_valid_new[] = { [ICMPV6_MGM_QUERY - 130] = 1, - [ICMPV6_MGM_REPORT -130] = 1, + [ICMPV6_MGM_REPORT - 130] = 1, [ICMPV6_MGM_REDUCTION - 130] = 1, [NDISC_ROUTER_SOLICITATION - 130] = 1, [NDISC_ROUTER_ADVERTISEMENT - 130] = 1, diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 701cd2bae0a9..056f5d4a852a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -59,7 +59,7 @@ struct nf_ct_frag6_skb_cb struct sk_buff *orig; }; -#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) +#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb)) static struct inet_frags nf_frags; @@ -445,7 +445,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - for (fp=head->next; fp; fp = fp->next) { + for (fp = head->next; fp; fp = fp->next) { head->data_len += fp->len; head->len += fp->len; if (head->ip_summed != fp->ip_summed) @@ -563,12 +563,10 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) return 0; } -struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) +struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { struct sk_buff *clone; struct net_device *dev = skb->dev; - struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev) - : dev_net(skb->dev); struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index a99baf63eccf..4fdbed5ebfb6 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -63,7 +63,8 @@ static unsigned int ipv6_defrag(void *priv, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(state->hook, skb)); + reasm = nf_ct_frag6_gather(state->net, skb, + nf_ct6_defrag_user(state->hook, skb)); /* queued */ if (reasm == NULL) return NF_STOLEN; @@ -84,14 +85,12 @@ static unsigned int ipv6_defrag(void *priv, static struct nf_hook_ops ipv6_defrag_ops[] = { { .hook = ipv6_defrag, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv6_defrag, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index ee0d9a5b16c3..6989c70ae29f 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -68,7 +68,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, } if (nf_dup_ipv6_route(net, skb, gw, oif)) { __this_cpu_write(nf_skb_duplicated, true); - ip6_local_out(skb); + ip6_local_out(net, skb->sk, skb); __this_cpu_write(nf_skb_duplicated, false); } else { kfree_skb(skb); diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 18e835ffbef3..238e70c3f7b7 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -437,7 +437,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(skb); + err = ip6_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 94b4c6dfb400..e0f922b777e3 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -26,7 +26,7 @@ const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, int tcphoff; proto = oip6h->nexthdr; - tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), + tcphoff = ipv6_skip_exthdr(oldskb, ((u8 *)(oip6h + 1) - oldskb->data), &proto, &frag_off); if ((tcphoff < 0) || (tcphoff > oldskb->len)) { @@ -206,7 +206,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) dev_queue_xmit(nskb); } else #endif - ip6_local_out(nskb); + ip6_local_out(net, nskb->sk, nskb); } EXPORT_SYMBOL_GPL(nf_send_reset6); @@ -224,7 +224,7 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook) return true; proto = ip6h->nexthdr; - thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + thoff = ipv6_skip_exthdr(skb, ((u8 *)(ip6h + 1) - skb->data), &proto, &fo); if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index d42bbc1d7555..71d995ff3108 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -52,7 +52,7 @@ static unsigned int nf_route_table_hook(void *priv, skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) - return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; + return ip6_route_me_harder(state->net, skb) == 0 ? ret : NF_DROP; return ret; } @@ -61,11 +61,11 @@ static const struct nf_chain_type nft_chain_route_ipv6 = { .name = "route", .type = NFT_CHAIN_T_ROUTE, .family = NFPROTO_IPV6, - .owner = THIS_MODULE, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_LOCAL_OUT), .hooks = { - [NF_INET_LOCAL_OUT] = nf_route_table_hook, - }, + [NF_INET_LOCAL_OUT] = nf_route_table_hook, + }, }; static int __init nft_chain_route_init(void) diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index e77102c4f804..462f2a76b5c2 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -138,9 +138,8 @@ int ip6_dst_hoplimit(struct dst_entry *dst) EXPORT_SYMBOL(ip6_dst_hoplimit); #endif -static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) +int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net *net = dev_net(skb_dst(skb)->dev); int len; len = skb->len - sizeof(struct ipv6hdr); @@ -151,29 +150,18 @@ static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, - dst_output_okfn); -} - -int __ip6_local_out(struct sk_buff *skb) -{ - return __ip6_local_out_sk(skb->sk, skb); + dst_output); } EXPORT_SYMBOL_GPL(__ip6_local_out); -int ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) +int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; - err = __ip6_local_out_sk(sk, skb); + err = __ip6_local_out(net, sk, skb); if (likely(err == 1)) - err = dst_output(sk, skb); + err = dst_output(net, sk, skb); return err; } -EXPORT_SYMBOL_GPL(ip6_local_out_sk); - -int ip6_local_out(struct sk_buff *skb) -{ - return ip6_local_out_sk(skb->sk, skb); -} EXPORT_SYMBOL_GPL(ip6_local_out); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index fec0151522a2..dc65ec198f7c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -655,7 +655,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, - NULL, rt->dst.dev, dst_output_okfn); + NULL, rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 66a6b2c485dd..2701cb3d88e9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -61,6 +61,7 @@ #include <net/nexthop.h> #include <net/lwtunnel.h> #include <net/ip_tunnels.h> +#include <net/l3mdev.h> #include <asm/uaccess.h> @@ -86,9 +87,9 @@ static void ip6_dst_ifdown(struct dst_entry *, static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); -static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb); +static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb); static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb); +static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -142,6 +143,9 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) struct net_device *loopback_dev = net->loopback_dev; int cpu; + if (dev == loopback_dev) + return; + for_each_possible_cpu(cpu) { struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); struct rt6_info *rt; @@ -151,14 +155,12 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) struct inet6_dev *rt_idev = rt->rt6i_idev; struct net_device *rt_dev = rt->dst.dev; - if (rt_idev && (rt_idev->dev == dev || !dev) && - rt_idev->dev != loopback_dev) { + if (rt_idev->dev == dev) { rt->rt6i_idev = in6_dev_get(loopback_dev); in6_dev_put(rt_idev); } - if (rt_dev && (rt_dev == dev || !dev) && - rt_dev != loopback_dev) { + if (rt_dev == dev) { rt->dst.dev = loopback_dev; dev_hold(rt->dst.dev); dev_put(rt_dev); @@ -247,12 +249,6 @@ static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, { } -static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst, - unsigned long old) -{ - return NULL; -} - static struct dst_ops ip6_dst_blackhole_ops = { .family = AF_INET6, .destroy = ip6_dst_destroy, @@ -261,7 +257,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { .default_advmss = ip6_default_advmss, .update_pmtu = ip6_rt_blackhole_update_pmtu, .redirect = ip6_rt_blackhole_redirect, - .cow_metrics = ip6_rt_blackhole_cow_metrics, + .cow_metrics = dst_cow_metrics_generic, .neigh_lookup = ip6_neigh_lookup, }; @@ -308,7 +304,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, - .output = dst_discard_sk, + .output = dst_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -318,6 +314,15 @@ static const struct rt6_info ip6_blk_hole_entry_template = { #endif +static void rt6_info_init(struct rt6_info *rt) +{ + struct dst_entry *dst = &rt->dst; + + memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); + INIT_LIST_HEAD(&rt->rt6i_siblings); + INIT_LIST_HEAD(&rt->rt6i_uncached); +} + /* allocate dst with ip6_dst_ops */ static struct rt6_info *__ip6_dst_alloc(struct net *net, struct net_device *dev, @@ -326,13 +331,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0, DST_OBSOLETE_FORCE_CHK, flags); - if (rt) { - struct dst_entry *dst = &rt->dst; + if (rt) + rt6_info_init(rt); - memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); - INIT_LIST_HEAD(&rt->rt6i_siblings); - INIT_LIST_HEAD(&rt->rt6i_uncached); - } return rt; } @@ -1044,6 +1045,9 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; + if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) + oif = 0; + redo_rt6_select: rt = rt6_select(fn, oif, strict); if (rt->rt6i_nsiblings) @@ -1141,7 +1145,7 @@ void ip6_route_input(struct sk_buff *skb) int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip_tunnel_info *tun_info; struct flowi6 fl6 = { - .flowi6_iif = skb->dev->ifindex, + .flowi6_iif = l3mdev_fib_oif(skb->dev), .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), @@ -1165,14 +1169,22 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, struct flowi6 *fl6) { + struct dst_entry *dst; int flags = 0; + bool any_src; + + dst = l3mdev_rt6_dst_by_oif(net, fl6); + if (dst) + return dst; fl6->flowi6_iif = LOOPBACK_IFINDEX; - if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) + any_src = ipv6_addr_any(&fl6->saddr); + if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || + (fl6->flowi6_oif && any_src)) flags |= RT6_LOOKUP_F_IFACE; - if (!ipv6_addr_any(&fl6->saddr)) + if (!any_src) flags |= RT6_LOOKUP_F_HAS_SADDR; else if (sk) flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); @@ -1188,24 +1200,20 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0); if (rt) { - new = &rt->dst; - - memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); + rt6_info_init(rt); + new = &rt->dst; new->__use = 1; new->input = dst_discard; - new->output = dst_discard_sk; + new->output = dst_discard_out; - if (dst_metrics_read_only(&ort->dst)) - new->_metrics = ort->dst._metrics; - else - dst_copy_metrics(new, &ort->dst); + dst_copy_metrics(new, &ort->dst); rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); rt->rt6i_gateway = ort->rt6i_gateway; - rt->rt6i_flags = ort->rt6i_flags; + rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU; rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); @@ -1723,21 +1731,21 @@ static int ip6_convert_metrics(struct mx6_config *mxc, return -EINVAL; } -int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret) +static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) { - int err; struct net *net = cfg->fc_nlinfo.nl_net; struct rt6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; int addr_type; + int err = -EINVAL; if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) - return -EINVAL; + goto out; #ifndef CONFIG_IPV6_SUBTREES if (cfg->fc_src_len) - return -EINVAL; + goto out; #endif if (cfg->fc_ifindex) { err = -ENODEV; @@ -1852,7 +1860,7 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret) switch (cfg->fc_type) { case RTN_BLACKHOLE: rt->dst.error = -EINVAL; - rt->dst.output = dst_discard_sk; + rt->dst.output = dst_discard_out; rt->dst.input = dst_discard; break; case RTN_PROHIBIT: @@ -1957,9 +1965,7 @@ install_route: cfg->fc_nlinfo.nl_net = dev_net(dev); - *rt_ret = rt; - - return 0; + return rt; out: if (dev) dev_put(dev); @@ -1968,20 +1974,21 @@ out: if (rt) dst_free(&rt->dst); - *rt_ret = NULL; - - return err; + return ERR_PTR(err); } int ip6_route_add(struct fib6_config *cfg) { struct mx6_config mxc = { .mx = NULL, }; - struct rt6_info *rt = NULL; + struct rt6_info *rt; int err; - err = ip6_route_info_create(cfg, &rt); - if (err) + rt = ip6_route_info_create(cfg); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + rt = NULL; goto out; + } err = ip6_convert_metrics(&mxc, cfg); if (err) @@ -2263,7 +2270,6 @@ static struct rt6_info *rt6_add_route_info(struct net *net, unsigned int pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_INFO, .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = ifindex, .fc_dst_len = prefixlen, @@ -2274,6 +2280,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_nlinfo.nl_net = net, }; + cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO; cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; @@ -2314,7 +2321,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, unsigned int pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_DFLT, + .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT, .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | @@ -2361,7 +2368,8 @@ static void rtmsg_to_fib6_config(struct net *net, { memset(cfg, 0, sizeof(*cfg)); - cfg->fc_table = RT6_TABLE_MAIN; + cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? + : RT6_TABLE_MAIN; cfg->fc_ifindex = rtmsg->rtmsg_ifindex; cfg->fc_metric = rtmsg->rtmsg_metric; cfg->fc_expires = rtmsg->rtmsg_info; @@ -2445,7 +2453,7 @@ static int ip6_pkt_discard(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb) +static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); @@ -2456,7 +2464,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb) +static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); @@ -2470,6 +2478,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, bool anycast) { + u32 tb_id; struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, DST_NOCOUNT); @@ -2492,7 +2501,8 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_gateway = *addr; rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; - rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); + tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; + rt->rt6i_table = fib6_get_table(net, tb_id); rt->dst.flags |= DST_NOCACHE; atomic_set(&rt->dst.__refcnt, 1); @@ -2597,7 +2607,8 @@ void rt6_ifdown(struct net *net, struct net_device *dev) fib6_clean_all(net, fib6_ifdown, &adn); icmp6_clean_all(fib6_ifdown, &adn); - rt6_uncached_list_flush_dev(net, dev); + if (dev) + rt6_uncached_list_flush_dev(net, dev); } struct rt6_mtu_change_arg { @@ -2870,9 +2881,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) r_cfg.fc_encap_type = nla_get_u16(nla); } - err = ip6_route_info_create(&r_cfg, &rt); - if (err) + rt = ip6_route_info_create(&r_cfg); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + rt = NULL; goto cleanup; + } err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); if (err) { @@ -3251,6 +3265,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) } else { fl6.flowi6_oif = oif; + if (netif_index_is_l3_master(net, oif)) { + fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | + FLOWI_FLAG_SKIP_NH_OIF; + } + rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 2461b3ff9551..bb8f2fa1c7fb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -114,14 +114,11 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, } EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence); -__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp) +__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); - tcp_synq_overflow(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); - return __cookie_v6_init_sequence(iph, th, mssp); } @@ -173,7 +170,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk); + req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false); if (!req) goto out; @@ -238,9 +235,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out_free; } - req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); + req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, - &req->rcv_wnd, &req->window_clamp, + &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 16fb299dcab8..714bc5ad096e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -70,8 +70,8 @@ #include <linux/crypto.h> #include <linux/scatterlist.h> -static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); -static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); +static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); @@ -82,7 +82,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; #else -static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, +static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, const struct in6_addr *addr) { return NULL; @@ -437,8 +437,8 @@ out: static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, - u16 queue_mapping, - struct tcp_fastopen_cookie *foc) + struct tcp_fastopen_cookie *foc, + bool attach_req) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -447,10 +447,11 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, int err = -ENOMEM; /* First, grab a route. */ - if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) + if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, + IPPROTO_TCP)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, foc); + skb = tcp_make_synack(sk, dst, req, foc, attach_req); if (skb) { __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, @@ -460,7 +461,6 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); - skb_set_queue_mapping(skb, queue_mapping); err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); err = net_xmit_eval(err); } @@ -621,8 +621,12 @@ clear_hash_noput: return 1; } -static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) +#endif + +static bool tcp_v6_inbound_md5_hash(const struct sock *sk, + const struct sk_buff *skb) { +#ifdef CONFIG_TCP_MD5SIG const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -659,9 +663,9 @@ static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) &ip6h->daddr, ntohs(th->dest)); return true; } +#endif return false; } -#endif static void tcp_v6_init_req(struct request_sock *req, const struct sock *sk_listener, @@ -688,13 +692,14 @@ static void tcp_v6_init_req(struct request_sock *req, } } -static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, +static struct dst_entry *tcp_v6_route_req(const struct sock *sk, + struct flowi *fl, const struct request_sock *req, bool *strict) { if (strict) *strict = true; - return inet6_csk_route_req(sk, &fl->u.ip6, req); + return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); } struct request_sock_ops tcp6_request_sock_ops __read_mostly = { @@ -721,10 +726,9 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .route_req = tcp_v6_route_req, .init_seq = tcp_v6_init_sequence, .send_synack = tcp_v6_send_synack, - .queue_hash_add = inet6_csk_reqsk_queue_hash_add, }; -static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, +static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int rst, u8 tclass, u32 label) @@ -823,7 +827,7 @@ static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, kfree_skb(buff); } -static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) +static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); u32 seq = 0, ack_seq = 0; @@ -894,7 +898,7 @@ release_sk1: #endif } -static void tcp_v6_send_ack(struct sock *sk, struct sk_buff *skb, u32 seq, +static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, u32 label) @@ -917,7 +921,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) inet_twsk_put(tw); } -static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV @@ -925,44 +929,18 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, */ tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, - tcp_rsk(req)->rcv_nxt, req->rcv_wnd, + tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); } -static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) +static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) { +#ifdef CONFIG_SYN_COOKIES const struct tcphdr *th = tcp_hdr(skb); - struct request_sock *req; - struct sock *nsk; - - /* Find possible connection requests. */ - req = inet6_csk_search_req(sk, th->source, - &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); - if (req) { - nsk = tcp_check_req(sk, skb, req, false); - if (!nsk || nsk == sk) - reqsk_put(req); - return nsk; - } - nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, - &ipv6_hdr(skb)->saddr, th->source, - &ipv6_hdr(skb)->daddr, ntohs(th->dest), - tcp_v6_iif(skb)); - - if (nsk) { - if (nsk->sk_state != TCP_TIME_WAIT) { - bh_lock_sock(nsk); - return nsk; - } - inet_twsk_put(inet_twsk(nsk)); - return NULL; - } -#ifdef CONFIG_SYN_COOKIES if (!th->syn) sk = cookie_v6_check(sk, skb); #endif @@ -985,12 +963,15 @@ drop: return 0; /* don't send reset */ } -static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, +static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst) + struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req) { struct inet_request_sock *ireq; - struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct ipv6_pinfo *newnp; + const struct ipv6_pinfo *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; struct tcp_sock *newtp; @@ -1005,7 +986,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * v6 mapped */ - newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst); + newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, + req_unhash, own_req); if (!newsk) return NULL; @@ -1058,7 +1040,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto out_overflow; if (!dst) { - dst = inet6_csk_route_req(sk, &fl6, req); + dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); if (!dst) goto out; } @@ -1166,7 +1148,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_done(newsk); goto out; } - __inet_hash(newsk, NULL); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); return newsk; @@ -1180,7 +1162,7 @@ out: } /* The socket must have it's spinlock held when we get - * here. + * here, unless it is a TCP_LISTEN socket. * * We have a potential double-lock case here, so even when * doing backlog processing we use the BH locking scheme. @@ -1251,18 +1233,14 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) goto csum_err; if (sk->sk_state == TCP_LISTEN) { - struct sock *nsk = tcp_v6_hnd_req(sk, skb); + struct sock *nsk = tcp_v6_cookie_check(sk, skb); + if (!nsk) goto discard; - /* - * Queue it on the new socket if the new socket is active, - * otherwise we just shortcircuit this and continue with - * the new socket.. - */ if (nsk != sk) { sock_rps_save_rxhash(nsk, skb); - sk_mark_napi_id(sk, skb); + sk_mark_napi_id(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) @@ -1272,7 +1250,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } else sock_rps_save_rxhash(sk, skb); - if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) + if (tcp_rcv_state_process(sk, skb)) goto reset; if (opt_skb) goto ipv6_pktoptions; @@ -1386,6 +1364,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) th = tcp_hdr(skb); hdr = ipv6_hdr(skb); +lookup: sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest, inet6_iif(skb)); if (!sk) @@ -1395,6 +1374,37 @@ process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); + struct sock *nsk = NULL; + + sk = req->rsk_listener; + tcp_v6_fill_cb(skb, hdr, th); + if (tcp_v6_inbound_md5_hash(sk, skb)) { + reqsk_put(req); + goto discard_it; + } + if (likely(sk->sk_state == TCP_LISTEN)) { + nsk = tcp_check_req(sk, skb, req, false); + } else { + inet_csk_reqsk_queue_drop_and_put(sk, req); + goto lookup; + } + if (!nsk) { + reqsk_put(req); + goto discard_it; + } + if (nsk == sk) { + sock_hold(sk); + reqsk_put(req); + tcp_v6_restore_cb(skb); + } else if (tcp_child_process(sk, nsk, skb)) { + tcp_v6_send_reset(nsk, skb); + goto discard_it; + } else { + return 0; + } + } if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; @@ -1405,17 +1415,21 @@ process: tcp_v6_fill_cb(skb, hdr, th); -#ifdef CONFIG_TCP_MD5SIG if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard_and_relse; -#endif if (sk_filter(sk, skb)) goto discard_and_relse; - sk_incoming_cpu_update(sk); skb->dev = NULL; + if (sk->sk_state == TCP_LISTEN) { + ret = tcp_v6_do_rcv(sk, skb); + goto put_and_return; + } + + sk_incoming_cpu_update(sk); + bh_lock_sock_nested(sk); tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; @@ -1430,6 +1444,7 @@ process: } bh_unlock_sock(sk); +put_and_return: sock_put(sk); return ret ? -1 : 0; @@ -1630,7 +1645,7 @@ static void tcp_v6_destroy_sock(struct sock *sk) #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, - struct request_sock *req, int i, kuid_t uid) + const struct request_sock *req, int i) { long ttd = req->rsk_timer.expires - jiffies; const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; @@ -1654,7 +1669,8 @@ static void get_openreq6(struct seq_file *seq, 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->num_timeout, - from_kuid_munged(seq_user_ns(seq), uid), + from_kuid_munged(seq_user_ns(seq), + sock_i_uid(req->rsk_listener)), 0, /* non standard timer */ 0, /* open_requests have no inode */ 0, req); @@ -1669,7 +1685,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); - struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; + const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; dest = &sp->sk_v6_daddr; src = &sp->sk_v6_rcv_saddr; @@ -1713,7 +1729,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, sp->sk_state == TCP_LISTEN ? - (fastopenq ? fastopenq->max_qlen : 0) : + fastopenq->max_qlen : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) ); } @@ -1759,18 +1775,12 @@ static int tcp6_seq_show(struct seq_file *seq, void *v) } st = seq->private; - switch (st->state) { - case TCP_SEQ_STATE_LISTENING: - case TCP_SEQ_STATE_ESTABLISHED: - if (sk->sk_state == TCP_TIME_WAIT) - get_timewait6_sock(seq, v, st->num); - else - get_tcp6_sock(seq, v, st->num); - break; - case TCP_SEQ_STATE_OPENREQ: - get_openreq6(seq, v, st->num, st->uid); - break; - } + if (sk->sk_state == TCP_TIME_WAIT) + get_timewait6_sock(seq, v, st->num); + else if (sk->sk_state == TCP_NEW_SYN_RECV) + get_openreq6(seq, v, st->num); + else + get_tcp6_sock(seq, v, st->num); out: return 0; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0aba654f5b91..01bcb49619ee 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -182,10 +182,12 @@ static inline int compute_score(struct sock *sk, struct net *net, score++; } + if (sk->sk_incoming_cpu == raw_smp_processor_id()) + score++; + return score; } -#define SCORE2_MAX (1 + 1 + 1) static inline int compute_score2(struct sock *sk, struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, @@ -223,6 +225,9 @@ static inline int compute_score2(struct sock *sk, struct net *net, score++; } + if (sk->sk_incoming_cpu == raw_smp_processor_id()) + score++; + return score; } @@ -251,8 +256,7 @@ begin: hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); matches = 1; - } else if (score == SCORE2_MAX) - goto exact_match; + } } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -269,7 +273,6 @@ begin: goto begin; if (result) { -exact_match: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 0c3e9ffcf231..4d09ce6fa90e 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -79,6 +79,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (!skb->ignore_df && skb->len > mtu) { skb->dev = dst->dev; + skb->protocol = htons(ETH_P_IPV6); if (xfrm6_local_dontfrag(skb)) xfrm6_local_rxpmtu(skb, mtu); @@ -131,45 +132,55 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) return xfrm_output(sk, skb); } +static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct xfrm_state *x = skb_dst(skb)->xfrm; + + return x->outer_mode->afinfo->output_finish(sk, skb); +} + static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; int mtu; + bool toobig; #ifdef CONFIG_NETFILTER if (!x) { IP6CB(skb)->flags |= IP6SKB_REROUTED; - return dst_output(sk, skb); + return dst_output(net, sk, skb); } #endif + if (x->props.mode != XFRM_MODE_TUNNEL) + goto skip_frag; + if (skb->protocol == htons(ETH_P_IPV6)) mtu = ip6_skb_dst_mtu(skb); else mtu = dst_mtu(skb_dst(skb)); - if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { + toobig = skb->len > mtu && !skb_is_gso(skb); + + if (toobig && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); return -EMSGSIZE; - } else if (!skb->ignore_df && skb->len > mtu && skb->sk) { + } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); return -EMSGSIZE; } - if (x->props.mode == XFRM_MODE_TUNNEL && - ((skb->len > mtu && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb)))) { - return ip6_fragment(sk, skb, - x->outer_mode->afinfo->output_finish); - } + if (toobig || dst_allfrag(skb_dst(skb))) + return ip6_fragment(net, sk, skb, + __xfrm6_output_finish); + +skip_frag: return x->outer_mode->afinfo->output_finish(sk, skb); } -int xfrm6_output(struct sock *sk, struct sk_buff *skb) +int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net *net = dev_net(skb_dst(skb)->dev); - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, NULL, skb_dst(skb)->dev, __xfrm6_output, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 2fad59320b6c..5643423fe67a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -20,7 +20,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> -#include <net/vrf.h> +#include <net/l3mdev.h> #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif @@ -37,6 +37,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; + fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); @@ -132,10 +133,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) nexthdr = nh[nhoff]; - if (skb_dst(skb)) { - oif = vrf_master_ifindex(skb_dst(skb)->dev) ? - : skb_dst(skb)->dev->ifindex; - } + if (skb_dst(skb)) + oif = l3mdev_fib_oif(skb_dst(skb)->dev); memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; @@ -178,7 +177,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) return; case IPPROTO_ICMPV6: - if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { + if (!onlyproto && (nh + offset + 2 < skb->data || + pskb_may_pull(skb, nh + offset + 2 - skb->data))) { u8 *icmp; nh = skb_network_header(skb); @@ -192,7 +192,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: offset += ipv6_optlen(exthdr); - if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { + if (!onlyproto && (nh + offset + 3 < skb->data || + pskb_may_pull(skb, nh + offset + 3 - skb->data))) { struct ip6_mh *mh; nh = skb_network_header(skb); |