Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/addrconf.c | 234
-rw-r--r-- | net/ipv6/af_inet6.c | 24
-rw-r--r-- | net/ipv6/exthdrs.c | 8
-rw-r--r-- | net/ipv6/fib6_rules.c | 30
-rw-r--r-- | net/ipv6/icmp.c | 62
-rw-r--r-- | net/ipv6/inet6_hashtables.c | 5
-rw-r--r-- | net/ipv6/ioam6.c | 19
-rw-r--r-- | net/ipv6/ioam6_iptunnel.c | 59
-rw-r--r-- | net/ipv6/ip6_input.c | 3
-rw-r--r-- | net/ipv6/ip6_offload.c | 5
-rw-r--r-- | net/ipv6/ip6_output.c | 116
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 8
-rw-r--r-- | net/ipv6/ip6mr.c | 53
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 6
-rw-r--r-- | net/ipv6/ndisc.c | 55
-rw-r--r-- | net/ipv6/netfilter.c | 5
-rw-r--r-- | net/ipv6/netfilter/nf_conntrack_reasm.c | 1
-rw-r--r-- | net/ipv6/netfilter/nft_dup_ipv6.c | 1
-rw-r--r-- | net/ipv6/netfilter/nft_fib_ipv6.c | 2
-rw-r--r-- | net/ipv6/netfilter/nft_reject_ipv6.c | 1
-rw-r--r-- | net/ipv6/ping.c | 29
-rw-r--r-- | net/ipv6/reassembly.c | 1
-rw-r--r-- | net/ipv6/route.c | 60
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 104
-rw-r--r-- | net/ipv6/udp.c | 114
-rw-r--r-- | net/ipv6/xfrm6_policy.c | 4
26 files changed, 543 insertions, 466 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f908e2fd30b2..b22504176588 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -146,18 +146,11 @@ static int ipv6_generate_stable_address(struct in6_addr *addr, #define IN6_ADDR_HSIZE_SHIFT 8 #define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) -/* - * Configured unicast address hash table - */ -static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; -static DEFINE_SPINLOCK(addrconf_hash_lock); -static void addrconf_verify(void); -static void addrconf_verify_rtnl(void); -static void addrconf_verify_work(struct work_struct *); +static void addrconf_verify(struct net *net); +static void addrconf_verify_rtnl(struct net *net); static struct workqueue_struct *addrconf_wq; -static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); @@ -379,7 +372,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ASSERT_RTNL(); - if (dev->mtu < IPV6_MIN_MTU) + if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev) return ERR_PTR(-EINVAL); ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); @@ -416,12 +409,13 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) return ERR_PTR(err); } - if (snmp6_register_dev(ndev) < 0) { - netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n", - __func__, dev->name); - goto err_release; + if (dev != blackhole_netdev) { + if (snmp6_register_dev(ndev) < 0) { + netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n", + __func__, dev->name); + goto err_release; + } } - /* One reference from device. */ refcount_set(&ndev->refcnt, 1); @@ -452,25 +446,28 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; - err = addrconf_sysctl_register(ndev); - if (err) { - ipv6_mc_destroy_dev(ndev); - snmp6_unregister_dev(ndev); - goto err_release; + if (dev != blackhole_netdev) { + err = addrconf_sysctl_register(ndev); + if (err) { + ipv6_mc_destroy_dev(ndev); + snmp6_unregister_dev(ndev); + goto err_release; + } } /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); - /* Join interface-local all-node multicast group */ - ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes); - - /* Join all-node multicast group */ - ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); + if (dev != blackhole_netdev) { + /* Join interface-local all-node multicast group */ + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes); - /* Join all-router multicast group if forwarding is set */ - if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST)) - ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); + /* Join all-node multicast group */ + ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); + /* Join all-router multicast group if forwarding is set */ + if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST)) + ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); + } return ndev; err_release: @@ -554,7 +551,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, #ifdef CONFIG_IPV6_MROUTE if ((all || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, - devconf->mc_forwarding) < 0) + atomic_read(&devconf->mc_forwarding)) < 0) goto nla_put_failure; #endif if ((all || type == NETCONFA_PROXY_NEIGH) && @@ -1011,9 +1008,7 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; - hlist_for_each_entry(ifp, 
&inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev) return true; @@ -1024,20 +1019,21 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) { - unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr); + struct net *net = dev_net(dev); + unsigned int hash = inet6_addr_hash(net, &ifa->addr); int err = 0; - spin_lock(&addrconf_hash_lock); + spin_lock(&net->ipv6.addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ - if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) { + if (ipv6_chk_same_addr(net, &ifa->addr, dev, hash)) { netdev_dbg(dev, "ipv6_add_addr: already assigned\n"); err = -EEXIST; } else { - hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); + hlist_add_head_rcu(&ifa->addr_lst, &net->ipv6.inet6_addr_lst[hash]); } - spin_unlock(&addrconf_hash_lock); + spin_unlock(&net->ipv6.addrconf_hash_lock); return err; } @@ -1119,6 +1115,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, ifa->prefix_len = cfg->plen; ifa->rt_priority = cfg->rt_priority; ifa->flags = cfg->ifa_flags; + ifa->ifa_proto = cfg->ifa_proto; /* No need to add the TENTATIVE flag for addresses with NODAD */ if (!(cfg->ifa_flags & IFA_F_NODAD)) ifa->flags |= IFA_F_TENTATIVE; @@ -1261,9 +1258,10 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, static void ipv6_del_addr(struct inet6_ifaddr *ifp) { - int state; enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP; + struct net *net = dev_net(ifp->idev->dev); unsigned long expires; + int state; ASSERT_RTNL(); @@ -1275,9 +1273,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if (state == INET6_IFADDR_STATE_DEAD) goto out; - spin_lock_bh(&addrconf_hash_lock); + spin_lock_bh(&net->ipv6.addrconf_hash_lock); hlist_del_init_rcu(&ifp->addr_lst); - spin_unlock_bh(&addrconf_hash_lock); + spin_unlock_bh(&net->ipv6.addrconf_hash_lock); write_lock_bh(&ifp->idev->lock); @@ -1920,10 +1918,8 @@ __ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, if (skip_dev_check) dev = NULL; - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { ndev = ifp->idev->dev; - if (!net_eq(dev_net(ndev), net)) - continue; if (l3mdev_master_dev_rcu(ndev) != l3mdev) continue; @@ -2027,9 +2023,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add struct inet6_ifaddr *ifp, *result = NULL; rcu_read_lock(); - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { @@ -2096,7 +2090,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp) void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; - struct net *net = dev_net(ifp->idev->dev); + struct net *net = dev_net(idev->dev); if (addrconf_dad_end(ifp)) { in6_ifa_put(ifp); @@ -2600,6 +2594,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, .valid_lft = valid_lft, .preferred_lft = prefered_lft, .scope = 
addr_type & IPV6_ADDR_SCOPE_MASK, + .ifa_proto = IFAPROT_KERNEL_RA }; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -2675,7 +2670,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, create, now); in6_ifa_put(ifp); - addrconf_verify(); + addrconf_verify(net); } return 0; @@ -2987,7 +2982,7 @@ static int inet6_addr_add(struct net *net, int ifindex, manage_tempaddrs(idev, ifp, cfg->valid_lft, cfg->preferred_lft, true, jiffies); in6_ifa_put(ifp); - addrconf_verify_rtnl(); + addrconf_verify_rtnl(net); return 0; } else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, false, @@ -3027,7 +3022,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, manage_tempaddrs(idev, ifp, 0, 0, false, jiffies); ipv6_del_addr(ifp); - addrconf_verify_rtnl(); + addrconf_verify_rtnl(net); if (ipv6_addr_is_multicast(pfx)) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, false, pfx, dev->ifindex); @@ -3084,7 +3079,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg) } static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, - int plen, int scope) + int plen, int scope, u8 proto) { struct inet6_ifaddr *ifp; struct ifa6_config cfg = { @@ -3093,7 +3088,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, .ifa_flags = IFA_F_PERMANENT, .valid_lft = INFINITY_LIFE_TIME, .preferred_lft = INFINITY_LIFE_TIME, - .scope = scope + .scope = scope, + .ifa_proto = proto }; ifp = ipv6_add_addr(idev, &cfg, true, NULL); @@ -3138,7 +3134,7 @@ static void add_v4_addrs(struct inet6_dev *idev) } if (addr.s6_addr32[3]) { - add_addr(idev, &addr, plen, scope); + add_addr(idev, &addr, plen, scope, IFAPROT_UNSPEC); addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags, GFP_KERNEL); return; @@ -3161,7 +3157,8 @@ static void add_v4_addrs(struct inet6_dev *idev) flag |= IFA_HOST; } - add_addr(idev, &addr, plen, flag); + add_addr(idev, &addr, plen, flag, + IFAPROT_UNSPEC); addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags, GFP_KERNEL); } @@ -3184,7 +3181,7 @@ static void init_loopback(struct net_device *dev) return; } - add_addr(idev, &in6addr_loopback, 128, IFA_HOST); + add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFAPROT_KERNEL_LO); } void addrconf_add_linklocal(struct inet6_dev *idev, @@ -3196,7 +3193,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev, .ifa_flags = flags | IFA_F_PERMANENT, .valid_lft = INFINITY_LIFE_TIME, .preferred_lft = INFINITY_LIFE_TIME, - .scope = IFA_LINK + .scope = IFA_LINK, + .ifa_proto = IFAPROT_KERNEL_LL }; struct inet6_ifaddr *ifp; @@ -3773,9 +3771,9 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { - struct hlist_head *h = &inet6_addr_lst[i]; + struct hlist_head *h = &net->ipv6.inet6_addr_lst[i]; - spin_lock_bh(&addrconf_hash_lock); + spin_lock_bh(&net->ipv6.addrconf_hash_lock); restart: hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { @@ -3791,7 +3789,7 @@ restart: } } } - spin_unlock_bh(&addrconf_hash_lock); + spin_unlock_bh(&net->ipv6.addrconf_hash_lock); } write_lock_bh(&idev->lock); @@ -4250,7 +4248,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, * before this temporary address becomes deprecated. 
*/ if (ifp->flags & IFA_F_TEMPORARY) - addrconf_verify_rtnl(); + addrconf_verify_rtnl(dev_net(dev)); } static void addrconf_dad_run(struct inet6_dev *idev, bool restart) @@ -4292,10 +4290,8 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) } for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket], + hlist_for_each_entry_rcu(ifa, &net->ipv6.inet6_addr_lst[state->bucket], addr_lst) { - if (!net_eq(dev_net(ifa->idev->dev), net)) - continue; /* sync with offset */ if (p < state->offset) { p++; @@ -4318,8 +4314,6 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); hlist_for_each_entry_continue_rcu(ifa, addr_lst) { - if (!net_eq(dev_net(ifa->idev->dev), net)) - continue; state->offset++; return ifa; } @@ -4327,9 +4321,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, state->offset = 0; while (++state->bucket < IN6_ADDR_HSIZE) { hlist_for_each_entry_rcu(ifa, - &inet6_addr_lst[state->bucket], addr_lst) { - if (!net_eq(dev_net(ifa->idev->dev), net)) - continue; + &net->ipv6.inet6_addr_lst[state->bucket], addr_lst) { return ifa; } } @@ -4417,9 +4409,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) int ret = 0; rcu_read_lock(); - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr) && (ifp->flags & IFA_F_HOMEADDRESS)) { ret = 1; @@ -4457,9 +4447,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs, hash = inet6_addr_hash(net, addr); hash_found = false; - hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; + hlist_for_each_entry_rcu(ifp, &net->ipv6.inet6_addr_lst[hash], addr_lst) { if (ipv6_addr_equal(&ifp->addr, addr)) { hash_found = true; @@ -4488,7 +4476,7 @@ int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs, * Periodic address status verification */ -static void addrconf_verify_rtnl(void) +static void addrconf_verify_rtnl(struct net *net) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; @@ -4500,11 +4488,11 @@ static void addrconf_verify_rtnl(void) now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); - cancel_delayed_work(&addr_chk_work); + cancel_delayed_work(&net->ipv6.addr_chk_work); for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &net->ipv6.inet6_addr_lst[i], addr_lst) { unsigned long age; /* When setting preferred_lft to a value not zero or @@ -4603,20 +4591,23 @@ restart: pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", now, next, next_sec, next_sched); - mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now); + mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, next_sched - now); rcu_read_unlock_bh(); } static void addrconf_verify_work(struct work_struct *w) { + struct net *net = container_of(to_delayed_work(w), struct net, + ipv6.addr_chk_work); + rtnl_lock(); - addrconf_verify_rtnl(); + addrconf_verify_rtnl(net); rtnl_unlock(); } -static void addrconf_verify(void) +static void addrconf_verify(struct net *net) { - mod_delayed_work(addrconf_wq, &addr_chk_work, 0); + mod_delayed_work(addrconf_wq, &net->ipv6.addr_chk_work, 0); } static 
struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local, @@ -4645,6 +4636,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { [IFA_FLAGS] = { .len = sizeof(u32) }, [IFA_RT_PRIORITY] = { .len = sizeof(u32) }, [IFA_TARGET_NETNSID] = { .type = NLA_S32 }, + [IFA_PROTO] = { .type = NLA_U8 }, }; static int @@ -4712,7 +4704,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, return 0; } -static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) +static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, + struct ifa6_config *cfg) { u32 flags; clock_t expires; @@ -4769,6 +4762,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) ifp->tstamp = jiffies; ifp->valid_lft = cfg->valid_lft; ifp->prefered_lft = cfg->preferred_lft; + ifp->ifa_proto = cfg->ifa_proto; if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority) ifp->rt_priority = cfg->rt_priority; @@ -4826,7 +4820,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) jiffies); } - addrconf_verify_rtnl(); + addrconf_verify_rtnl(net); return 0; } @@ -4862,6 +4856,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[IFA_RT_PRIORITY]) cfg.rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]); + if (tb[IFA_PROTO]) + cfg.ifa_proto = nla_get_u8(tb[IFA_PROTO]); + cfg.valid_lft = INFINITY_LIFE_TIME; cfg.preferred_lft = INFINITY_LIFE_TIME; @@ -4913,7 +4910,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, !(nlh->nlmsg_flags & NLM_F_REPLACE)) err = -EEXIST; else - err = inet6_addr_modify(ifa, &cfg); + err = inet6_addr_modify(net, ifa, &cfg); in6_ifa_put(ifa); @@ -4965,6 +4962,7 @@ static inline int inet6_ifaddr_msgsize(void) + nla_total_size(16) /* IFA_ADDRESS */ + nla_total_size(sizeof(struct ifa_cacheinfo)) + nla_total_size(4) /* IFA_FLAGS */ + + nla_total_size(1) /* IFA_PROTO */ + nla_total_size(4) /* IFA_RT_PRIORITY */; } @@ -5044,6 +5042,10 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0) goto error; + if (ifa->ifa_proto && + nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) + goto error; + nlmsg_end(skb, nlh); return 0; @@ -5539,7 +5541,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; + array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding); #endif array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; @@ -5800,7 +5802,7 @@ update_lft: write_unlock_bh(&idev->lock); inet6_ifinfo_notify(RTM_NEWLINK, idev); - addrconf_verify_rtnl(); + addrconf_verify_rtnl(dev_net(dev)); return 0; } @@ -7117,6 +7119,14 @@ static int __net_init addrconf_init_net(struct net *net) int err = -ENOMEM; struct ipv6_devconf *all, *dflt; + spin_lock_init(&net->ipv6.addrconf_hash_lock); + INIT_DEFERRABLE_WORK(&net->ipv6.addr_chk_work, addrconf_verify_work); + net->ipv6.inet6_addr_lst = kcalloc(IN6_ADDR_HSIZE, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!net->ipv6.inet6_addr_lst) + goto err_alloc_addr; + all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); if (!all) goto err_alloc_all; @@ -7178,11 +7188,15 @@ err_reg_all: err_alloc_dflt: kfree(all); err_alloc_all: + kfree(net->ipv6.inet6_addr_lst); +err_alloc_addr: return err; } static void __net_exit addrconf_exit_net(struct net *net) { + int i; + #ifdef CONFIG_SYSCTL 
__addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt, NETCONFA_IFINDEX_DEFAULT); @@ -7190,7 +7204,19 @@ static void __net_exit addrconf_exit_net(struct net *net) NETCONFA_IFINDEX_ALL); #endif kfree(net->ipv6.devconf_dflt); + net->ipv6.devconf_dflt = NULL; kfree(net->ipv6.devconf_all); + net->ipv6.devconf_all = NULL; + + cancel_delayed_work_sync(&net->ipv6.addr_chk_work); + /* + * Check hash table, then free it. + */ + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON_ONCE(!hlist_empty(&net->ipv6.inet6_addr_lst[i])); + + kfree(net->ipv6.inet6_addr_lst); + net->ipv6.inet6_addr_lst = NULL; } static struct pernet_operations addrconf_ops = { @@ -7213,7 +7239,7 @@ static struct rtnl_af_ops inet6_ops __read_mostly = { int __init addrconf_init(void) { struct inet6_dev *idev; - int i, err; + int err; err = ipv6_addr_label_init(); if (err < 0) { @@ -7232,26 +7258,8 @@ int __init addrconf_init(void) goto out_nowq; } - /* The addrconf netdev notifier requires that loopback_dev - * has it's ipv6 private information allocated and setup - * before it can bring up and give link-local addresses - * to other devices which are up. - * - * Unfortunately, loopback_dev is not necessarily the first - * entry in the global dev_base list of net devices. In fact, - * it is likely to be the very last entry on that list. - * So this causes the notifier registry below to try and - * give link-local addresses to all devices besides loopback_dev - * first, then loopback_dev, which cases all the non-loopback_dev - * devices to fail to get a link-local address. - * - * So, as a temporary fix, allocate the ipv6 structure for - * loopback_dev first by hand. - * Longer term, all of the dependencies ipv6 has upon the loopback - * device and it being up should be removed. - */ rtnl_lock(); - idev = ipv6_add_dev(init_net.loopback_dev); + idev = ipv6_add_dev(blackhole_netdev); rtnl_unlock(); if (IS_ERR(idev)) { err = PTR_ERR(idev); @@ -7260,12 +7268,9 @@ int __init addrconf_init(void) ip6_route_init_special_entries(); - for (i = 0; i < IN6_ADDR_HSIZE; i++) - INIT_HLIST_HEAD(&inet6_addr_lst[i]); - register_netdevice_notifier(&ipv6_dev_notf); - addrconf_verify(); + addrconf_verify(&init_net); rtnl_af_register(&inet6_ops); @@ -7323,7 +7328,6 @@ out: void addrconf_cleanup(void) { struct net_device *dev; - int i; unregister_netdevice_notifier(&ipv6_dev_notf); unregister_pernet_subsys(&addrconf_ops); @@ -7341,14 +7345,6 @@ void addrconf_cleanup(void) } addrconf_ifdown(init_net.loopback_dev, true); - /* - * Check hash table. - */ - spin_lock_bh(&addrconf_hash_lock); - for (i = 0; i < IN6_ADDR_HSIZE; i++) - WARN_ON(!hlist_empty(&inet6_addr_lst[i])); - spin_unlock_bh(&addrconf_hash_lock); - cancel_delayed_work(&addr_chk_work); rtnl_unlock(); destroy_workqueue(addrconf_wq); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8fe7900f1949..7d7b7523d126 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -441,11 +441,14 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; u32 flags = BIND_WITH_LOCK; + const struct proto *prot; int err = 0; + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); /* If the socket has its own bind function then use it. 
*/ - if (sk->sk_prot->bind) - return sk->sk_prot->bind(sk, uaddr, addr_len); + if (prot->bind) + return prot->bind(sk, uaddr, addr_len); if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; @@ -555,6 +558,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; struct net *net = sock_net(sk); + const struct proto *prot; switch (cmd) { case SIOCADDRT: @@ -572,9 +576,11 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFDSTADDR: return addrconf_set_dstaddr(net, argp); default: - if (!sk->sk_prot->ioctl) + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + if (!prot->ioctl) return -ENOIOCTLCMD; - return sk->sk_prot->ioctl(sk, cmd, arg); + return prot->ioctl(sk, cmd, arg); } /*NOTREACHED*/ return 0; @@ -636,11 +642,14 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *, int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; + const struct proto *prot; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; - return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udpv6_sendmsg, + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udpv6_sendmsg, sk, msg, size); } @@ -650,13 +659,16 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; + const struct proto *prot; int addr_len = 0; int err; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); - err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 77e34aec7e82..658d5eabaf7e 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -1344,14 +1344,14 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, return opt2; } -struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, - struct ipv6_txoptions *opt) +struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space, + struct ipv6_txoptions *opt) { /* * ignore the dest before srcrt unless srcrt is being included. 
* --yoshfuji */ - if (opt && opt->dst0opt && !opt->srcrt) { + if (opt->dst0opt && !opt->srcrt) { if (opt_space != opt) { memcpy(opt_space, opt, sizeof(*opt_space)); opt = opt_space; @@ -1362,7 +1362,7 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, return opt; } -EXPORT_SYMBOL_GPL(ipv6_fixup_options); +EXPORT_SYMBOL_GPL(__ipv6_fixup_options); /** * fl6_update_dst - update flowi destination address with info given diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index ec029c86ae06..7c2003833010 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -16,6 +16,7 @@ #include <linux/indirect_call_wrapper.h> #include <net/fib_rules.h> +#include <net/inet_dscp.h> #include <net/ipv6.h> #include <net/addrconf.h> #include <net/ip6_route.h> @@ -25,14 +26,14 @@ struct fib6_rule { struct fib_rule common; struct rt6key src; struct rt6key dst; - u8 tclass; + dscp_t dscp; }; static bool fib6_rule_matchall(const struct fib_rule *rule) { struct fib6_rule *r = container_of(rule, struct fib6_rule, common); - if (r->dst.plen || r->src.plen || r->tclass) + if (r->dst.plen || r->src.plen || r->dscp) return false; return fib_rule_matchall(rule); } @@ -323,7 +324,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule, return 0; } - if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel)) + if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel)) return 0; if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto)) @@ -349,6 +350,13 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct net *net = sock_net(skb->sk); struct fib6_rule *rule6 = (struct fib6_rule *) rule; + if (!inet_validate_dscp(frh->tos)) { + NL_SET_ERR_MSG(extack, + "Invalid dsfield (tos): ECN bits must be 0"); + goto errout; + } + rule6->dscp = inet_dsfield_to_dscp(frh->tos); + if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) { if (rule->table == RT6_TABLE_UNSPEC) { NL_SET_ERR_MSG(extack, "Invalid table"); @@ -369,7 +377,6 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule6->src.plen = frh->src_len; rule6->dst.plen = frh->dst_len; - rule6->tclass = frh->tos; if (fib_rule_requires_fldissect(rule)) net->ipv6.fib6_rules_require_fldissect++; @@ -402,7 +409,7 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, if (frh->dst_len && (rule6->dst.plen != frh->dst_len)) return 0; - if (frh->tos && (rule6->tclass != frh->tos)) + if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos) return 0; if (frh->src_len && @@ -423,7 +430,7 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->dst_len = rule6->dst.plen; frh->src_len = rule6->src.plen; - frh->tos = rule6->tclass; + frh->tos = inet_dscp_to_dsfield(rule6->dscp); if ((rule6->dst.plen && nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) || @@ -486,16 +493,21 @@ out_fib6_rules_ops: goto out; } -static void __net_exit fib6_rules_net_exit(struct net *net) +static void __net_exit fib6_rules_net_exit_batch(struct list_head *net_list) { + struct net *net; + rtnl_lock(); - fib_rules_unregister(net->ipv6.fib6_rules_ops); + list_for_each_entry(net, net_list, exit_list) { + fib_rules_unregister(net->ipv6.fib6_rules_ops); + cond_resched(); + } rtnl_unlock(); } static struct pernet_operations fib6_rules_net_ops = { .init = fib6_rules_net_init, - .exit = fib6_rules_net_exit, + .exit_batch = fib6_rules_net_exit_batch, }; int __init fib6_rules_init(void) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 96c5cc0f30ce..e6b978ea0e87 
100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -69,17 +69,7 @@ #include <linux/uaccess.h> -/* - * The ICMP socket(s). This is the most convenient way to flow control - * our ICMP output as well as maintain a clean interface throughout - * all layers. All Socketless IP sends will soon be gone. - * - * On SMP we have one ICMP socket per-cpu. - */ -static struct sock *icmpv6_sk(struct net *net) -{ - return this_cpu_read(*net->ipv6.icmp_sk); -} +static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk); static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) @@ -110,11 +100,11 @@ static const struct inet6_protocol icmpv6_protocol = { }; /* Called with BH disabled */ -static __inline__ struct sock *icmpv6_xmit_lock(struct net *net) +static struct sock *icmpv6_xmit_lock(struct net *net) { struct sock *sk; - sk = icmpv6_sk(net); + sk = this_cpu_read(ipv6_icmp_sk); if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path (f.e. SIT or * ip6ip6 tunnel) signals dst_link_failure() for an @@ -122,11 +112,13 @@ static __inline__ struct sock *icmpv6_xmit_lock(struct net *net) */ return NULL; } + sock_net_set(sk, net); return sk; } -static __inline__ void icmpv6_xmit_unlock(struct sock *sk) +static void icmpv6_xmit_unlock(struct sock *sk) { + sock_net_set(sk, &init_net); spin_unlock(&sk->sk_lock.slock); } @@ -1034,59 +1026,27 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); } -static void __net_exit icmpv6_sk_exit(struct net *net) -{ - int i; - - for_each_possible_cpu(i) - inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i)); - free_percpu(net->ipv6.icmp_sk); -} - -static int __net_init icmpv6_sk_init(struct net *net) +int __init icmpv6_init(void) { struct sock *sk; int err, i; - net->ipv6.icmp_sk = alloc_percpu(struct sock *); - if (!net->ipv6.icmp_sk) - return -ENOMEM; - for_each_possible_cpu(i) { err = inet_ctl_sock_create(&sk, PF_INET6, - SOCK_RAW, IPPROTO_ICMPV6, net); + SOCK_RAW, IPPROTO_ICMPV6, &init_net); if (err < 0) { pr_err("Failed to initialize the ICMP6 control socket (err %d)\n", err); - goto fail; + return err; } - *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk; + per_cpu(ipv6_icmp_sk, i) = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. 
*/ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); } - return 0; - - fail: - icmpv6_sk_exit(net); - return err; -} - -static struct pernet_operations icmpv6_sk_ops = { - .init = icmpv6_sk_init, - .exit = icmpv6_sk_exit, -}; - -int __init icmpv6_init(void) -{ - int err; - - err = register_pernet_subsys(&icmpv6_sk_ops); - if (err < 0) - return err; err = -EAGAIN; if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) @@ -1101,14 +1061,12 @@ sender_reg_err: inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); fail: pr_err("Failed to register ICMP6 protocol\n"); - unregister_pernet_subsys(&icmpv6_sk_ops); return err; } void icmpv6_cleanup(void) { inet6_unregister_icmp_sender(icmp6_send); - unregister_pernet_subsys(&icmpv6_sk_ops); inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 4514444e96c8..4740afecf7c6 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk) { int err = 0; - if (sk->sk_state != TCP_CLOSE) { - local_bh_disable(); + if (sk->sk_state != TCP_CLOSE) err = __inet_hash(sk, NULL); - local_bh_enable(); - } return err; } diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index e159eb4328a8..1098131ed90c 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -635,7 +635,8 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_schema *sc, u8 sclen, bool is_input) { - struct __kernel_sock_timeval ts; + struct timespec64 ts; + ktime_t tstamp; u64 raw64; u32 raw32; u16 raw16; @@ -680,10 +681,9 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (!skb->dev) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); } else { - if (!skb->tstamp) - __net_timestamp(skb); + tstamp = skb_tstamp_cond(skb, true); + ts = ktime_to_timespec64(tstamp); - skb_get_new_timestamp(skb, &ts); *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec); } data += sizeof(__be32); @@ -694,13 +694,12 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (!skb->dev) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); } else { - if (!skb->tstamp) - __net_timestamp(skb); + if (!trace->type.bit2) { + tstamp = skb_tstamp_cond(skb, true); + ts = ktime_to_timespec64(tstamp); + } - if (!trace->type.bit2) - skb_get_new_timestamp(skb, &ts); - - *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec); + *(__be32 *)data = cpu_to_be32((u32)(ts.tv_nsec / NSEC_PER_USEC)); } data += sizeof(__be32); } diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index f90a87389fcc..f6f5b83dd954 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -32,13 +32,25 @@ struct ioam6_lwt_encap { struct ioam6_trace_hdr traceh; } __packed; +struct ioam6_lwt_freq { + u32 k; + u32 n; +}; + struct ioam6_lwt { struct dst_cache cache; + struct ioam6_lwt_freq freq; + atomic_t pkt_cnt; u8 mode; struct in6_addr tundst; struct ioam6_lwt_encap tuninfo; }; +static struct netlink_range_validation freq_range = { + .min = IOAM6_IPTUNNEL_FREQ_MIN, + .max = IOAM6_IPTUNNEL_FREQ_MAX, +}; + static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt) { return (struct ioam6_lwt *)lwt->data; @@ -55,6 +67,8 @@ static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt) } static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = { + [IOAM6_IPTUNNEL_FREQ_K] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range), + [IOAM6_IPTUNNEL_FREQ_N] = NLA_POLICY_FULL_RANGE(NLA_U32, &freq_range), [IOAM6_IPTUNNEL_MODE] = 
NLA_POLICY_RANGE(NLA_U8, IOAM6_IPTUNNEL_MODE_MIN, IOAM6_IPTUNNEL_MODE_MAX), @@ -96,6 +110,7 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, struct lwtunnel_state *lwt; struct ioam6_lwt *ilwt; int len_aligned, err; + u32 freq_k, freq_n; u8 mode; if (family != AF_INET6) @@ -106,6 +121,23 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, if (err < 0) return err; + if ((!tb[IOAM6_IPTUNNEL_FREQ_K] && tb[IOAM6_IPTUNNEL_FREQ_N]) || + (tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N])) { + NL_SET_ERR_MSG(extack, "freq: missing parameter"); + return -EINVAL; + } else if (!tb[IOAM6_IPTUNNEL_FREQ_K] && !tb[IOAM6_IPTUNNEL_FREQ_N]) { + freq_k = IOAM6_IPTUNNEL_FREQ_MIN; + freq_n = IOAM6_IPTUNNEL_FREQ_MIN; + } else { + freq_k = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_K]); + freq_n = nla_get_u32(tb[IOAM6_IPTUNNEL_FREQ_N]); + + if (freq_k > freq_n) { + NL_SET_ERR_MSG(extack, "freq: k > n is forbidden"); + return -EINVAL; + } + } + if (!tb[IOAM6_IPTUNNEL_MODE]) mode = IOAM6_IPTUNNEL_MODE_INLINE; else @@ -140,6 +172,10 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla, return err; } + atomic_set(&ilwt->pkt_cnt, 0); + ilwt->freq.k = freq_k; + ilwt->freq.n = freq_n; + ilwt->mode = mode; if (tb[IOAM6_IPTUNNEL_DST]) ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]); @@ -263,11 +299,18 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct in6_addr orig_daddr; struct ioam6_lwt *ilwt; int err = -EINVAL; + u32 pkt_cnt; if (skb->protocol != htons(ETH_P_IPV6)) goto drop; ilwt = ioam6_lwt_state(dst->lwtstate); + + /* Check for insertion frequency (i.e., "k over n" insertions) */ + pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt); + if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k) + goto out; + orig_daddr = ipv6_hdr(skb)->daddr; switch (ilwt->mode) { @@ -358,6 +401,14 @@ static int ioam6_fill_encap_info(struct sk_buff *skb, struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate); int err; + err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_K, ilwt->freq.k); + if (err) + goto ret; + + err = nla_put_u32(skb, IOAM6_IPTUNNEL_FREQ_N, ilwt->freq.n); + if (err) + goto ret; + err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode); if (err) goto ret; @@ -379,7 +430,9 @@ static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate) struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate); int nlsize; - nlsize = nla_total_size(sizeof(ilwt->mode)) + + nlsize = nla_total_size(sizeof(ilwt->freq.k)) + + nla_total_size(sizeof(ilwt->freq.n)) + + nla_total_size(sizeof(ilwt->mode)) + nla_total_size(sizeof(ilwt->tuninfo.traceh)); if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) @@ -395,7 +448,9 @@ static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a); struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b); - return (ilwt_a->mode != ilwt_b->mode || + return (ilwt_a->freq.k != ilwt_b->freq.k || + ilwt_a->freq.n != ilwt_b->freq.n || + ilwt_a->mode != ilwt_b->mode || (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE && !ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) || trace_a->namespace_id != trace_b->namespace_id); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 80256717868e..5b5ea35635f9 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -459,6 +459,7 @@ discard: static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + skb_clear_delivery_time(skb); rcu_read_lock(); ip6_protocol_deliver_rcu(net, skb, 0, false); rcu_read_unlock(); @@ -508,7 +509,7 @@ int 
ip6_mc_input(struct sk_buff *skb) /* * IPv6 multicast router mode is now supported ;) */ - if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && + if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) && !(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 5f577e21459b..c4fc03c1ac99 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -251,7 +251,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, if ((first_word & htonl(0xF00FFFFF)) || !ipv6_addr_equal(&iph->saddr, &iph2->saddr) || !ipv6_addr_equal(&iph->daddr, &iph2->daddr) || - *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) { + iph->nexthdr != iph2->nexthdr) { not_same_flow: NAPI_GRO_CB(p)->same_flow = 0; continue; @@ -262,7 +262,8 @@ not_same_flow: goto not_same_flow; } /* flush if Traffic Class fields are different */ - NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); + NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) | + (__force __be32)(iph->hop_limit ^ iph2->hop_limit)); NAPI_GRO_CB(p)->flush |= flush; /* If the previous IP ID value was based on an atomic diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 194832663d85..e23f058166af 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -130,7 +130,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * rcu_read_unlock_bh(); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); return -EINVAL; } @@ -202,7 +202,7 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s case NET_XMIT_CN: return __ip6_finish_output(net, sk, skb) ? : ret; default: - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS); return ret; } } @@ -217,7 +217,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (unlikely(idev->cnf.disable_ipv6)) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; } @@ -440,7 +440,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk, } #endif - skb->tstamp = 0; + skb_clear_tstamp(skb); return dst_output(net, sk, skb); } @@ -813,6 +813,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; + bool mono_delivery_time = skb->mono_delivery_time; struct ip6_frag_state state; unsigned int mtu, hlen, nexthdr_offset; ktime_t tstamp = skb->tstamp; @@ -903,7 +904,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); - skb->tstamp = tstamp; + skb_set_delivery_time(skb, tstamp, mono_delivery_time); err = output(net, sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), @@ -962,7 +963,7 @@ slow_path: /* * Put this fragment into the sending queue. 
*/ - frag->tstamp = tstamp; + skb_set_delivery_time(frag, tstamp, mono_delivery_time); err = output(net, sk, frag); if (err) goto fail; @@ -1034,8 +1035,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && - (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { + (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; } @@ -1350,11 +1350,16 @@ static void ip6_append_data_mtu(unsigned int *mtu, static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6, - struct rt6_info *rt, struct flowi6 *fl6) + struct rt6_info *rt) { struct ipv6_pinfo *np = inet6_sk(sk); unsigned int mtu; - struct ipv6_txoptions *opt = ipc6->opt; + struct ipv6_txoptions *nopt, *opt = ipc6->opt; + + /* callers pass dst together with a reference, set it first so + * ip6_cork_release() can put it down even in case of an error. + */ + cork->base.dst = &rt->dst; /* * setup for corking @@ -1363,39 +1368,32 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (WARN_ON(v6_cork->opt)) return -EINVAL; - v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); - if (unlikely(!v6_cork->opt)) + nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); + if (unlikely(!nopt)) return -ENOBUFS; - v6_cork->opt->tot_len = sizeof(*opt); - v6_cork->opt->opt_flen = opt->opt_flen; - v6_cork->opt->opt_nflen = opt->opt_nflen; + nopt->tot_len = sizeof(*opt); + nopt->opt_flen = opt->opt_flen; + nopt->opt_nflen = opt->opt_nflen; - v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, - sk->sk_allocation); - if (opt->dst0opt && !v6_cork->opt->dst0opt) + nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation); + if (opt->dst0opt && !nopt->dst0opt) return -ENOBUFS; - v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, - sk->sk_allocation); - if (opt->dst1opt && !v6_cork->opt->dst1opt) + nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation); + if (opt->dst1opt && !nopt->dst1opt) return -ENOBUFS; - v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, - sk->sk_allocation); - if (opt->hopopt && !v6_cork->opt->hopopt) + nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation); + if (opt->hopopt && !nopt->hopopt) return -ENOBUFS; - v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, - sk->sk_allocation); - if (opt->srcrt && !v6_cork->opt->srcrt) + nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation); + if (opt->srcrt && !nopt->srcrt) return -ENOBUFS; /* need source address above miyazawa*/ } - dst_hold(&rt->dst); - cork->base.dst = &rt->dst; - cork->fl.u.ip6 = *fl6; v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) @@ -1424,9 +1422,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, } static int __ip6_append_data(struct sock *sk, - struct flowi6 *fl6, struct sk_buff_head *queue, - struct inet_cork *cork, + struct inet_cork_full *cork_full, struct inet6_cork *v6_cork, struct page_frag *pfrag, int getfrag(void *from, char *to, int offset, @@ -1435,6 +1432,8 @@ static int __ip6_append_data(struct sock *sk, unsigned int flags, struct ipcm6_cookie *ipc6) { struct sk_buff *skb, *skb_prev = NULL; + struct inet_cork *cork = &cork_full->base; + struct flowi6 *fl6 = &cork_full->fl.u.ip6; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; struct ubuf_info *uarg = NULL; int 
exthdrlen = 0; @@ -1491,6 +1490,7 @@ static int __ip6_append_data(struct sock *sk, if (cork->length + length > mtu - headersize && ipc6->dontfrag && (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_ICMPV6 || sk->sk_protocol == IPPROTO_RAW)) { ipv6_local_rxpmtu(sk, fl6, mtu - headersize + sizeof(struct ipv6hdr)); @@ -1791,34 +1791,46 @@ int ip6_append_data(struct sock *sk, /* * setup for corking */ + dst_hold(&rt->dst); err = ip6_setup_cork(sk, &inet->cork, &np->cork, - ipc6, rt, fl6); + ipc6, rt); if (err) return err; + inet->cork.fl.u.ip6 = *fl6; exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; } else { - fl6 = &inet->cork.fl.u.ip6; transhdrlen = 0; } - return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, + return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork, &np->cork, sk_page_frag(sk), getfrag, from, length, transhdrlen, flags, ipc6); } EXPORT_SYMBOL_GPL(ip6_append_data); +static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) +{ + struct dst_entry *dst = cork->base.dst; + + cork->base.dst = NULL; + cork->base.flags &= ~IPCORK_ALLFRAG; + skb_dst_set(skb, dst); +} + static void ip6_cork_release(struct inet_cork_full *cork, struct inet6_cork *v6_cork) { if (v6_cork->opt) { - kfree(v6_cork->opt->dst0opt); - kfree(v6_cork->opt->dst1opt); - kfree(v6_cork->opt->hopopt); - kfree(v6_cork->opt->srcrt); - kfree(v6_cork->opt); + struct ipv6_txoptions *opt = v6_cork->opt; + + kfree(opt->dst0opt); + kfree(opt->dst1opt); + kfree(opt->hopopt); + kfree(opt->srcrt); + kfree(opt); v6_cork->opt = NULL; } @@ -1827,7 +1839,6 @@ static void ip6_cork_release(struct inet_cork_full *cork, cork->base.dst = NULL; cork->base.flags &= ~IPCORK_ALLFRAG; } - memset(&cork->fl, 0, sizeof(cork->fl)); } struct sk_buff *__ip6_make_skb(struct sock *sk, @@ -1837,7 +1848,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; - struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; + struct in6_addr *final_dst; struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; @@ -1867,9 +1878,9 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, /* Allow local fragmentation. */ skb->ignore_df = ip6_sk_ignore_df(sk); - - *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); + + final_dst = &fl6->daddr; if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt && opt->opt_nflen) @@ -1889,10 +1900,9 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb->priority = sk->sk_priority; skb->mark = cork->base.mark; - skb->tstamp = cork->base.transmit_time; - skb_dst_set(skb, dst_clone(&rt->dst)); + ip6_cork_steal_dst(skb, cork); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); @@ -1964,26 +1974,26 @@ struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - struct ipcm6_cookie *ipc6, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, - struct inet_cork_full *cork) + struct ipcm6_cookie *ipc6, struct rt6_info *rt, + unsigned int flags, struct inet_cork_full *cork) { struct inet6_cork v6_cork; struct sk_buff_head queue; int exthdrlen = (ipc6->opt ? 
ipc6->opt->opt_flen : 0); int err; - if (flags & MSG_PROBE) + if (flags & MSG_PROBE) { + dst_release(&rt->dst); return NULL; + } __skb_queue_head_init(&queue); cork->base.flags = 0; cork->base.addr = 0; cork->base.opt = NULL; - cork->base.dst = NULL; v6_cork.opt = NULL; - err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6); + err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt); if (err) { ip6_cork_release(cork, &v6_cork); return ERR_PTR(err); @@ -1991,7 +2001,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, if (ipc6->dontfrag < 0) ipc6->dontfrag = inet6_sk(sk)->dontfrag; - err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork, + err = __ip6_append_data(sk, &queue, cork, &v6_cork, ¤t->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, flags, ipc6); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 97ade833f58c..53f632a560ec 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1121,6 +1121,14 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); + } else if (skb->protocol == htons(ETH_P_IP)) { + const struct rtable *rt = skb_rtable(skb); + + if (!rt) + goto tx_err_link_failure; + + if (rt->rt_gw_family == AF_INET6) + memcpy(&fl6->daddr, &rt->rt_gw6, sizeof(fl6->daddr)); } } else if (t->parms.proto != 0 && !(t->parms.flags & (IP6_TNL_F_USE_ORIG_TCLASS | diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8a2db926b5eb..a9775c830194 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -255,13 +255,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr_table *mrt, *next; - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { list_del(&mrt->list); ip6mr_free_table(mrt); } fib_rules_unregister(net->ipv6.mr6_rules_ops); - rtnl_unlock(); } static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, @@ -318,10 +317,9 @@ static int __net_init ip6mr_rules_init(struct net *net) static void __net_exit ip6mr_rules_exit(struct net *net) { - rtnl_lock(); + ASSERT_RTNL(); ip6mr_free_table(net->ipv6.mrt6); net->ipv6.mrt6 = NULL; - rtnl_unlock(); } static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb, @@ -734,7 +732,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding--; + atomic_dec(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -902,7 +900,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding++; + atomic_inc(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -1042,7 +1040,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, int ret; #ifdef CONFIG_IPV6_PIMSM_V2 - if (assert == MRT6MSG_WHOLEPKT) + if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt) +sizeof(*msg)); else @@ -1058,7 +1056,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, skb->ip_summed = CHECKSUM_UNNECESSARY; #ifdef CONFIG_IPV6_PIMSM_V2 - if (assert == MRT6MSG_WHOLEPKT) { + if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) { /* Ugly, but we have no choice with this interface. 
Duplicate old header, fix length etc. And all this only to mangle msg->im6_msgtype and @@ -1070,8 +1068,11 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, skb_reset_transport_header(skb); msg = (struct mrt6msg *)skb_transport_header(skb); msg->im6_mbz = 0; - msg->im6_msgtype = MRT6MSG_WHOLEPKT; - msg->im6_mif = mrt->mroute_reg_vif_num; + msg->im6_msgtype = assert; + if (assert == MRT6MSG_WRMIFWHOLE) + msg->im6_mif = mifi; + else + msg->im6_mif = mrt->mroute_reg_vif_num; msg->im6_pad = 0; msg->im6_src = ipv6_hdr(pkt)->saddr; msg->im6_dst = ipv6_hdr(pkt)->daddr; @@ -1325,7 +1326,9 @@ static int __net_init ip6mr_net_init(struct net *net) proc_cache_fail: remove_proc_entry("ip6_mr_vif", net->proc_net); proc_vif_fail: + rtnl_lock(); ip6mr_rules_exit(net); + rtnl_unlock(); #endif ip6mr_rules_fail: ip6mr_notifier_exit(net); @@ -1338,13 +1341,23 @@ static void __net_exit ip6mr_net_exit(struct net *net) remove_proc_entry("ip6_mr_cache", net->proc_net); remove_proc_entry("ip6_mr_vif", net->proc_net); #endif - ip6mr_rules_exit(net); ip6mr_notifier_exit(net); } +static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list) +{ + struct net *net; + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) + ip6mr_rules_exit(net); + rtnl_unlock(); +} + static struct pernet_operations ip6mr_net_ops = { .init = ip6mr_net_init, .exit = ip6mr_net_exit, + .exit_batch = ip6mr_net_exit_batch, }; int __init ip6_mr_init(void) @@ -1553,7 +1566,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) } else { rcu_assign_pointer(mrt->mroute_sk, sk); sock_set_flag(sk, SOCK_RCU_FREE); - net->ipv6.devconf_all->mc_forwarding++; + atomic_inc(&net->ipv6.devconf_all->mc_forwarding); } write_unlock_bh(&mrt_lock); @@ -1569,14 +1582,19 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) int ip6mr_sk_done(struct sock *sk) { - int err = -EACCES; struct net *net = sock_net(sk); + struct ipv6_devconf *devconf; struct mr_table *mrt; + int err = -EACCES; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return err; + devconf = net->ipv6.devconf_all; + if (!devconf || !atomic_read(&devconf->mc_forwarding)) + return err; + rtnl_lock(); ip6mr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { @@ -1586,7 +1604,7 @@ int ip6mr_sk_done(struct sock *sk) * so the RCU grace period before sk freeing * is guaranteed by sk_destruct() */ - net->ipv6.devconf_all->mc_forwarding--; + atomic_dec(&devconf->mc_forwarding); write_unlock_bh(&mrt_lock); inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, @@ -1635,6 +1653,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, mifi_t mifi; struct net *net = sock_net(sk); struct mr_table *mrt; + bool do_wrmifwhole; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) @@ -1748,12 +1767,15 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, return -EINVAL; if (copy_from_sockptr(&v, optval, sizeof(v))) return -EFAULT; + + do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE); v = !!v; rtnl_lock(); ret = 0; if (v != mrt->mroute_do_pim) { mrt->mroute_do_pim = v; mrt->mroute_do_assert = v; + mrt->mroute_do_wrvifwhole = do_wrmifwhole; } rtnl_unlock(); return ret; @@ -2129,6 +2151,9 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, MFC_ASSERT_THRESH)) { c->_c.mfc_un.res.last_assert = jiffies; ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); + if (mrt->mroute_do_wrvifwhole) + ip6mr_cache_report(mrt, 
skb, true_vifi, + MRT6MSG_WRMIFWHOLE); } goto dont_forward; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a733803a710c..222f6bf220ba 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -475,7 +475,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, &tcp_prot, 1); - sk->sk_prot = &tcp_prot; + /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ + WRITE_ONCE(sk->sk_prot, &tcp_prot); icsk->icsk_af_ops = &ipv4_specific; sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; @@ -489,7 +490,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, prot, 1); - sk->sk_prot = prot; + /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ + WRITE_ONCE(sk->sk_prot, prot); sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f03b597e4121..fcb288b0ae13 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -466,9 +466,8 @@ static void ip6_nd_hdr(struct sk_buff *skb, hdr->daddr = *daddr; } -static void ndisc_send_skb(struct sk_buff *skb, - const struct in6_addr *daddr, - const struct in6_addr *saddr) +void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct dst_entry *dst = skb_dst(skb); struct net *net = dev_net(skb->dev); @@ -515,6 +514,7 @@ static void ndisc_send_skb(struct sk_buff *skb, rcu_read_unlock(); } +EXPORT_SYMBOL(ndisc_send_skb); void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, @@ -598,22 +598,16 @@ static void ndisc_send_unsol_na(struct net_device *dev) in6_dev_put(idev); } -void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr, - u64 nonce) +struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit, + const struct in6_addr *saddr, u64 nonce) { - struct sk_buff *skb; - struct in6_addr addr_buf; int inc_opt = dev->addr_len; - int optlen = 0; + struct sk_buff *skb; struct nd_msg *msg; + int optlen = 0; - if (!saddr) { - if (ipv6_get_lladdr(dev, &addr_buf, - (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))) - return; - saddr = &addr_buf; - } + if (!saddr) + return NULL; if (ipv6_addr_any(saddr)) inc_opt = false; @@ -625,7 +619,7 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) - return; + return NULL; msg = skb_put(skb, sizeof(*msg)); *msg = (struct nd_msg) { @@ -647,7 +641,28 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, memcpy(opt + 2, &nonce, 6); } - ndisc_send_skb(skb, daddr, saddr); + return skb; +} +EXPORT_SYMBOL(ndisc_ns_create); + +void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, + const struct in6_addr *daddr, const struct in6_addr *saddr, + u64 nonce) +{ + struct in6_addr addr_buf; + struct sk_buff *skb; + + if (!saddr) { + if (ipv6_get_lladdr(dev, &addr_buf, + (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC))) + return; + saddr = &addr_buf; + } + + skb = ndisc_ns_create(dev, solicit, saddr, nonce); + + if (skb) + ndisc_send_skb(skb, daddr, saddr); } void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, @@ -1337,8 +1352,12 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } neigh->flags 
|= NTF_ROUTER; - } else if (rt) { + } else if (rt && IPV6_EXTRACT_PREF(rt->fib6_flags) != pref) { + struct nl_info nlinfo = { + .nl_net = net, + }; rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); + inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE); } if (rt) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 6ab710b5a1a8..1da332450d98 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -121,6 +121,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; + bool mono_delivery_time = skb->mono_delivery_time; ktime_t tstamp = skb->tstamp; struct ip6_frag_state state; u8 *prevhdr, nexthdr = 0; @@ -186,7 +187,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); - skb->tstamp = tstamp; + skb_set_delivery_time(skb, tstamp, mono_delivery_time); err = output(net, sk, data, skb); if (err || !iter.frag) break; @@ -219,7 +220,7 @@ slow_path: goto blackhole; } - skb2->tstamp = tstamp; + skb_set_delivery_time(skb2, tstamp, mono_delivery_time); err = output(net, sk, data, skb2); if (err) goto blackhole; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 5c47be29b9ee..7dd3629dd19e 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -264,6 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; + fq->q.mono_delivery_time = skb->mono_delivery_time; fq->q.meat += skb->len; fq->ecn |= ecn; if (payload_len > fq->q.max_size) diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 3a00d95e964e..70a405b4006f 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -73,6 +73,7 @@ static const struct nft_expr_ops nft_dup_ipv6_ops = { .eval = nft_dup_ipv6_eval, .init = nft_dup_ipv6_init, .dump = nft_dup_ipv6_dump, + .reduce = NFT_REDUCE_READONLY, }; static const struct nla_policy nft_dup_ipv6_policy[NFTA_DUP_MAX + 1] = { diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 92f3235fa287..b3f163b40c2b 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -211,6 +211,7 @@ static const struct nft_expr_ops nft_fib6_type_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, + .reduce = nft_fib_reduce, }; static const struct nft_expr_ops nft_fib6_ops = { @@ -220,6 +221,7 @@ static const struct nft_expr_ops nft_fib6_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, + .reduce = nft_fib_reduce, }; static const struct nft_expr_ops * diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index ed69c768797e..5c61294f410e 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -46,6 +46,7 @@ static const struct nft_expr_ops nft_reject_ipv6_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_validate, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_ipv6_type __read_mostly = { diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 9256f6ba87ef..ff033d16549e 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -59,8 +59,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct 
pingfakehdr pfh; struct ipcm6_cookie ipc6; - pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); - err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph, sizeof(user_icmph)); if (err) @@ -99,7 +97,25 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if)) return -EINVAL; - /* TODO: use ip6_datagram_send_ctl to get options from cmsg */ + ipcm6_init_sk(&ipc6, np); + ipc6.sockc.tsflags = sk->sk_tsflags; + ipc6.sockc.mark = sk->sk_mark; + + if (msg->msg_controllen) { + struct ipv6_txoptions opt = {}; + + opt.tot_len = sizeof(opt); + ipc6.opt = &opt; + + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6); + if (err < 0) + return err; + + /* Changes to txoptions and flow info are not implemented, yet. + * Drop the options, fl6 is wiped below. + */ + ipc6.opt = NULL; + } memset(&fl6, 0, sizeof(fl6)); @@ -107,14 +123,12 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.saddr = np->saddr; fl6.daddr = *daddr; fl6.flowi6_oif = oif; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = ipc6.sockc.mark; fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); - ipcm6_init_sk(&ipc6, np); - ipc6.sockc.mark = sk->sk_mark; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false); @@ -136,7 +150,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) pfh.wcheck = 0; pfh.family = AF_INET6; - ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + if (ipc6.hlimit < 0) + ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); lock_sock(sk); err = ip6_append_data(sk, ping_getfrag, &pfh, len, diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 28e44782c94d..ff866f2a879e 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -194,6 +194,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; + fq->q.mono_delivery_time = skb->mono_delivery_time; fq->q.meat += skb->len; fq->ecn |= ecn; add_frag_mem_limit(fq->q.fqdir, skb->truesize); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ea1cf414a92e..2fa10e60cccd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -130,6 +130,7 @@ static struct fib6_info *rt6_get_route_info(struct net *net, struct uncached_list { spinlock_t lock; struct list_head head; + struct list_head quarantine; }; static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); @@ -149,35 +150,34 @@ void rt6_uncached_list_del(struct rt6_info *rt) { if (!list_empty(&rt->rt6i_uncached)) { struct uncached_list *ul = rt->rt6i_uncached_list; - struct net *net = dev_net(rt->dst.dev); spin_lock_bh(&ul->lock); - list_del(&rt->rt6i_uncached); - atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache); + list_del_init(&rt->rt6i_uncached); spin_unlock_bh(&ul->lock); } } -static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) +static void rt6_uncached_list_flush_dev(struct net_device *dev) { - struct net_device *loopback_dev = net->loopback_dev; int cpu; - if (dev == loopback_dev) - return; - for_each_possible_cpu(cpu) { struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); - struct rt6_info *rt; + struct rt6_info *rt, *safe; + + if (list_empty(&ul->head)) + continue; spin_lock_bh(&ul->lock); - list_for_each_entry(rt, &ul->head, 
rt6i_uncached) { + list_for_each_entry_safe(rt, safe, &ul->head, rt6i_uncached) { struct inet6_dev *rt_idev = rt->rt6i_idev; struct net_device *rt_dev = rt->dst.dev; + bool handled = false; if (rt_idev->dev == dev) { - rt->rt6i_idev = in6_dev_get(loopback_dev); + rt->rt6i_idev = in6_dev_get(blackhole_netdev); in6_dev_put(rt_idev); + handled = true; } if (rt_dev == dev) { @@ -185,7 +185,11 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) dev_replace_track(rt_dev, blackhole_netdev, &rt->dst.dev_tracker, GFP_ATOMIC); + handled = true; } + if (handled) + list_move(&rt->rt6i_uncached, + &ul->quarantine); } spin_unlock_bh(&ul->lock); } @@ -373,13 +377,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - struct net_device *loopback_dev = - dev_net(dev)->loopback_dev; - if (idev && idev->dev != loopback_dev) { - struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev); - if (loopback_idev) { - rt->rt6i_idev = loopback_idev; + if (idev && idev->dev != blackhole_netdev) { + struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev); + + if (blackhole_idev) { + rt->rt6i_idev = blackhole_idev; in6_dev_put(idev); } } @@ -1206,9 +1209,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_node *fn; struct rt6_info *rt; - if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) - flags &= ~RT6_LOOKUP_F_IFACE; - rcu_read_lock(); fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: @@ -2178,9 +2178,6 @@ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; - if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) - oif = 0; - redo_rt6_select: rt6_select(net, fn, oif, res, strict); if (res->f6i == net->ipv6.fib6_null_entry) { @@ -2244,7 +2241,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, * if caller sets RT6_LOOKUP_F_DST_NOREF flag. */ rt6_uncached_list_add(rt); - atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); rcu_read_unlock(); return rt; @@ -3056,12 +3052,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_info *rt; struct fib6_node *fn; - /* l3mdev_update_flow overrides oif if the device is enslaved; in - * this case we must match on the real ingress device, so reset it - */ - if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) - fl6->flowi6_oif = skb->dev->ifindex; - /* Get the "current" route for this destination and * check if the redirect has come from appropriate router. 
* @@ -3287,7 +3277,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, * do proper release of the net_device */ rt6_uncached_list_add(rt); - atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); @@ -4896,7 +4885,7 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event) void rt6_disable_ip(struct net_device *dev, unsigned long event) { rt6_sync_down_dev(dev, event); - rt6_uncached_list_flush_dev(dev_net(dev), dev); + rt6_uncached_list_flush_dev(dev); neigh_ifdown(&nd_tbl, dev); } @@ -5009,6 +4998,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, err = -EINVAL; rtm = nlmsg_data(nlh); + if (rtm->rtm_tos) { + NL_SET_ERR_MSG(extack, + "Invalid dsfield (tos): option not available for IPv6"); + goto errout; + } + *cfg = (struct fib6_config){ .fc_table = rtm->rtm_table, .fc_dst_len = rtm->rtm_dst_len, @@ -6731,6 +6726,7 @@ int __init ip6_route_init(void) struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); INIT_LIST_HEAD(&ul->head); + INIT_LIST_HEAD(&ul->quarantine); spin_lock_init(&ul->lock); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 075ee8a2df3b..13678d3908fa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -148,6 +148,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_timewait_death_row *tcp_death_row; struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; @@ -156,7 +157,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct dst_entry *dst; int addr_type; int err; - struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; @@ -308,6 +308,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->inet_dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); + tcp_death_row = sock_net(sk)->ipv4.tcp_death_row; err = inet6_hash_connect(tcp_death_row, sk); if (err) goto late_failure; @@ -772,57 +773,6 @@ clear_hash_noput: #endif -static bool tcp_v6_inbound_md5_hash(const struct sock *sk, - const struct sk_buff *skb, - int dif, int sdif) -{ -#ifdef CONFIG_TCP_MD5SIG - const __u8 *hash_location = NULL; - struct tcp_md5sig_key *hash_expected; - const struct ipv6hdr *ip6h = ipv6_hdr(skb); - const struct tcphdr *th = tcp_hdr(skb); - int genhash, l3index; - u8 newhash[16]; - - /* sdif set, means packet ingressed via a device - * in an L3 domain and dif is set to the l3mdev - */ - l3index = sdif ? dif : 0; - - hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index); - hash_location = tcp_parse_md5sig_option(th); - - /* We've parsed the options - do we have a hash? */ - if (!hash_expected && !hash_location) - return false; - - if (hash_expected && !hash_location) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - return true; - } - - if (!hash_expected && hash_location) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); - return true; - } - - /* check the signature */ - genhash = tcp_v6_md5_hash_skb(newhash, - hash_expected, - NULL, skb); - - if (genhash || memcmp(hash_location, newhash, 16) != 0) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); - net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n", - genhash ? 
"failed" : "mismatch", - &ip6h->saddr, ntohs(th->source), - &ip6h->daddr, ntohs(th->dest), l3index); - return true; - } -#endif - return false; -} - static void tcp_v6_init_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb) @@ -920,12 +870,11 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 } #endif - buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, - GFP_ATOMIC); + buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); if (!buff) return; - skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); + skb_reserve(buff, MAX_TCP_HEADER); t1 = skb_push(buff, tot_len); skb_reset_transport_header(buff); @@ -991,7 +940,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 } else { mark = sk->sk_mark; } - buff->tstamp = tcp_transmit_time(sk); + skb_set_delivery_time(buff, tcp_transmit_time(sk), true); } fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; fl6.fl6_dport = t1->dest; @@ -1471,6 +1420,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct sk_buff *opt_skb = NULL; + enum skb_drop_reason reason; struct tcp_sock *tp; /* Imagine: socket is IPv6. IPv4 packet arrives, @@ -1505,6 +1455,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (np->rxopt.all) opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); + reason = SKB_DROP_REASON_NOT_SPECIFIED; if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ struct dst_entry *dst; @@ -1558,9 +1509,10 @@ reset: discard: if (opt_skb) __kfree_skb(opt_skb); - kfree_skb(skb); + kfree_skb_reason(skb, reason); return 0; csum_err: + reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); @@ -1626,6 +1578,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) { + enum skb_drop_reason drop_reason; int sdif = inet6_sdif(skb); int dif = inet6_iif(skb); const struct tcphdr *th; @@ -1635,6 +1588,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) int ret; struct net *net = dev_net(skb->dev); + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -1648,8 +1602,10 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) th = (const struct tcphdr *)skb->data; - if (unlikely(th->doff < sizeof(struct tcphdr)/4)) + if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { + drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; goto bad_packet; + } if (!pskb_may_pull(skb, th->doff*4)) goto discard_it; @@ -1676,7 +1632,10 @@ process: struct sock *nsk; sk = req->rsk_listener; - if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) { + drop_reason = tcp_inbound_md5_hash(sk, skb, + &hdr->saddr, &hdr->daddr, + AF_INET6, dif, sdif); + if (drop_reason) { sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; @@ -1705,6 +1664,8 @@ process: hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); nsk = tcp_check_req(sk, skb, req, false, &req_stolen); + } else { + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; } if (!nsk) { reqsk_put(req); @@ -1740,14 +1701,20 @@ process: } } - if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { + drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto discard_and_relse; + } - if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) + drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr, + 
AF_INET6, dif, sdif); + if (drop_reason) goto discard_and_relse; - if (tcp_filter(sk, skb)) + if (tcp_filter(sk, skb)) { + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto discard_and_relse; + } th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); @@ -1768,7 +1735,7 @@ process: if (!sock_owned_by_user(sk)) { ret = tcp_v6_do_rcv(sk, skb); } else { - if (tcp_add_backlog(sk, skb)) + if (tcp_add_backlog(sk, skb, &drop_reason)) goto discard_and_relse; } bh_unlock_sock(sk); @@ -1778,6 +1745,7 @@ put_and_return: return ret ? -1 : 0; no_tcp_socket: + drop_reason = SKB_DROP_REASON_NO_SOCKET; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; @@ -1785,6 +1753,7 @@ no_tcp_socket: if (tcp_checksum_complete(skb)) { csum_error: + drop_reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: @@ -1794,7 +1763,7 @@ bad_packet: } discard_it: - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return 0; discard_and_relse: @@ -1805,6 +1774,7 @@ discard_and_relse: do_time_wait: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + drop_reason = SKB_DROP_REASON_XFRM_POLICY; inet_twsk_put(inet_twsk(sk)); goto discard_it; } @@ -2237,15 +2207,9 @@ static void __net_exit tcpv6_net_exit(struct net *net) inet_ctl_sock_destroy(net->ipv6.tcp_sk); } -static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list) -{ - inet_twsk_purge(&tcp_hashinfo, AF_INET6); -} - static struct pernet_operations tcpv6_net_ops = { .init = tcpv6_net_init, .exit = tcpv6_net_exit, - .exit_batch = tcpv6_net_exit_batch, }; int __init tcpv6_init(void) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 528b81ef19c9..7f0fa9bd9ffe 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -912,6 +912,7 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; const struct in6_addr *saddr, *daddr; struct net *net = dev_net(skb->dev); struct udphdr *uh; @@ -988,6 +989,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return udp6_unicast_rcv_skb(sk, skb, uh); } + reason = SKB_DROP_REASON_NO_SOCKET; + if (!uh->check) goto report_csum_error; @@ -1000,10 +1003,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); - kfree_skb(skb); + kfree_skb_reason(skb, reason); return 0; short_packet: + if (reason == SKB_DROP_REASON_NOT_SPECIFIED) + reason = SKB_DROP_REASON_PKT_TOO_SMALL; net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", proto == IPPROTO_UDPLITE ? "-Lite" : "", saddr, ntohs(uh->source), @@ -1014,10 +1019,12 @@ short_packet: report_csum_error: udp6_csum_zero_error(skb); csum_error: + if (reason == SKB_DROP_REASON_NOT_SPECIFIED) + reason = SKB_DROP_REASON_UDP_CSUM; __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); discard: __UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); - kfree_skb(skb); + kfree_skb_reason(skb, reason); return 0; } @@ -1266,23 +1273,17 @@ static int udp_v6_push_pending_frames(struct sock *sk) { struct sk_buff *skb; struct udp_sock *up = udp_sk(sk); - struct flowi6 fl6; int err = 0; if (up->pending == AF_INET) return udp_push_pending_frames(sk); - /* ip6_finish_skb will release the cork, so make a copy of - * fl6 here. 
- */ - fl6 = inet_sk(sk)->cork.fl.u.ip6; - skb = ip6_finish_skb(sk); if (!skb) goto out; - err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base); - + err = udp_v6_send_skb(skb, &inet_sk(sk)->cork.fl.u.ip6, + &inet_sk(sk)->cork.base); out: up->len = 0; up->pending = 0; @@ -1300,7 +1301,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ipv6_txoptions *opt = NULL; struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; - struct flowi6 fl6; + struct inet_cork_full cork; + struct flowi6 *fl6 = &cork.fl.u.ip6; struct dst_entry *dst; struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; @@ -1363,9 +1365,6 @@ do_udp_sendmsg: } } - if (up->pending == AF_INET) - return udp_sendmsg(sk, msg, len); - /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). */ @@ -1374,6 +1373,8 @@ do_udp_sendmsg: getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; if (up->pending) { + if (up->pending == AF_INET) + return udp_sendmsg(sk, msg, len); /* * There are pending frames. * The socket lock must be held while it's corked. @@ -1391,19 +1392,19 @@ do_udp_sendmsg: } ulen += sizeof(struct udphdr); - memset(&fl6, 0, sizeof(fl6)); + memset(fl6, 0, sizeof(*fl6)); if (sin6) { if (sin6->sin6_port == 0) return -EINVAL; - fl6.fl6_dport = sin6->sin6_port; + fl6->fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { - fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); + fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } @@ -1420,24 +1421,24 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) - fl6.flowi6_oif = sin6->sin6_scope_id; + fl6->flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl6.fl6_dport = inet->inet_dport; + fl6->fl6_dport = inet->inet_dport; daddr = &sk->sk_v6_daddr; - fl6.flowlabel = np->flow_label; + fl6->flowlabel = np->flow_label; connected = true; } - if (!fl6.flowi6_oif) - fl6.flowi6_oif = sk->sk_bound_dev_if; + if (!fl6->flowi6_oif) + fl6->flowi6_oif = sk->sk_bound_dev_if; - if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl6->flowi6_oif) + fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl6.flowi6_uid = sk->sk_uid; + fl6->flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { opt = &opt_space; @@ -1447,14 +1448,14 @@ do_udp_sendmsg: err = udp_cmsg_send(sk, msg, &ipc6.gso_size); if (err > 0) - err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6, &ipc6); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); + if ((fl6->flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } @@ -1471,16 +1472,17 @@ do_udp_sendmsg: opt = ipv6_fixup_options(&opt_space, opt); ipc6.opt = opt; - fl6.flowi6_proto = sk->sk_protocol; - fl6.flowi6_mark = ipc6.sockc.mark; - fl6.daddr = *daddr; - if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) - fl6.saddr = np->saddr; - fl6.fl6_sport = inet->inet_sport; + fl6->flowi6_proto = 
sk->sk_protocol; + fl6->flowi6_mark = ipc6.sockc.mark; + fl6->daddr = *daddr; + if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr)) + fl6->saddr = np->saddr; + fl6->fl6_sport = inet->inet_sport; if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, - (struct sockaddr *)sin6, &fl6.saddr); + (struct sockaddr *)sin6, + &fl6->saddr); if (err) goto out_no_dst; if (sin6) { @@ -1496,32 +1498,32 @@ do_udp_sendmsg: err = -EINVAL; goto out_no_dst; } - fl6.fl6_dport = sin6->sin6_port; - fl6.daddr = sin6->sin6_addr; + fl6->fl6_dport = sin6->sin6_port; + fl6->daddr = sin6->sin6_addr; } } - if (ipv6_addr_any(&fl6.daddr)) - fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + if (ipv6_addr_any(&fl6->daddr)) + fl6->daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ - final_p = fl6_update_dst(&fl6, opt, &final); + final_p = fl6_update_dst(fl6, opt, &final); if (final_p) connected = false; - if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) { - fl6.flowi6_oif = np->mcast_oif; + if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) { + fl6->flowi6_oif = np->mcast_oif; connected = false; - } else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + } else if (!fl6->flowi6_oif) + fl6->flowi6_oif = np->ucast_oif; - security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); if (ipc6.tclass < 0) ipc6.tclass = np->tclass; - fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); + fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel); - dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, connected); + dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -1529,7 +1531,7 @@ do_udp_sendmsg: } if (ipc6.hlimit < 0) - ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6, dst); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; @@ -1537,17 +1539,17 @@ back_from_confirm: /* Lockless fast path for the non-corking case */ if (!corkreq) { - struct inet_cork_full cork; struct sk_buff *skb; skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, - &fl6, (struct rt6_info *)dst, + (struct rt6_info *)dst, msg->msg_flags, &cork); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) - err = udp_v6_send_skb(skb, &fl6, &cork.base); - goto out; + err = udp_v6_send_skb(skb, fl6, &cork.base); + /* ip6_make_skb steals dst reference */ + goto out_no_dst; } lock_sock(sk); @@ -1568,7 +1570,7 @@ do_append_data: ipc6.dontfrag = np->dontfrag; up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), - &ipc6, &fl6, (struct rt6_info *)dst, + &ipc6, fl6, (struct rt6_info *)dst, corkreq ? 
msg->msg_flags|MSG_MORE : msg->msg_flags);
 	if (err)
 		udp_v6_flush_pending_frames(sk);
@@ -1603,7 +1605,7 @@ out_no_dst:
 
 do_confirm:
 	if (msg->msg_flags & MSG_PROBE)
-		dst_confirm_neigh(dst, &fl6.daddr);
+		dst_confirm_neigh(dst, &fl6->daddr);
 	if (!(msg->msg_flags&MSG_PROBE) || len)
 		goto back_from_confirm;
 	err = 0;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index fad687ee6dd8..e64e427a51cf 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -33,8 +33,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
 	int err;
 
 	memset(&fl6, 0, sizeof(fl6));
-	fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
-	fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
+	fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
 	fl6.flowi6_mark = mark;
 	memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
 	if (saddr)
@@ -92,7 +91,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt6.rt6i_src = rt->rt6i_src;
 	INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached);
 	rt6_uncached_list_add(&xdst->u.rt6);
-	atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache);
 
 	return 0;
 }
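
The tcp_ipv6.c and udp.c hunks above share one control-flow pattern: each receive path starts a local drop reason at SKB_DROP_REASON_NOT_SPECIFIED, overwrites it at whichever check fails, and hands it to a single kfree_skb_reason() call at the shared drop label. The sketch below is a minimal, self-contained userspace illustration of that shape only, not kernel code; the enum values, struct packet, and packet_drop() helper are invented for the example.

#include <stdio.h>

/* Illustrative stand-ins for the kernel's skb drop reasons (hypothetical names). */
enum drop_reason {
	DROP_NOT_SPECIFIED,
	DROP_PKT_TOO_SMALL,
	DROP_NO_SOCKET,
	DROP_BAD_CSUM,
};

struct packet {
	unsigned int len;
	int has_socket;
	int csum_ok;
};

/* Single free point that records why the packet was dropped, mirroring how
 * the patch funnels every early exit into one kfree_skb_reason() call. */
static void packet_drop(struct packet *pkt, enum drop_reason reason)
{
	(void)pkt;
	printf("dropped: reason=%d\n", reason);
}

static int packet_rcv(struct packet *pkt)
{
	enum drop_reason reason = DROP_NOT_SPECIFIED;

	if (pkt->len < 8) {
		reason = DROP_PKT_TOO_SMALL;
		goto drop;
	}
	if (!pkt->has_socket) {
		reason = DROP_NO_SOCKET;
		goto drop;
	}
	if (!pkt->csum_ok) {
		reason = DROP_BAD_CSUM;
		goto drop;
	}
	printf("delivered %u bytes\n", pkt->len);
	return 0;

drop:
	packet_drop(pkt, reason);
	return -1;
}

int main(void)
{
	struct packet short_pkt = { .len = 4,  .has_socket = 1, .csum_ok = 1 };
	struct packet bad_csum  = { .len = 64, .has_socket = 1, .csum_ok = 0 };

	packet_rcv(&short_pkt);	/* prints reason=DROP_PKT_TOO_SMALL */
	packet_rcv(&bad_csum);	/* prints reason=DROP_BAD_CSUM */
	return 0;
}

Keeping one exit point that consumes the accumulated reason is what lets the patch annotate every early return with a precise cause without duplicating the free and statistics logic.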