diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/Kconfig | 1 | ||||
-rw-r--r-- | net/ipv6/Makefile | 2 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 200 | ||||
-rw-r--r-- | net/ipv6/addrlabel.c | 59 | ||||
-rw-r--r-- | net/ipv6/ah6.c | 80 | ||||
-rw-r--r-- | net/ipv6/esp6.c | 26 | ||||
-rw-r--r-- | net/ipv6/exthdrs_core.c | 2 | ||||
-rw-r--r-- | net/ipv6/exthdrs_offload.c | 4 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 2 | ||||
-rw-r--r-- | net/ipv6/ip6_checksum.c | 4 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 121 | ||||
-rw-r--r-- | net/ipv6/ip6_flowlabel.c | 6 | ||||
-rw-r--r-- | net/ipv6/ip6_gre.c | 9 | ||||
-rw-r--r-- | net/ipv6/ip6_offload.c | 20 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 53 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 13 | ||||
-rw-r--r-- | net/ipv6/ip6_vti.c | 316 | ||||
-rw-r--r-- | net/ipv6/ip6mr.c | 13 | ||||
-rw-r--r-- | net/ipv6/ipcomp6.c | 22 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 2 | ||||
-rw-r--r-- | net/ipv6/mcast.c | 11 | ||||
-rw-r--r-- | net/ipv6/output_core.c | 27 | ||||
-rw-r--r-- | net/ipv6/ping.c | 5 | ||||
-rw-r--r-- | net/ipv6/route.c | 52 | ||||
-rw-r--r-- | net/ipv6/sit.c | 29 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 46 | ||||
-rw-r--r-- | net/ipv6/udp_offload.c | 2 | ||||
-rw-r--r-- | net/ipv6/xfrm6_mode_tunnel.c | 63 | ||||
-rw-r--r-- | net/ipv6/xfrm6_policy.c | 7 | ||||
-rw-r--r-- | net/ipv6/xfrm6_protocol.c | 270 |
30 files changed, 991 insertions, 476 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index d92e5586783e..438a73aa777c 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -138,6 +138,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION config IPV6_VTI tristate "Virtual (secure) IPv6: tunneling" select IPV6_TUNNEL + select NET_IP_TUNNEL depends on INET6_XFRM_MODE_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 17bb830872db..2fe68364bb20 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -16,7 +16,7 @@ ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ - xfrm6_output.o + xfrm6_output.o xfrm6_protocol.o ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-$(CONFIG_PROC_FS) += proc.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ad235690684c..6c7fa0853fc7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -133,10 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev); static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; static DEFINE_SPINLOCK(addrconf_hash_lock); -static void addrconf_verify(unsigned long); +static void addrconf_verify(void); +static void addrconf_verify_rtnl(void); +static void addrconf_verify_work(struct work_struct *); -static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); -static DEFINE_SPINLOCK(addrconf_verify_lock); +static struct workqueue_struct *addrconf_wq; +static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); @@ -151,7 +153,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, u32 flags, u32 noflags); static void addrconf_dad_start(struct inet6_ifaddr *ifp); -static void addrconf_dad_timer(unsigned long data); +static void addrconf_dad_work(struct work_struct *w); static void addrconf_dad_completed(struct inet6_ifaddr *ifp); static void addrconf_dad_run(struct inet6_dev *idev); static void addrconf_rs_timer(unsigned long data); @@ -247,9 +249,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev) __in6_dev_put(idev); } -static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp) +static void addrconf_del_dad_work(struct inet6_ifaddr *ifp) { - if (del_timer(&ifp->dad_timer)) + if (cancel_delayed_work(&ifp->dad_work)) __in6_ifa_put(ifp); } @@ -261,12 +263,12 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev, mod_timer(&idev->rs_timer, jiffies + when); } -static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, - unsigned long when) +static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, + unsigned long delay) { - if (!timer_pending(&ifp->dad_timer)) + if (!delayed_work_pending(&ifp->dad_work)) in6_ifa_hold(ifp); - mod_timer(&ifp->dad_timer, jiffies + when); + mod_delayed_work(addrconf_wq, &ifp->dad_work, delay); } static int snmp6_alloc_dev(struct inet6_dev *idev) @@ -751,8 +753,9 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) in6_dev_put(ifp->idev); - if (del_timer(&ifp->dad_timer)) - pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); + if (cancel_delayed_work(&ifp->dad_work)) + pr_notice("delayed DAD work was pending while freeing ifa=%p\n", + ifp); if (ifp->state != INET6_IFADDR_STATE_DEAD) { pr_warn("Freeing alive inet6 address %p\n", ifp); @@ -849,8 +852,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, spin_lock_init(&ifa->lock); spin_lock_init(&ifa->state_lock); - setup_timer(&ifa->dad_timer, addrconf_dad_timer, - (unsigned long)ifa); + INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work); INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = scope; ifa->prefix_len = pfxlen; @@ -990,6 +992,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP; unsigned long expires; + ASSERT_RTNL(); + spin_lock_bh(&ifp->state_lock); state = ifp->state; ifp->state = INET6_IFADDR_STATE_DEAD; @@ -1021,7 +1025,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) write_unlock_bh(&ifp->idev->lock); - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); ipv6_ifa_notify(RTM_DELADDR, ifp); @@ -1103,8 +1107,11 @@ retry: * Lifetime is greater than REGEN_ADVANCE time units. In particular, * an implementation must not create a temporary address with a zero * Preferred Lifetime. + * Use age calculation as in addrconf_verify to avoid unnecessary + * temporary addresses being generated. */ - if (tmp_prefered_lft <= regen_advance) { + age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + if (tmp_prefered_lft <= regen_advance + age) { in6_ifa_put(ifp); in6_dev_put(idev); ret = -1; @@ -1601,7 +1608,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); ifp->flags |= IFA_F_TENTATIVE; if (dad_failed) ifp->flags |= IFA_F_DADFAILED; @@ -1622,20 +1629,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); - } else + } else { ipv6_del_addr(ifp); + } } static int addrconf_dad_end(struct inet6_ifaddr *ifp) { int err = -ENOENT; - spin_lock(&ifp->state_lock); + spin_lock_bh(&ifp->state_lock); if (ifp->state == INET6_IFADDR_STATE_DAD) { ifp->state = INET6_IFADDR_STATE_POSTDAD; err = 0; } - spin_unlock(&ifp->state_lock); + spin_unlock_bh(&ifp->state_lock); return err; } @@ -1668,7 +1676,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) } } - addrconf_dad_stop(ifp, 1); + spin_lock_bh(&ifp->state_lock); + /* transition from _POSTDAD to _ERRDAD */ + ifp->state = INET6_IFADDR_STATE_ERRDAD; + spin_unlock_bh(&ifp->state_lock); + + addrconf_mod_dad_work(ifp, 0); } /* Join to solicited addr multicast group. */ @@ -1677,6 +1690,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) { struct in6_addr maddr; + ASSERT_RTNL(); + if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1688,6 +1703,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) { struct in6_addr maddr; + ASSERT_RTNL(); + if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1698,6 +1715,9 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; + + ASSERT_RTNL(); + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -1709,6 +1729,9 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; + + ASSERT_RTNL(); + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -2268,11 +2291,13 @@ ok: return; } - ifp->flags |= IFA_F_MANAGETEMPADDR; update_lft = 0; create = 1; + spin_lock_bh(&ifp->lock); + ifp->flags |= IFA_F_MANAGETEMPADDR; ifp->cstamp = jiffies; ifp->tokenized = tokenized; + spin_unlock_bh(&ifp->lock); addrconf_dad_start(ifp); } @@ -2323,7 +2348,7 @@ ok: create, now); in6_ifa_put(ifp); - addrconf_verify(0); + addrconf_verify(); } } inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo); @@ -2472,7 +2497,7 @@ static int inet6_addr_add(struct net *net, int ifindex, manage_tempaddrs(idev, ifp, valid_lft, prefered_lft, true, jiffies); in6_ifa_put(ifp); - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -2783,6 +2808,8 @@ static void addrconf_gre_config(struct net_device *dev) ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) addrconf_add_linklocal(idev, &addr); + else + addrconf_prefix_route(&addr, 64, dev, 0, 0); } #endif @@ -3006,7 +3033,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); - addrconf_del_dad_timer(ifa); + addrconf_del_dad_work(ifa); goto restart; } } @@ -3044,7 +3071,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) while (!list_empty(&idev->addr_list)) { ifa = list_first_entry(&idev->addr_list, struct inet6_ifaddr, if_list); - addrconf_del_dad_timer(ifa); + addrconf_del_dad_work(ifa); list_del(&ifa->if_list); @@ -3143,10 +3170,10 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1); ifp->dad_probes = idev->cnf.dad_transmits; - addrconf_mod_dad_timer(ifp, rand_num); + addrconf_mod_dad_work(ifp, rand_num); } -static void addrconf_dad_start(struct inet6_ifaddr *ifp) +static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; @@ -3198,25 +3225,68 @@ out: read_unlock_bh(&idev->lock); } -static void addrconf_dad_timer(unsigned long data) +static void addrconf_dad_start(struct inet6_ifaddr *ifp) +{ + bool begin_dad = false; + + spin_lock_bh(&ifp->state_lock); + if (ifp->state != INET6_IFADDR_STATE_DEAD) { + ifp->state = INET6_IFADDR_STATE_PREDAD; + begin_dad = true; + } + spin_unlock_bh(&ifp->state_lock); + + if (begin_dad) + addrconf_mod_dad_work(ifp, 0); +} + +static void addrconf_dad_work(struct work_struct *w) { - struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; + struct inet6_ifaddr *ifp = container_of(to_delayed_work(w), + struct inet6_ifaddr, + dad_work); struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; + enum { + DAD_PROCESS, + DAD_BEGIN, + DAD_ABORT, + } action = DAD_PROCESS; + + rtnl_lock(); + + spin_lock_bh(&ifp->state_lock); + if (ifp->state == INET6_IFADDR_STATE_PREDAD) { + action = DAD_BEGIN; + ifp->state = INET6_IFADDR_STATE_DAD; + } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) { + action = DAD_ABORT; + ifp->state = INET6_IFADDR_STATE_POSTDAD; + } + spin_unlock_bh(&ifp->state_lock); + + if (action == DAD_BEGIN) { + addrconf_dad_begin(ifp); + goto out; + } else if (action == DAD_ABORT) { + addrconf_dad_stop(ifp, 1); + goto out; + } + if (!ifp->dad_probes && addrconf_dad_end(ifp)) goto out; - write_lock(&idev->lock); + write_lock_bh(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) { - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); goto out; } spin_lock(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DEAD) { spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); goto out; } @@ -3227,7 +3297,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); addrconf_dad_completed(ifp); @@ -3235,16 +3305,17 @@ static void addrconf_dad_timer(unsigned long data) } ifp->dad_probes--; - addrconf_mod_dad_timer(ifp, - NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); + addrconf_mod_dad_work(ifp, + NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any); out: in6_ifa_put(ifp); + rtnl_unlock(); } /* ifp->idev must be at least read locked */ @@ -3271,7 +3342,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) struct in6_addr lladdr; bool send_rs, send_mld; - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); /* * Configure the address for reception. Now it is valid. @@ -3512,23 +3583,23 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) * Periodic address status verification */ -static void addrconf_verify(unsigned long foo) +static void addrconf_verify_rtnl(void) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; int i; + ASSERT_RTNL(); + rcu_read_lock_bh(); - spin_lock(&addrconf_verify_lock); now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); - del_timer(&addr_chk_timer); + cancel_delayed_work(&addr_chk_work); for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, - &inet6_addr_lst[i], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { unsigned long age; /* When setting preferred_lft to a value not zero or @@ -3623,13 +3694,22 @@ restart: ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", now, next, next_sec, next_sched); - - addr_chk_timer.expires = next_sched; - add_timer(&addr_chk_timer); - spin_unlock(&addrconf_verify_lock); + mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now); rcu_read_unlock_bh(); } +static void addrconf_verify_work(struct work_struct *w) +{ + rtnl_lock(); + addrconf_verify_rtnl(); + rtnl_unlock(); +} + +static void addrconf_verify(void) +{ + mod_delayed_work(addrconf_wq, &addr_chk_work, 0); +} + static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local, struct in6_addr **peer_pfx) { @@ -3686,6 +3766,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, bool was_managetempaddr; bool had_prefixroute; + ASSERT_RTNL(); + if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; @@ -3751,7 +3833,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, !was_managetempaddr, jiffies); } - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -4381,6 +4463,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) bool update_rs = false; struct in6_addr ll_addr; + ASSERT_RTNL(); + if (token == NULL) return -EINVAL; if (ipv6_addr_any(token)) @@ -4429,7 +4513,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) } write_unlock_bh(&idev->lock); - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -4631,6 +4715,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { struct net *net = dev_net(ifp->idev->dev); + if (event) + ASSERT_RTNL(); + inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); switch (event) { @@ -5239,6 +5326,12 @@ int __init addrconf_init(void) if (err < 0) goto out_addrlabel; + addrconf_wq = create_workqueue("ipv6_addrconf"); + if (!addrconf_wq) { + err = -ENOMEM; + goto out_nowq; + } + /* The addrconf netdev notifier requires that loopback_dev * has it's ipv6 private information allocated and setup * before it can bring up and give link-local addresses @@ -5269,7 +5362,7 @@ int __init addrconf_init(void) register_netdevice_notifier(&ipv6_dev_notf); - addrconf_verify(0); + addrconf_verify(); rtnl_af_register(&inet6_ops); @@ -5297,6 +5390,8 @@ errout: rtnl_af_unregister(&inet6_ops); unregister_netdevice_notifier(&ipv6_dev_notf); errlo: + destroy_workqueue(addrconf_wq); +out_nowq: unregister_pernet_subsys(&addrconf_ops); out_addrlabel: ipv6_addr_label_cleanup(); @@ -5332,7 +5427,8 @@ void addrconf_cleanup(void) for (i = 0; i < IN6_ADDR_HSIZE; i++) WARN_ON(!hlist_empty(&inet6_addr_lst[i])); spin_unlock_bh(&addrconf_hash_lock); - - del_timer(&addr_chk_timer); + cancel_delayed_work(&addr_chk_work); rtnl_unlock(); + + destroy_workqueue(addrconf_wq); } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index b30ad3741b46..731e1e1722d9 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -6,7 +6,7 @@ */ /* * Author: - * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> + * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> */ #include <linux/kernel.h> @@ -22,14 +22,13 @@ #if 0 #define ADDRLABEL(x...) printk(x) #else -#define ADDRLABEL(x...) do { ; } while(0) +#define ADDRLABEL(x...) do { ; } while (0) #endif /* * Policy Table */ -struct ip6addrlbl_entry -{ +struct ip6addrlbl_entry { #ifdef CONFIG_NET_NS struct net *lbl_net; #endif @@ -88,39 +87,39 @@ static const __net_initconst struct ip6addrlbl_init_table { /* ::/0 */ .prefix = &in6addr_any, .label = 1, - },{ /* fc00::/7 */ - .prefix = &(struct in6_addr){{{ 0xfc }}}, + }, { /* fc00::/7 */ + .prefix = &(struct in6_addr){ { { 0xfc } } } , .prefixlen = 7, .label = 5, - },{ /* fec0::/10 */ - .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}}, + }, { /* fec0::/10 */ + .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } }, .prefixlen = 10, .label = 11, - },{ /* 2002::/16 */ - .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, + }, { /* 2002::/16 */ + .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } }, .prefixlen = 16, .label = 2, - },{ /* 3ffe::/16 */ - .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}}, + }, { /* 3ffe::/16 */ + .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } }, .prefixlen = 16, .label = 12, - },{ /* 2001::/32 */ - .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, + }, { /* 2001::/32 */ + .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } }, .prefixlen = 32, .label = 6, - },{ /* 2001:10::/28 */ - .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}}, + }, { /* 2001:10::/28 */ + .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } }, .prefixlen = 28, .label = 7, - },{ /* ::ffff:0:0 */ - .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, + }, { /* ::ffff:0:0 */ + .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } }, .prefixlen = 96, .label = 4, - },{ /* ::/96 */ + }, { /* ::/96 */ .prefix = &in6addr_any, .prefixlen = 96, .label = 3, - },{ /* ::1/128 */ + }, { /* ::1/128 */ .prefix = &in6addr_loopback, .prefixlen = 128, .label = 0, @@ -441,7 +440,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) if (label == IPV6_ADDR_LABEL_DEFAULT) return -EINVAL; - switch(nlh->nlmsg_type) { + switch (nlh->nlmsg_type) { case RTM_NEWADDRLABEL: if (ifal->ifal_index && !__dev_get_by_index(net, ifal->ifal_index)) @@ -505,12 +504,13 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { if (idx >= s_idx && net_eq(ip6addrlbl_net(p), net)) { - if ((err = ip6addrlbl_fill(skb, p, - ip6addrlbl_table.seq, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWADDRLABEL, - NLM_F_MULTI)) <= 0) + err = ip6addrlbl_fill(skb, p, + ip6addrlbl_table.seq, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWADDRLABEL, + NLM_F_MULTI); + if (err <= 0) break; } idx++; @@ -527,7 +527,7 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } -static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) +static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct ifaddrlblmsg *ifal; @@ -568,7 +568,8 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) goto out; } - if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) { + skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); + if (!skb) { ip6addrlbl_put(p); return -ENOBUFS; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 81e496a2e008..72a4930bdc0a 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -346,6 +346,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) struct ip_auth_hdr *ah; struct ah_data *ahp; struct tmp_ext *iph_ext; + int seqhi_len = 0; + __be32 *seqhi; + int sglists = 0; + struct scatterlist *seqhisg; ahp = x->data; ahash = ahp->ahash; @@ -359,15 +363,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) if (extlen) extlen += sizeof(*iph_ext); + if (x->props.flags & XFRM_STATE_ESN) { + sglists = 1; + seqhi_len = sizeof(*seqhi); + } err = -ENOMEM; - iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen); + iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN + + extlen + seqhi_len); if (!iph_base) goto out; iph_ext = ah_tmp_ext(iph_base); - icv = ah_tmp_icv(ahash, iph_ext, extlen); + seqhi = (__be32 *)((char *)iph_ext + extlen); + icv = ah_tmp_icv(ahash, seqhi, seqhi_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); + seqhisg = sg + nfrags; ah = ip_auth_hdr(skb); memset(ah->auth_data, 0, ahp->icv_trunc_len); @@ -411,10 +422,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->spi = x->id.spi; ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + sg_init_table(sg, nfrags + sglists); + skb_to_sgvec_nomark(skb, sg, 0, skb->len); - ahash_request_set_crypt(req, sg, icv, skb->len); + if (x->props.flags & XFRM_STATE_ESN) { + /* Attach seqhi sg right after packet payload */ + *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + sg_set_buf(seqhisg, seqhi, seqhi_len); + } + ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len); ahash_request_set_callback(req, 0, ah6_output_done, skb); AH_SKB_CB(skb)->tmp = iph_base; @@ -514,6 +530,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) int nexthdr; int nfrags; int err = -ENOMEM; + int seqhi_len = 0; + __be32 *seqhi; + int sglists = 0; + struct scatterlist *seqhisg; if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr))) goto out; @@ -550,14 +570,22 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, hdr_len); - work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len); + if (x->props.flags & XFRM_STATE_ESN) { + sglists = 1; + seqhi_len = sizeof(*seqhi); + } + + work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len + + ahp->icv_trunc_len + seqhi_len); if (!work_iph) goto out; - auth_data = ah_tmp_auth(work_iph, hdr_len); - icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len); + auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len); + seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len); + icv = ah_tmp_icv(ahash, seqhi, seqhi_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); + seqhisg = sg + nfrags; memcpy(work_iph, ip6h, hdr_len); memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); @@ -572,10 +600,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) ip6h->flow_lbl[2] = 0; ip6h->hop_limit = 0; - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + sg_init_table(sg, nfrags + sglists); + skb_to_sgvec_nomark(skb, sg, 0, skb->len); + + if (x->props.flags & XFRM_STATE_ESN) { + /* Attach seqhi sg right after packet payload */ + *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; + sg_set_buf(seqhisg, seqhi, seqhi_len); + } - ahash_request_set_crypt(req, sg, icv, skb->len); + ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len); ahash_request_set_callback(req, 0, ah6_input_done, skb); AH_SKB_CB(skb)->tmp = work_iph; @@ -609,8 +643,8 @@ out: return err; } -static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) +static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; @@ -619,17 +653,19 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) - return; + return 0; x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) - return; + return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); + + return 0; } static int ah6_init_state(struct xfrm_state *x) @@ -714,6 +750,11 @@ static void ah6_destroy(struct xfrm_state *x) kfree(ahp); } +static int ah6_rcv_cb(struct sk_buff *skb, int err) +{ + return 0; +} + static const struct xfrm_type ah6_type = { .description = "AH6", @@ -727,10 +768,11 @@ static const struct xfrm_type ah6_type = .hdr_offset = xfrm6_find_1stfragopt, }; -static const struct inet6_protocol ah6_protocol = { +static struct xfrm6_protocol ah6_protocol = { .handler = xfrm6_rcv, + .cb_handler = ah6_rcv_cb, .err_handler = ah6_err, - .flags = INET6_PROTO_NOPOLICY, + .priority = 0, }; static int __init ah6_init(void) @@ -740,7 +782,7 @@ static int __init ah6_init(void) return -EAGAIN; } - if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) { + if (xfrm6_protocol_register(&ah6_protocol, IPPROTO_AH) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ah6_type, AF_INET6); return -EAGAIN; @@ -751,7 +793,7 @@ static int __init ah6_init(void) static void __exit ah6_fini(void) { - if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0) + if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0) pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 6eef8a7e35f2..d15da1377149 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -421,8 +421,8 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) net_adj) & ~(blksize - 1)) + net_adj - 2; } -static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) +static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; @@ -431,18 +431,20 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) - return; + return 0; x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) - return; + return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); + + return 0; } static void esp6_destroy(struct xfrm_state *x) @@ -614,6 +616,11 @@ error: return err; } +static int esp6_rcv_cb(struct sk_buff *skb, int err) +{ + return 0; +} + static const struct xfrm_type esp6_type = { .description = "ESP6", @@ -628,10 +635,11 @@ static const struct xfrm_type esp6_type = .hdr_offset = xfrm6_find_1stfragopt, }; -static const struct inet6_protocol esp6_protocol = { - .handler = xfrm6_rcv, +static struct xfrm6_protocol esp6_protocol = { + .handler = xfrm6_rcv, + .cb_handler = esp6_rcv_cb, .err_handler = esp6_err, - .flags = INET6_PROTO_NOPOLICY, + .priority = 0, }; static int __init esp6_init(void) @@ -640,7 +648,7 @@ static int __init esp6_init(void) pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } - if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) { + if (xfrm6_protocol_register(&esp6_protocol, IPPROTO_ESP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&esp6_type, AF_INET6); return -EAGAIN; @@ -651,7 +659,7 @@ static int __init esp6_init(void) static void __exit esp6_fini(void) { - if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0) + if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0) pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0) pr_info("%s: can't remove xfrm type\n", __func__); diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 140748debc4a..8af3eb57f438 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -212,7 +212,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, found = (nexthdr == target); if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { - if (target < 0) + if (target < 0 || found) break; return -ENOENT; } diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c index cf77f3abfd06..447a7fbd1bb6 100644 --- a/net/ipv6/exthdrs_offload.c +++ b/net/ipv6/exthdrs_offload.c @@ -25,11 +25,11 @@ int __init ipv6_exthdrs_offload_init(void) int ret; ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING); - if (!ret) + if (ret) goto out; ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS); - if (!ret) + if (ret) goto out_rt; out: diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f2610e157660..7b326529e6a2 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -520,7 +520,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) np->tclass, NULL, &fl6, (struct rt6_info *)dst, MSG_DONTWAIT, np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 72d198b8e4d2..ee7a97f510cb 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -79,7 +79,9 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) /* RFC 2460 section 8.1 says that we SHOULD log this error. Well, it is reasonable. */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", + &ipv6_hdr(skb)->saddr, ntohs(uh->source), + &ipv6_hdr(skb)->daddr, ntohs(uh->dest)); return 1; } if (skb->ip_summed == CHECKSUM_COMPLETE && diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 075602fc6b6a..34e0ded5c14b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -9,14 +9,12 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. - */ - -/* - * Changes: - * Yuji SEKIYA @USAGI: Support default route on router node; - * remove ip6_null_entry from the top of - * routing table. - * Ville Nuorvala: Fixed routing subtrees. + * + * Changes: + * Yuji SEKIYA @USAGI: Support default route on router node; + * remove ip6_null_entry from the top of + * routing table. + * Ville Nuorvala: Fixed routing subtrees. */ #define pr_fmt(fmt) "IPv6: " fmt @@ -46,10 +44,9 @@ #define RT6_TRACE(x...) do { ; } while (0) #endif -static struct kmem_cache * fib6_node_kmem __read_mostly; +static struct kmem_cache *fib6_node_kmem __read_mostly; -enum fib_walk_state_t -{ +enum fib_walk_state_t { #ifdef CONFIG_IPV6_SUBTREES FWS_S, #endif @@ -59,8 +56,7 @@ enum fib_walk_state_t FWS_U }; -struct fib6_cleaner_t -{ +struct fib6_cleaner_t { struct fib6_walker_t w; struct net *net; int (*func)(struct rt6_info *, void *arg); @@ -138,7 +134,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit) const __be32 *addr = token; /* * Here, - * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) + * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) * is optimized version of * htonl(1 << ((~fn_bit)&0x1F)) * See include/asm-generic/bitops/le.h. @@ -147,7 +143,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit) addr[fn_bit >> 5]; } -static __inline__ struct fib6_node * node_alloc(void) +static __inline__ struct fib6_node *node_alloc(void) { struct fib6_node *fn; @@ -156,7 +152,7 @@ static __inline__ struct fib6_node * node_alloc(void) return fn; } -static __inline__ void node_free(struct fib6_node * fn) +static __inline__ void node_free(struct fib6_node *fn) { kmem_cache_free(fib6_node_kmem, fn); } @@ -292,7 +288,7 @@ static int fib6_dump_node(struct fib6_walker_t *w) static void fib6_dump_end(struct netlink_callback *cb) { - struct fib6_walker_t *w = (void*)cb->args[2]; + struct fib6_walker_t *w = (void *)cb->args[2]; if (w) { if (cb->args[4]) { @@ -302,7 +298,7 @@ static void fib6_dump_end(struct netlink_callback *cb) cb->args[2] = 0; kfree(w); } - cb->done = (void*)cb->args[3]; + cb->done = (void *)cb->args[3]; cb->args[1] = 3; } @@ -485,7 +481,7 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, fn->fn_sernum = sernum; dir = addr_bit_set(addr, fn->fn_bit); pn = fn; - fn = dir ? fn->right: fn->left; + fn = dir ? fn->right : fn->left; } while (fn); if (!allow_create) { @@ -638,12 +634,41 @@ static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt) RTF_GATEWAY; } +static int fib6_commit_metrics(struct dst_entry *dst, + struct nlattr *mx, int mx_len) +{ + struct nlattr *nla; + int remaining; + u32 *mp; + + if (dst->flags & DST_HOST) { + mp = dst_metrics_write_ptr(dst); + } else { + mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!mp) + return -ENOMEM; + dst_init_metrics(dst, mp, 0); + } + + nla_for_each_attr(nla, mx, mx_len, remaining) { + int type = nla_type(nla); + + if (type) { + if (type > RTAX_MAX) + return -EINVAL; + + mp[type - 1] = nla_get_u32(nla); + } + } + return 0; +} + /* * Insert routing information in a node. */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info) + struct nl_info *info, struct nlattr *mx, int mx_len) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -653,6 +678,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); + int err; ins = &fn->leaf; @@ -751,6 +777,11 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: + if (mx) { + err = fib6_commit_metrics(&rt->dst, mx, mx_len); + if (err) + return err; + } rt->dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; @@ -770,6 +801,11 @@ add: pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); return -ENOENT; } + if (mx) { + err = fib6_commit_metrics(&rt->dst, mx, mx_len); + if (err) + return err; + } *ins = rt; rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; @@ -806,7 +842,8 @@ void fib6_force_start_gc(struct net *net) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, + struct nlattr *mx, int mx_len) { struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; @@ -900,7 +937,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) } #endif - err = fib6_add_rt2node(fn, rt, info); + err = fib6_add_rt2node(fn, rt, info, mx, mx_len); if (!err) { fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) @@ -955,8 +992,8 @@ struct lookup_args { const struct in6_addr *addr; /* search key */ }; -static struct fib6_node * fib6_lookup_1(struct fib6_node *root, - struct lookup_args *args) +static struct fib6_node *fib6_lookup_1(struct fib6_node *root, + struct lookup_args *args) { struct fib6_node *fn; __be32 dir; @@ -1018,8 +1055,8 @@ backtrack: return NULL; } -struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, - const struct in6_addr *saddr) +struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct fib6_node *fn; struct lookup_args args[] = { @@ -1051,9 +1088,9 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *da */ -static struct fib6_node * fib6_locate_1(struct fib6_node *root, - const struct in6_addr *addr, - int plen, int offset) +static struct fib6_node *fib6_locate_1(struct fib6_node *root, + const struct in6_addr *addr, + int plen, int offset) { struct fib6_node *fn; @@ -1081,9 +1118,9 @@ static struct fib6_node * fib6_locate_1(struct fib6_node *root, return NULL; } -struct fib6_node * fib6_locate(struct fib6_node *root, - const struct in6_addr *daddr, int dst_len, - const struct in6_addr *saddr, int src_len) +struct fib6_node *fib6_locate(struct fib6_node *root, + const struct in6_addr *daddr, int dst_len, + const struct in6_addr *saddr, int src_len) { struct fib6_node *fn; @@ -1151,8 +1188,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, children = 0; child = NULL; - if (fn->right) child = fn->right, children |= 1; - if (fn->left) child = fn->left, children |= 2; + if (fn->right) + child = fn->right, children |= 1; + if (fn->left) + child = fn->left, children |= 2; if (children == 3 || FIB6_SUBTREE(fn) #ifdef CONFIG_IPV6_SUBTREES @@ -1180,8 +1219,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, } else { WARN_ON(fn->fn_flags & RTN_ROOT); #endif - if (pn->right == fn) pn->right = child; - else if (pn->left == fn) pn->left = child; + if (pn->right == fn) + pn->right = child; + else if (pn->left == fn) + pn->left = child; #if RT6_DEBUG >= 2 else WARN_ON(1); @@ -1213,10 +1254,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, w->node = child; if (children&2) { RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); - w->state = w->state>=FWS_R ? FWS_U : FWS_INIT; + w->state = w->state >= FWS_R ? FWS_U : FWS_INIT; } else { RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); - w->state = w->state>=FWS_C ? FWS_U : FWS_INIT; + w->state = w->state >= FWS_C ? FWS_U : FWS_INIT; } } } @@ -1314,7 +1355,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) struct rt6_info **rtp; #if RT6_DEBUG >= 2 - if (rt->dst.obsolete>0) { + if (rt->dst.obsolete > 0) { WARN_ON(fn != NULL); return -ENOENT; } @@ -1707,7 +1748,7 @@ out_rt6_stats: kfree(net->ipv6.rt6_stats); out_timer: return -ENOMEM; - } +} static void fib6_net_exit(struct net *net) { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index dfa41bb4e0dc..0961b5ef866d 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -15,9 +15,7 @@ #include <linux/socket.h> #include <linux/net.h> #include <linux/netdevice.h> -#include <linux/if_arp.h> #include <linux/in6.h> -#include <linux/route.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/slab.h> @@ -28,12 +26,8 @@ #include <net/sock.h> #include <net/ipv6.h> -#include <net/ndisc.h> -#include <net/protocol.h> -#include <net/ip6_route.h> #include <net/addrconf.h> #include <net/rawv6.h> -#include <net/icmp.h> #include <net/transp_v6.h> #include <asm/uaccess.h> diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index f3ffb43f59c0..c98338b81d30 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1454,7 +1454,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; - int i; tunnel = netdev_priv(dev); @@ -1464,16 +1463,10 @@ static int ip6gre_tap_init(struct net_device *dev) ip6gre_tnl_link_config(tunnel, 1); - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ip6gre_tap_stats; - ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ip6gre_tap_stats->syncp); - } - return 0; } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 1e8683b135bb..59f95affceb0 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -89,7 +89,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, unsigned int unfrag_ip6hlen; u8 *prevhdr; int offset = 0; - bool tunnel; + bool encap, udpfrag; int nhoff; if (unlikely(skb_shinfo(skb)->gso_type & @@ -110,8 +110,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) goto out; - tunnel = SKB_GSO_CB(skb)->encap_level > 0; - if (tunnel) + encap = SKB_GSO_CB(skb)->encap_level > 0; + if (encap) features = skb->dev->hw_enc_features & netif_skb_features(skb); SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h); @@ -121,6 +121,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (skb->encapsulation && + skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) + udpfrag = proto == IPPROTO_UDP && encap; + else + udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; + ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); @@ -133,13 +139,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h)); - if (tunnel) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } skb->network_header = (u8 *)ipv6h - skb->head; - if (!tunnel && proto == IPPROTO_UDP) { + if (udpfrag) { unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); fptr->frag_off = htons(offset); @@ -148,6 +150,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, offset += (ntohs(ipv6h->payload_len) - sizeof(struct frag_hdr)); } + if (encap) + skb_reset_inner_headers(skb); } out: diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ef02b26ccf81..3284d61577c0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -342,6 +342,20 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) return mtu; } +static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu || skb->local_df) + return false; + + if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) + return true; + + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) + return false; + + return true; +} + int ip6_forward(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -353,6 +367,9 @@ int ip6_forward(struct sk_buff *skb) if (net->ipv6.devconf_all->forwarding == 0) goto error; + if (skb->pkt_type != PACKET_HOST) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; @@ -362,9 +379,6 @@ int ip6_forward(struct sk_buff *skb) goto drop; } - if (skb->pkt_type != PACKET_HOST) - goto drop; - skb_forward_csum(skb); /* @@ -466,8 +480,7 @@ int ip6_forward(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || - (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { + if (ip6_pkt_too_big(skb, mtu)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -517,9 +530,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) - to->nf_trace = from->nf_trace; -#endif skb_copy_secmark(to, from); } @@ -1091,21 +1101,19 @@ static void ip6_append_data_mtu(unsigned int *mtu, unsigned int fragheaderlen, struct sk_buff *skb, struct rt6_info *rt, - bool pmtuprobe) + unsigned int orig_mtu) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (skb == NULL) { /* first fragment, reserve header_len */ - *mtu = *mtu - rt->dst.header_len; + *mtu = orig_mtu - rt->dst.header_len; } else { /* * this fragment is not first, the headers * space is regarded as data space. */ - *mtu = min(*mtu, pmtuprobe ? - rt->dst.dev->mtu : - dst_mtu(rt->dst.path)); + *mtu = orig_mtu; } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); @@ -1122,7 +1130,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen, mtu; + unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; int exthdrlen; int dst_exthdrlen; int hh_len; @@ -1204,6 +1212,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, dst_exthdrlen = 0; mtu = cork->fragsize; } + orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1221,8 +1230,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; - maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ? - mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + if (ip6_sk_local_df(sk)) + maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + else + maxnonfragsize = mtu; /* dontfrag active */ if ((cork->length + length > mtu - headersize) && dontfrag && @@ -1301,8 +1312,7 @@ alloc_new_skb: if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, - np->pmtudisc >= - IPV6_PMTUDISC_PROBE); + orig_mtu); skb_prev = skb; @@ -1530,8 +1540,7 @@ int ip6_push_pending_frames(struct sock *sk) } /* Allow local fragmentation. */ - if (np->pmtudisc < IPV6_PMTUDISC_DO) - skb->local_df = 1; + skb->local_df = ip6_sk_local_df(sk); *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); @@ -1558,8 +1567,8 @@ int ip6_push_pending_frames(struct sock *sk) if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } err = ip6_local_out(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5db8d310f9c0..e1df691d78be 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -108,12 +108,12 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) per_cpu_ptr(dev->tstats, i); do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); + start = u64_stats_fetch_begin_irq(&tstats->syncp); tmp.rx_packets = tstats->rx_packets; tmp.rx_bytes = tstats->rx_bytes; tmp.tx_packets = tstats->tx_packets; tmp.tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); sum.rx_packets += tmp.rx_packets; sum.rx_bytes += tmp.rx_bytes; @@ -1502,19 +1502,12 @@ static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - int i; t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ip6_tnl_stats; - ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ip6_tnl_stats->syncp); - } return 0; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 2d19272b8cee..b7c0f827140b 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -278,7 +278,6 @@ static void vti6_dev_uninit(struct net_device *dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else vti6_tnl_unlink(ip6n, t); - ip6_tnl_dst_reset(t); dev_put(dev); } @@ -288,11 +287,8 @@ static int vti6_rcv(struct sk_buff *skb) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); rcu_read_lock(); - if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr)) != NULL) { - struct pcpu_sw_netstats *tstats; - if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); goto discard; @@ -309,27 +305,58 @@ static int vti6_rcv(struct sk_buff *skb) goto discard; } - tstats = this_cpu_ptr(t->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); - - skb->mark = 0; - secpath_reset(skb); - skb->dev = t->dev; + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; + skb->mark = be32_to_cpu(t->parms.i_key); rcu_read_unlock(); - return 0; + + return xfrm6_rcv(skb); } rcu_read_unlock(); - return 1; - + return -EINVAL; discard: kfree_skb(skb); return 0; } +static int vti6_rcv_cb(struct sk_buff *skb, int err) +{ + unsigned short family; + struct net_device *dev; + struct pcpu_sw_netstats *tstats; + struct xfrm_state *x; + struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; + + if (!t) + return 1; + + dev = t->dev; + + if (err) { + dev->stats.rx_errors++; + dev->stats.rx_dropped++; + + return 0; + } + + x = xfrm_input_state(skb); + family = x->inner_mode->afinfo->family; + + if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + return -EPERM; + + skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev))); + skb->dev = dev; + + tstats = this_cpu_ptr(dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + return 0; +} + /** * vti6_addr_conflict - compare packet addresses to tunnel's own * @t: the outgoing tunnel device @@ -349,44 +376,56 @@ vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } +static bool vti6_state_check(const struct xfrm_state *x, + const struct in6_addr *dst, + const struct in6_addr *src) +{ + xfrm_address_t *daddr = (xfrm_address_t *)dst; + xfrm_address_t *saddr = (xfrm_address_t *)src; + + /* if there is no transform then this tunnel is not functional. + * Or if the xfrm is not mode tunnel. + */ + if (!x || x->props.mode != XFRM_MODE_TUNNEL || + x->props.family != AF_INET6) + return false; + + if (ipv6_addr_any(dst)) + return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6); + + if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET6)) + return false; + + return true; +} + /** * vti6_xmit - send a packet * @skb: the outgoing socket buffer * @dev: the outgoing tunnel device + * @fl: the flow informations for the xfrm_lookup **/ -static int vti6_xmit(struct sk_buff *skb, struct net_device *dev) +static int +vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) { - struct net *net = dev_net(dev); struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; - struct dst_entry *dst = NULL, *ndst = NULL; - struct flowi6 fl6; - struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; int err = -1; - if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || - !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h)) - return err; - - dst = ip6_tnl_dst_check(t); - if (!dst) { - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - - ndst = ip6_route_output(net, NULL, &fl6); + if (!dst) + goto tx_err_link_failure; - if (ndst->error) - goto tx_err_link_failure; - ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(&fl6), NULL, 0); - if (IS_ERR(ndst)) { - err = PTR_ERR(ndst); - ndst = NULL; - goto tx_err_link_failure; - } - dst = ndst; + dst_hold(dst); + dst = xfrm_lookup(t->net, dst, fl, NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; + goto tx_err_link_failure; } - if (!dst->xfrm || dst->xfrm->props.mode != XFRM_MODE_TUNNEL) + if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr)) goto tx_err_link_failure; tdev = dst->dev; @@ -398,14 +437,21 @@ static int vti6_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err_dst_release; } + skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); + skb_dst_set(skb, dst); + skb->dev = skb_dst(skb)->dev; - skb_dst_drop(skb); - skb_dst_set_noref(skb, dst); + err = dst_output(skb); + if (net_xmit_eval(err) == 0) { + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); - ip6tunnel_xmit(skb, dev); - if (ndst) { - dev->mtu = dst_mtu(ndst); - ip6_tnl_dst_store(t, ndst); + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += skb->len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + stats->tx_errors++; + stats->tx_aborted_errors++; } return 0; @@ -413,7 +459,7 @@ tx_err_link_failure: stats->tx_carrier_errors++; dst_link_failure(skb); tx_err_dst_release: - dst_release(ndst); + dst_release(dst); return err; } @@ -422,16 +468,33 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; + struct ipv6hdr *ipv6h; + struct flowi fl; int ret; + memset(&fl, 0, sizeof(fl)); + skb->mark = be32_to_cpu(t->parms.o_key); + switch (skb->protocol) { case htons(ETH_P_IPV6): - ret = vti6_xmit(skb, dev); + ipv6h = ipv6_hdr(skb); + + if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || + !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h)) + goto tx_err; + + xfrm_decode_session(skb, &fl, AF_INET6); + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + break; + case htons(ETH_P_IP): + xfrm_decode_session(skb, &fl, AF_INET); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); break; default: goto tx_err; } + ret = vti6_xmit(skb, dev, &fl); if (ret < 0) goto tx_err; @@ -444,24 +507,66 @@ tx_err: return NETDEV_TX_OK; } +static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + __be32 spi; + struct xfrm_state *x; + struct ip6_tnl *t; + struct ip_esp_hdr *esph; + struct ip_auth_hdr *ah; + struct ip_comp_hdr *ipch; + struct net *net = dev_net(skb->dev); + const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; + int protocol = iph->nexthdr; + + t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr); + if (!t) + return -1; + + switch (protocol) { + case IPPROTO_ESP: + esph = (struct ip_esp_hdr *)(skb->data + offset); + spi = esph->spi; + break; + case IPPROTO_AH: + ah = (struct ip_auth_hdr *)(skb->data + offset); + spi = ah->spi; + break; + case IPPROTO_COMP: + ipch = (struct ip_comp_hdr *)(skb->data + offset); + spi = htonl(ntohs(ipch->cpi)); + break; + default: + return 0; + } + + if (type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) + return 0; + + x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + spi, protocol, AF_INET6); + if (!x) + return 0; + + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, skb->dev->ifindex, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); + xfrm_state_put(x); + + return 0; +} + static void vti6_link_config(struct ip6_tnl *t) { - struct dst_entry *dst; struct net_device *dev = t->dev; struct __ip6_tnl_parm *p = &t->parms; - struct flowi6 *fl6 = &t->fl.u.ip6; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); - /* Set up flowi template */ - fl6->saddr = p->laddr; - fl6->daddr = p->raddr; - fl6->flowi6_oif = p->link; - fl6->flowi6_mark = be32_to_cpu(p->i_key); - fl6->flowi6_proto = p->proto; - fl6->flowlabel = 0; - p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV | IP6_TNL_F_CAP_PER_PACKET); p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); @@ -472,28 +577,6 @@ static void vti6_link_config(struct ip6_tnl *t) dev->flags &= ~IFF_POINTOPOINT; dev->iflink = p->link; - - if (p->flags & IP6_TNL_F_CAP_XMIT) { - - dst = ip6_route_output(dev_net(dev), NULL, fl6); - if (dst->error) - return; - - dst = xfrm_lookup(dev_net(dev), dst, flowi6_to_flowi(fl6), - NULL, 0); - if (IS_ERR(dst)) - return; - - if (dst->dev) { - dev->hard_header_len = dst->dev->hard_header_len; - - dev->mtu = dst_mtu(dst); - - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; - } - dst_release(dst); - } } /** @@ -720,7 +803,6 @@ static void vti6_dev_setup(struct net_device *dev) t = netdev_priv(dev); dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); - dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } @@ -731,18 +813,12 @@ static void vti6_dev_setup(struct net_device *dev) static inline int vti6_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - int i; t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *stats; - stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&stats->syncp); - } return 0; } @@ -914,11 +990,6 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .fill_info = vti6_fill_info, }; -static struct xfrm_tunnel_notifier vti6_handler __read_mostly = { - .handler = vti6_rcv, - .priority = 1, -}; - static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) { int h; @@ -990,6 +1061,27 @@ static struct pernet_operations vti6_net_ops = { .size = sizeof(struct vti6_net), }; +static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { + .handler = vti6_rcv, + .cb_handler = vti6_rcv_cb, + .err_handler = vti6_err, + .priority = 100, +}; + +static struct xfrm6_protocol vti_ah6_protocol __read_mostly = { + .handler = vti6_rcv, + .cb_handler = vti6_rcv_cb, + .err_handler = vti6_err, + .priority = 100, +}; + +static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { + .handler = vti6_rcv, + .cb_handler = vti6_rcv_cb, + .err_handler = vti6_err, + .priority = 100, +}; + /** * vti6_tunnel_init - register protocol and reserve needed resources * @@ -1003,11 +1095,33 @@ static int __init vti6_tunnel_init(void) if (err < 0) goto out_pernet; - err = xfrm6_mode_tunnel_input_register(&vti6_handler); + err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP); if (err < 0) { - pr_err("%s: can't register vti6\n", __func__); + unregister_pernet_device(&vti6_net_ops); + pr_err("%s: can't register vti6 protocol\n", __func__); + goto out; } + + err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH); + if (err < 0) { + xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); + unregister_pernet_device(&vti6_net_ops); + pr_err("%s: can't register vti6 protocol\n", __func__); + + goto out; + } + + err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP); + if (err < 0) { + xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); + xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); + unregister_pernet_device(&vti6_net_ops); + pr_err("%s: can't register vti6 protocol\n", __func__); + + goto out; + } + err = rtnl_link_register(&vti6_link_ops); if (err < 0) goto rtnl_link_failed; @@ -1015,7 +1129,9 @@ static int __init vti6_tunnel_init(void) return 0; rtnl_link_failed: - xfrm6_mode_tunnel_input_deregister(&vti6_handler); + xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); + xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); + xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); out: unregister_pernet_device(&vti6_net_ops); out_pernet: @@ -1028,8 +1144,12 @@ out_pernet: static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); - if (xfrm6_mode_tunnel_input_deregister(&vti6_handler)) - pr_info("%s: can't deregister vti6\n", __func__); + if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP)) + pr_info("%s: can't deregister protocol\n", __func__); + if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH)) + pr_info("%s: can't deregister protocol\n", __func__); + if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP)) + pr_info("%s: can't deregister protocol\n", __func__); unregister_pernet_device(&vti6_net_ops); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0eb4038a4d63..8737400af0a0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2349,13 +2349,14 @@ int ip6mr_get_route(struct net *net, } static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - u32 portid, u32 seq, struct mfc6_cache *c, int cmd) + u32 portid, u32 seq, struct mfc6_cache *c, int cmd, + int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; - nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2423,7 +2424,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, if (skb == NULL) goto errout; - err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd); + err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); if (err < 0) goto errout; @@ -2462,7 +2463,8 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) goto done; next_entry: e++; @@ -2476,7 +2478,8 @@ next_entry: if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) { + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) { spin_unlock_bh(&mfc_unres_lock); goto done; } diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index da9becb42e81..d1c793cffcb5 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -53,7 +53,7 @@ #include <linux/icmpv6.h> #include <linux/mutex.h> -static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); @@ -65,19 +65,21 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) - return; + return 0; spi = htonl(ntohs(ipcomph->cpi)); x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) - return; + return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); + + return 0; } static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) @@ -174,6 +176,11 @@ out: return err; } +static int ipcomp6_rcv_cb(struct sk_buff *skb, int err) +{ + return 0; +} + static const struct xfrm_type ipcomp6_type = { .description = "IPCOMP6", @@ -186,11 +193,12 @@ static const struct xfrm_type ipcomp6_type = .hdr_offset = xfrm6_find_1stfragopt, }; -static const struct inet6_protocol ipcomp6_protocol = +static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, + .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, - .flags = INET6_PROTO_NOPOLICY, + .priority = 0, }; static int __init ipcomp6_init(void) @@ -199,7 +207,7 @@ static int __init ipcomp6_init(void) pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } - if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) { + if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); return -EAGAIN; @@ -209,7 +217,7 @@ static int __init ipcomp6_init(void) static void __exit ipcomp6_fini(void) { - if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) + if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0) pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0) pr_info("%s: can't remove xfrm type\n", __func__); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 0a00f449de5e..edb58aff4ae7 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -722,7 +722,7 @@ done: case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) goto e_inval; - if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_INTERFACE) + if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) goto e_inval; np->pmtudisc = val; retv = 0; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index e1e47350784b..08b367c6b9cf 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1620,11 +1620,12 @@ static void mld_sendpack(struct sk_buff *skb) dst_output); out: if (!err) { - ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT); - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); - IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); - } else - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); + ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); + } else { + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + } rcu_read_unlock(); return; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 827f795209cf..6313abd53c9d 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -6,24 +6,24 @@ #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/addrconf.h> +#include <net/secure_seq.h> void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) { static atomic_t ipv6_fragmentation_id; + struct in6_addr addr; int old, new; #if IS_ENABLED(CONFIG_IPV6) - if (rt && !(rt->dst.flags & DST_NOPEER)) { - struct inet_peer *peer; - struct net *net; - - net = dev_net(rt->dst.dev); - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); - if (peer) { - fhdr->identification = htonl(inet_getid(peer, 0)); - inet_putpeer(peer); - return; - } + struct inet_peer *peer; + struct net *net; + + net = dev_net(rt->dst.dev); + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + if (peer) { + fhdr->identification = htonl(inet_getid(peer, 0)); + inet_putpeer(peer); + return; } #endif do { @@ -32,7 +32,10 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) if (!new) new = 1; } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); - fhdr->identification = htonl(new); + + addr = rt->rt6i_dst.addr; + addr.s6_addr32[0] ^= (__force __be32)new; + fhdr->identification = htonl(secure_ipv6_id(addr.s6_addr32)); } EXPORT_SYMBOL(ipv6_select_ident); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index fb9beb78f00b..bda74291c3e0 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -135,6 +135,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.saddr = np->saddr; fl6.daddr = *daddr; + fl6.flowi6_mark = sk->sk_mark; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -181,8 +182,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, MSG_DONTWAIT, np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev, - ICMP6_MIB_OUTERRORS); + ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev, + ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { err = icmpv6_push_pending_frames(sk, &fl6, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 11dac21e6586..5015c50a5ba7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -149,7 +149,8 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) unsigned long prev, new; p = peer->metrics; - if (inet_metrics_new(peer)) + if (inet_metrics_new(peer) || + (old & DST_METRICS_FORCE_OVERWRITE)) memcpy(p, old_p, sizeof(u32) * RTAX_MAX); new = (unsigned long) p; @@ -373,12 +374,6 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } -static bool rt6_need_strict(const struct in6_addr *daddr) -{ - return ipv6_addr_type(daddr) & - (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); -} - /* Multipath route selection: * Hash based function using packet header and flowlabel. * Adapted from fib_info_hashfn() @@ -857,14 +852,15 @@ EXPORT_SYMBOL(rt6_lookup); be destroyed. */ -static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) +static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, + struct nlattr *mx, int mx_len) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info); + err = fib6_add(&table->tb6_root, rt, info, mx, mx_len); write_unlock_bh(&table->tb6_lock); return err; @@ -875,7 +871,7 @@ int ip6_ins_rt(struct rt6_info *rt) struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; - return __ip6_ins_rt(rt, &info); + return __ip6_ins_rt(rt, &info, NULL, 0); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, @@ -1513,7 +1509,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table); + rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table); if (!rt) { err = -ENOMEM; @@ -1543,17 +1539,11 @@ int ip6_route_add(struct fib6_config *cfg) ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; - if (rt->rt6i_dst.plen == 128) - rt->dst.flags |= DST_HOST; - - if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) { - u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); - if (!metrics) { - err = -ENOMEM; - goto out; - } - dst_init_metrics(&rt->dst, metrics, 0); + if (rt->rt6i_dst.plen == 128) { + rt->dst.flags |= DST_HOST; + dst_metrics_set_force_overwrite(&rt->dst); } + #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->rt6i_src.plen = cfg->fc_src_len; @@ -1672,31 +1662,13 @@ int ip6_route_add(struct fib6_config *cfg) rt->rt6i_flags = cfg->fc_flags; install_route: - if (cfg->fc_mx) { - struct nlattr *nla; - int remaining; - - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla_type(nla); - - if (type) { - if (type > RTAX_MAX) { - err = -EINVAL; - goto out; - } - - dst_metric_set(&rt->dst, type, nla_get_u32(nla)); - } - } - } - rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; cfg->fc_nlinfo.nl_net = dev_net(dev); - return __ip6_ins_rt(rt, &cfg->fc_nlinfo); + return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len); out: if (dev) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3dfbcf1dcb1c..1693c8d885f0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -475,6 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) ipip6_tunnel_unlink(sitn, tunnel); ipip6_tunnel_del_prl(tunnel, NULL); } + ip_tunnel_dst_reset_all(tunnel); dev_put(dev); } @@ -1082,6 +1083,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) t->parms.link = p->link; ipip6_tunnel_bind_dev(t->dev); } + ip_tunnel_dst_reset_all(t); netdev_state_change(t->dev); } @@ -1112,6 +1114,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, t->ip6rd.relay_prefix = relay_prefix; t->ip6rd.prefixlen = ip6rd->prefixlen; t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; + ip_tunnel_dst_reset_all(t); netdev_state_change(t->dev); return 0; } @@ -1271,6 +1274,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); break; } + ip_tunnel_dst_reset_all(t); netdev_state_change(dev); break; @@ -1326,6 +1330,9 @@ static const struct net_device_ops ipip6_netdev_ops = { static void ipip6_dev_free(struct net_device *dev) { + struct ip_tunnel *tunnel = netdev_priv(dev); + + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -1356,7 +1363,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1365,14 +1371,14 @@ static int ipip6_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip6_tunnel_bind_dev(dev); - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ipip6_tunnel_stats; - ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ipip6_tunnel_stats->syncp); + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; } return 0; @@ -1384,7 +1390,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); - int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1395,14 +1400,14 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; iph->ttl = 64; - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ipip6_fb_stats; - ipip6_fb_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ipip6_fb_stats->syncp); + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; } dev_hold(dev); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 889079b2ea85..5ca56cee2dae 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -39,7 +39,7 @@ #include <linux/ipsec.h> #include <linux/times.h> #include <linux/slab.h> - +#include <linux/uaccess.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> @@ -65,8 +65,6 @@ #include <net/tcp_memcontrol.h> #include <net/busy_poll.h> -#include <asm/uaccess.h> - #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -501,8 +499,10 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) int res; res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0); - if (!res) + if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + } return res; } @@ -530,8 +530,8 @@ static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr); } -static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, - int optlen) +static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, + int optlen) { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; @@ -715,7 +715,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, - .syn_ack_timeout = tcp_syn_ack_timeout, + .syn_ack_timeout = tcp_syn_ack_timeout, }; #ifdef CONFIG_TCP_MD5SIG @@ -726,7 +726,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { #endif static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 tsval, u32 tsecr, + u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int rst, u8 tclass, u32 label) { @@ -798,8 +798,10 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); fl6.flowi6_proto = IPPROTO_TCP; - if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + if (rt6_need_strict(&fl6.daddr) || !oif) fl6.flowi6_oif = inet6_iif(skb); + else + fl6.flowi6_oif = oif; fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); @@ -833,6 +835,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) int genhash; struct sock *sk1 = NULL; #endif + int oif; if (th->rst) return; @@ -876,7 +879,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0, 0); + oif = sk ? sk->sk_bound_dev_if : 0; + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -888,11 +892,11 @@ release_sk1: } static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 tsval, u32 tsecr, + u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, u32 label) { - tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass, + tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, label); } @@ -904,7 +908,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, - tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), + tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), tw->tw_tclass, (tw->tw_flowlabel << 12)); inet_twsk_put(tw); @@ -914,7 +918,7 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, - req->rcv_wnd, tcp_time_stamp, req->ts_recent, + req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); } @@ -1259,7 +1263,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ - if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) { + key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr); + if (key != NULL) { /* We're using one, so create a matching key * on the newsk structure. If we fail to get * memory, then we end up not copying the key @@ -1303,9 +1308,8 @@ static __sum16 tcp_v6_checksum_init(struct sk_buff *skb) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0)); - if (skb->len <= 76) { + if (skb->len <= 76) return __skb_checksum_complete(skb); - } return 0; } @@ -1335,7 +1339,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return tcp_v4_do_rcv(sk, skb); #ifdef CONFIG_TCP_MD5SIG - if (tcp_v6_inbound_md5_hash (sk, skb)) + if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard; #endif @@ -1602,7 +1606,8 @@ do_time_wait: break; case TCP_TW_RST: goto no_tcp_socket; - case TCP_TW_SUCCESS:; + case TCP_TW_SUCCESS: + ; } goto discard_it; } @@ -1647,7 +1652,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, - .twsk_destructor= tcp_twsk_destructor, + .twsk_destructor = tcp_twsk_destructor, }; static const struct inet_connection_sock_af_ops ipv6_specific = { @@ -1681,7 +1686,6 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { /* * TCP over IPv4 via INET6 API */ - static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index e7359f9eaa8d..b261ee8b83fc 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -113,7 +113,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); + fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index cb04f7a16b5e..901ef6f8addc 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -18,65 +18,6 @@ #include <net/ipv6.h> #include <net/xfrm.h> -/* Informational hook. The decap is still done here. */ -static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly; -static DEFINE_MUTEX(xfrm6_mode_tunnel_input_mutex); - -int xfrm6_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler) -{ - struct xfrm_tunnel_notifier __rcu **pprev; - struct xfrm_tunnel_notifier *t; - int ret = -EEXIST; - int priority = handler->priority; - - mutex_lock(&xfrm6_mode_tunnel_input_mutex); - - for (pprev = &rcv_notify_handlers; - (t = rcu_dereference_protected(*pprev, - lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL; - pprev = &t->next) { - if (t->priority > priority) - break; - if (t->priority == priority) - goto err; - - } - - handler->next = *pprev; - rcu_assign_pointer(*pprev, handler); - - ret = 0; - -err: - mutex_unlock(&xfrm6_mode_tunnel_input_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_register); - -int xfrm6_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler) -{ - struct xfrm_tunnel_notifier __rcu **pprev; - struct xfrm_tunnel_notifier *t; - int ret = -ENOENT; - - mutex_lock(&xfrm6_mode_tunnel_input_mutex); - for (pprev = &rcv_notify_handlers; - (t = rcu_dereference_protected(*pprev, - lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL; - pprev = &t->next) { - if (t == handler) { - *pprev = handler->next; - ret = 0; - break; - } - } - mutex_unlock(&xfrm6_mode_tunnel_input_mutex); - synchronize_net(); - - return ret; -} -EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_deregister); - static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) { const struct ipv6hdr *outer_iph = ipv6_hdr(skb); @@ -130,7 +71,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_tunnel_notifier *handler; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) @@ -138,9 +78,6 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; - for_each_input_rcu(rcv_notify_handlers, handler) - handler->handler(skb); - err = skb_unclone(skb, GFP_ATOMIC); if (err) goto out; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 5f8e128c512d..2a0bbda2c76a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -389,11 +389,17 @@ int __init xfrm6_init(void) if (ret) goto out_policy; + ret = xfrm6_protocol_init(); + if (ret) + goto out_state; + #ifdef CONFIG_SYSCTL register_pernet_subsys(&xfrm6_net_ops); #endif out: return ret; +out_state: + xfrm6_state_fini(); out_policy: xfrm6_policy_fini(); goto out; @@ -404,6 +410,7 @@ void xfrm6_fini(void) #ifdef CONFIG_SYSCTL unregister_pernet_subsys(&xfrm6_net_ops); #endif + xfrm6_protocol_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); dst_entries_destroy(&xfrm6_dst_ops); diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c new file mode 100644 index 000000000000..6ab989c486f7 --- /dev/null +++ b/net/ipv6/xfrm6_protocol.c @@ -0,0 +1,270 @@ +/* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6. + * + * Copyright (C) 2013 secunet Security Networks AG + * + * Author: + * Steffen Klassert <steffen.klassert@secunet.com> + * + * Based on: + * net/ipv4/xfrm4_protocol.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/mutex.h> +#include <linux/skbuff.h> +#include <linux/icmpv6.h> +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/xfrm.h> + +static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly; +static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly; +static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly; +static DEFINE_MUTEX(xfrm6_protocol_mutex); + +static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol) +{ + switch (protocol) { + case IPPROTO_ESP: + return &esp6_handlers; + case IPPROTO_AH: + return &ah6_handlers; + case IPPROTO_COMP: + return &ipcomp6_handlers; + } + + return NULL; +} + +#define for_each_protocol_rcu(head, handler) \ + for (handler = rcu_dereference(head); \ + handler != NULL; \ + handler = rcu_dereference(handler->next)) \ + +int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) +{ + int ret; + struct xfrm6_protocol *handler; + + for_each_protocol_rcu(*proto_handlers(protocol), handler) + if ((ret = handler->cb_handler(skb, err)) <= 0) + return ret; + + return 0; +} +EXPORT_SYMBOL(xfrm6_rcv_cb); + +static int xfrm6_esp_rcv(struct sk_buff *skb) +{ + int ret; + struct xfrm6_protocol *handler; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + + for_each_protocol_rcu(esp6_handlers, handler) + if ((ret = handler->handler(skb)) != -EINVAL) + return ret; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + + kfree_skb(skb); + return 0; +} + +static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_protocol *handler; + + for_each_protocol_rcu(esp6_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + break; +} + +static int xfrm6_ah_rcv(struct sk_buff *skb) +{ + int ret; + struct xfrm6_protocol *handler; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + + for_each_protocol_rcu(ah6_handlers, handler) + if ((ret = handler->handler(skb)) != -EINVAL) + return ret; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + + kfree_skb(skb); + return 0; +} + +static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_protocol *handler; + + for_each_protocol_rcu(ah6_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + break; +} + +static int xfrm6_ipcomp_rcv(struct sk_buff *skb) +{ + int ret; + struct xfrm6_protocol *handler; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + + for_each_protocol_rcu(ipcomp6_handlers, handler) + if ((ret = handler->handler(skb)) != -EINVAL) + return ret; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + + kfree_skb(skb); + return 0; +} + +static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_protocol *handler; + + for_each_protocol_rcu(ipcomp6_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + break; +} + +static const struct inet6_protocol esp6_protocol = { + .handler = xfrm6_esp_rcv, + .err_handler = xfrm6_esp_err, + .flags = INET6_PROTO_NOPOLICY, +}; + +static const struct inet6_protocol ah6_protocol = { + .handler = xfrm6_ah_rcv, + .err_handler = xfrm6_ah_err, + .flags = INET6_PROTO_NOPOLICY, +}; + +static const struct inet6_protocol ipcomp6_protocol = { + .handler = xfrm6_ipcomp_rcv, + .err_handler = xfrm6_ipcomp_err, + .flags = INET6_PROTO_NOPOLICY, +}; + +static struct xfrm_input_afinfo xfrm6_input_afinfo = { + .family = AF_INET6, + .owner = THIS_MODULE, + .callback = xfrm6_rcv_cb, +}; + +static inline const struct inet6_protocol *netproto(unsigned char protocol) +{ + switch (protocol) { + case IPPROTO_ESP: + return &esp6_protocol; + case IPPROTO_AH: + return &ah6_protocol; + case IPPROTO_COMP: + return &ipcomp6_protocol; + } + + return NULL; +} + +int xfrm6_protocol_register(struct xfrm6_protocol *handler, + unsigned char protocol) +{ + struct xfrm6_protocol __rcu **pprev; + struct xfrm6_protocol *t; + bool add_netproto = false; + + int ret = -EEXIST; + int priority = handler->priority; + + mutex_lock(&xfrm6_protocol_mutex); + + if (!rcu_dereference_protected(*proto_handlers(protocol), + lockdep_is_held(&xfrm6_protocol_mutex))) + add_netproto = true; + + for (pprev = proto_handlers(protocol); + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm6_protocol_mutex))) != NULL; + pprev = &t->next) { + if (t->priority < priority) + break; + if (t->priority == priority) + goto err; + } + + handler->next = *pprev; + rcu_assign_pointer(*pprev, handler); + + ret = 0; + +err: + mutex_unlock(&xfrm6_protocol_mutex); + + if (add_netproto) { + if (inet6_add_protocol(netproto(protocol), protocol)) { + pr_err("%s: can't add protocol\n", __func__); + ret = -EAGAIN; + } + } + + return ret; +} +EXPORT_SYMBOL(xfrm6_protocol_register); + +int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, + unsigned char protocol) +{ + struct xfrm6_protocol __rcu **pprev; + struct xfrm6_protocol *t; + int ret = -ENOENT; + + mutex_lock(&xfrm6_protocol_mutex); + + for (pprev = proto_handlers(protocol); + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm6_protocol_mutex))) != NULL; + pprev = &t->next) { + if (t == handler) { + *pprev = handler->next; + ret = 0; + break; + } + } + + if (!rcu_dereference_protected(*proto_handlers(protocol), + lockdep_is_held(&xfrm6_protocol_mutex))) { + if (inet6_del_protocol(netproto(protocol), protocol) < 0) { + pr_err("%s: can't remove protocol\n", __func__); + ret = -EAGAIN; + } + } + + mutex_unlock(&xfrm6_protocol_mutex); + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(xfrm6_protocol_deregister); + +int __init xfrm6_protocol_init(void) +{ + return xfrm_input_register_afinfo(&xfrm6_input_afinfo); +} + +void xfrm6_protocol_fini(void) +{ + xfrm_input_unregister_afinfo(&xfrm6_input_afinfo); +} |