summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c40
-rw-r--r--net/ipv6/fib6_rules.c6
-rw-r--r--net/ipv6/ip6_fib.c26
-rw-r--r--net/ipv6/ip6_gre.c93
-rw-r--r--net/ipv6/ip6_output.c20
-rw-r--r--net/ipv6/ip6_tunnel.c147
-rw-r--r--net/ipv6/ip6mr.c3
-rw-r--r--net/ipv6/netfilter/Kconfig1
-rw-r--r--net/ipv6/route.c275
-rw-r--r--net/ipv6/xfrm6_policy.c1
10 files changed, 416 insertions, 196 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 99c0f2b843f0..36b85bd05ac8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1943,37 +1943,6 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
__ipv6_dev_ac_dec(ifp->idev, &addr);
}
-static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
-{
- if (dev->addr_len != ETH_ALEN)
- return -1;
- memcpy(eui, dev->dev_addr, 3);
- memcpy(eui + 5, dev->dev_addr + 3, 3);
-
- /*
- * The zSeries OSA network cards can be shared among various
- * OS instances, but the OSA cards have only one MAC address.
- * This leads to duplicate address conflicts in conjunction
- * with IPv6 if more than one instance uses the same card.
- *
- * The driver for these cards can deliver a unique 16-bit
- * identifier for each instance sharing the same card. It is
- * placed instead of 0xFFFE in the interface identifier. The
- * "u" bit of the interface identifier is not inverted in this
- * case. Hence the resulting interface identifier has local
- * scope according to RFC2373.
- */
- if (dev->dev_id) {
- eui[3] = (dev->dev_id >> 8) & 0xFF;
- eui[4] = dev->dev_id & 0xFF;
- } else {
- eui[3] = 0xFF;
- eui[4] = 0xFE;
- eui[0] ^= 2;
- }
- return 0;
-}
-
static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
{
if (dev->addr_len != IEEE802154_ADDR_LEN)
@@ -3150,6 +3119,8 @@ static void addrconf_gre_config(struct net_device *dev)
}
addrconf_addr_gen(idev, true);
+ if (dev->flags & IFF_POINTOPOINT)
+ addrconf_add_mroute(dev);
}
#endif
@@ -5158,13 +5129,12 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
ifp->idev->dev, 0, 0);
- if (rt && ip6_del_rt(rt))
- dst_free(&rt->dst);
+ if (rt)
+ ip6_del_rt(rt);
}
dst_hold(&ifp->rt->dst);
- if (ip6_del_rt(ifp->rt))
- dst_free(&ifp->rt->dst);
+ ip6_del_rt(ifp->rt);
rt_genid_bump_ipv6(net);
break;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 2367a16eae58..9f777ec59a59 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -258,11 +258,6 @@ nla_put_failure:
return -ENOBUFS;
}
-static u32 fib6_rule_default_pref(struct fib_rules_ops *ops)
-{
- return 0x3FFF;
-}
-
static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(16) /* dst */
@@ -279,7 +274,6 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.configure = fib6_rule_configure,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
- .default_pref = fib6_rule_default_pref,
.nlmsg_payload = fib6_rule_nlmsg_payload,
.nlgroup = RTNLGRP_IPV6_RULE,
.policy = fib6_rule_policy,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 418d9823692b..7d2e0023c72d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -155,6 +155,11 @@ static void node_free(struct fib6_node *fn)
kmem_cache_free(fib6_node_kmem, fn);
}
+static void rt6_rcu_free(struct rt6_info *rt)
+{
+ call_rcu(&rt->dst.rcu_head, dst_rcu_free);
+}
+
static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
{
int cpu;
@@ -169,7 +174,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu);
pcpu_rt = *ppcpu_rt;
if (pcpu_rt) {
- dst_free(&pcpu_rt->dst);
+ rt6_rcu_free(pcpu_rt);
*ppcpu_rt = NULL;
}
}
@@ -181,7 +186,7 @@ static void rt6_release(struct rt6_info *rt)
{
if (atomic_dec_and_test(&rt->rt6i_ref)) {
rt6_free_pcpu(rt);
- dst_free(&rt->dst);
+ rt6_rcu_free(rt);
}
}
@@ -846,7 +851,7 @@ add:
*ins = rt;
rt->rt6i_node = fn;
atomic_inc(&rt->rt6i_ref);
- inet6_rt_notify(RTM_NEWROUTE, rt, info);
+ inet6_rt_notify(RTM_NEWROUTE, rt, info, 0);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -872,7 +877,7 @@ add:
rt->rt6i_node = fn;
rt->dst.rt6_next = iter->dst.rt6_next;
atomic_inc(&rt->rt6i_ref);
- inet6_rt_notify(RTM_NEWROUTE, rt, info);
+ inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
fn->fn_flags |= RTN_RTINFO;
@@ -933,6 +938,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
int replace_required = 0;
int sernum = fib6_new_sernum(info->nl_net);
+ if (WARN_ON_ONCE((rt->dst.flags & DST_NOCACHE) &&
+ !atomic_read(&rt->dst.__refcnt)))
+ return -EINVAL;
+
if (info->nlh) {
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
allow_create = 0;
@@ -1025,6 +1034,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
fib6_start_gc(info->nl_net, rt);
if (!(rt->rt6i_flags & RTF_CACHE))
fib6_prune_clones(info->nl_net, pn);
+ rt->dst.flags &= ~DST_NOCACHE;
}
out:
@@ -1049,7 +1059,8 @@ out:
atomic_inc(&pn->leaf->rt6i_ref);
}
#endif
- dst_free(&rt->dst);
+ if (!(rt->dst.flags & DST_NOCACHE))
+ dst_free(&rt->dst);
}
return err;
@@ -1060,7 +1071,8 @@ out:
st_failure:
if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
fib6_repair_tree(info->nl_net, fn);
- dst_free(&rt->dst);
+ if (!(rt->dst.flags & DST_NOCACHE))
+ dst_free(&rt->dst);
return err;
#endif
}
@@ -1410,7 +1422,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
fib6_purge_rt(rt, fn, net);
- inet6_rt_notify(RTM_DELROUTE, rt, info);
+ inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
rt6_release(rt);
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4038c694ec03..3c7b9310b33f 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -404,13 +404,13 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct ipv6_tlv_tnl_enc_lim *tel;
__u32 mtu;
case ICMPV6_DEST_UNREACH:
- net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
+ t->parms.name);
break;
case ICMPV6_TIME_EXCEED:
if (code == ICMPV6_EXC_HOPLIMIT) {
- net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
+ t->parms.name);
}
break;
case ICMPV6_PARAMPROB:
@@ -421,12 +421,12 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (teli && teli == be32_to_cpu(info) - 2) {
tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
if (tel->encap_limit == 0) {
- net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
+ t->parms.name);
}
} else {
- net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
+ t->parms.name);
}
break;
case ICMPV6_PKT_TOOBIG:
@@ -634,20 +634,20 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
}
if (!fl6->flowi6_mark)
- dst = ip6_tnl_dst_check(tunnel);
+ dst = ip6_tnl_dst_get(tunnel);
if (!dst) {
- ndst = ip6_route_output(net, NULL, fl6);
+ dst = ip6_route_output(net, NULL, fl6);
- if (ndst->error)
+ if (dst->error)
goto tx_err_link_failure;
- ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
- if (IS_ERR(ndst)) {
- err = PTR_ERR(ndst);
- ndst = NULL;
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
goto tx_err_link_failure;
}
- dst = ndst;
+ ndst = dst;
}
tdev = dst->dev;
@@ -702,12 +702,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb = new_skb;
}
- if (fl6->flowi6_mark) {
- skb_dst_set(skb, dst);
- ndst = NULL;
- } else {
- skb_dst_set_noref(skb, dst);
- }
+ if (!fl6->flowi6_mark && ndst)
+ ip6_tnl_dst_set(tunnel, ndst);
+ skb_dst_set(skb, dst);
proto = NEXTHDR_GRE;
if (encap_limit >= 0) {
@@ -762,14 +759,12 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb_set_inner_protocol(skb, protocol);
ip6tunnel_xmit(NULL, skb, dev);
- if (ndst)
- ip6_tnl_dst_store(tunnel, ndst);
return 0;
tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
- dst_release(ndst);
+ dst_release(dst);
return err;
}
@@ -1223,6 +1218,9 @@ static const struct net_device_ops ip6gre_netdev_ops = {
static void ip6gre_dev_free(struct net_device *dev)
{
+ struct ip6_tnl *t = netdev_priv(dev);
+
+ ip6_tnl_dst_destroy(t);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@ -1245,9 +1243,10 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
netif_keep_dst(dev);
}
-static int ip6gre_tunnel_init(struct net_device *dev)
+static int ip6gre_tunnel_init_common(struct net_device *dev)
{
struct ip6_tnl *tunnel;
+ int ret;
tunnel = netdev_priv(dev);
@@ -1255,16 +1254,37 @@ static int ip6gre_tunnel_init(struct net_device *dev)
tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ ret = ip6_tnl_dst_init(tunnel);
+ if (ret) {
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+ return ret;
+ }
+
+ return 0;
+}
+
+static int ip6gre_tunnel_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel;
+ int ret;
+
+ ret = ip6gre_tunnel_init_common(dev);
+ if (ret)
+ return ret;
+
+ tunnel = netdev_priv(dev);
+
memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
if (ipv6_addr_any(&tunnel->parms.raddr))
dev->header_ops = &ip6gre_header_ops;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
return 0;
}
@@ -1460,19 +1480,16 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
static int ip6gre_tap_init(struct net_device *dev)
{
struct ip6_tnl *tunnel;
+ int ret;
- tunnel = netdev_priv(dev);
+ ret = ip6gre_tunnel_init_common(dev);
+ if (ret)
+ return ret;
- tunnel->dev = dev;
- tunnel->net = dev_net(dev);
- strcpy(tunnel->parms.name, dev->name);
+ tunnel = netdev_priv(dev);
ip6gre_tnl_link_config(tunnel, 1);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
return 0;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 26ea47930740..d03d6da772f3 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -376,6 +376,9 @@ int ip6_forward(struct sk_buff *skb)
if (skb->pkt_type != PACKET_HOST)
goto drop;
+ if (unlikely(skb->sk))
+ goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
@@ -586,20 +589,22 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr);
+ hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) {
int first_len = skb_pagelen(skb);
struct sk_buff *frag2;
if (first_len - hlen > mtu ||
((first_len - hlen) & 7) ||
- skb_cloned(skb))
+ skb_cloned(skb) ||
+ skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
goto slow_path;
skb_walk_frags(skb, frag) {
/* Correct geometry. */
if (frag->len > mtu ||
((frag->len & 7) && frag->next) ||
- skb_headroom(frag) < hlen)
+ skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
goto slow_path_clean;
/* Partially cloned skb? */
@@ -616,8 +621,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
err = 0;
offset = 0;
- frag = skb_shinfo(skb)->frag_list;
- skb_frag_list_init(skb);
/* BUILD HEADER */
*prevhdr = NEXTHDR_FRAGMENT;
@@ -625,8 +628,11 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
if (!tmp_hdr) {
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGFAILS);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto fail;
}
+ frag = skb_shinfo(skb)->frag_list;
+ skb_frag_list_init(skb);
__skb_pull(skb, hlen);
fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
@@ -723,7 +729,6 @@ slow_path:
*/
*prevhdr = NEXTHDR_FRAGMENT;
- hroom = LL_RESERVED_SPACE(rt->dst.dev);
troom = rt->dst.dev->needed_tailroom;
/*
@@ -872,7 +877,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
#ifdef CONFIG_IPV6_SUBTREES
ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
- (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
+ (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
+ (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
dst_release(dst);
dst = NULL;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b0ab420612bc..eabffbb89795 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -126,36 +126,92 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
* Locking : hash tables are protected by RCU and RTNL
*/
-struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
+static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst,
+ struct dst_entry *dst)
{
- struct dst_entry *dst = t->dst_cache;
+ write_seqlock_bh(&idst->lock);
+ dst_release(rcu_dereference_protected(
+ idst->dst,
+ lockdep_is_held(&idst->lock.lock)));
+ if (dst) {
+ dst_hold(dst);
+ idst->cookie = rt6_get_cookie((struct rt6_info *)dst);
+ } else {
+ idst->cookie = 0;
+ }
+ rcu_assign_pointer(idst->dst, dst);
+ write_sequnlock_bh(&idst->lock);
+}
+
+struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t)
+{
+ struct ip6_tnl_dst *idst;
+ struct dst_entry *dst;
+ unsigned int seq;
+ u32 cookie;
- if (dst && dst->obsolete &&
- !dst->ops->check(dst, t->dst_cookie)) {
- t->dst_cache = NULL;
+ idst = raw_cpu_ptr(t->dst_cache);
+
+ rcu_read_lock();
+ do {
+ seq = read_seqbegin(&idst->lock);
+ dst = rcu_dereference(idst->dst);
+ cookie = idst->cookie;
+ } while (read_seqretry(&idst->lock, seq));
+
+ if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ dst = NULL;
+ rcu_read_unlock();
+
+ if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) {
+ ip6_tnl_per_cpu_dst_set(idst, NULL);
dst_release(dst);
- return NULL;
+ dst = NULL;
}
-
return dst;
}
-EXPORT_SYMBOL_GPL(ip6_tnl_dst_check);
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_get);
void ip6_tnl_dst_reset(struct ip6_tnl *t)
{
- dst_release(t->dst_cache);
- t->dst_cache = NULL;
+ int i;
+
+ for_each_possible_cpu(i)
+ ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL);
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
-void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
+void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst)
+{
+ ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst);
+
+}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_set);
+
+void ip6_tnl_dst_destroy(struct ip6_tnl *t)
{
- struct rt6_info *rt = (struct rt6_info *) dst;
- t->dst_cookie = rt6_get_cookie(rt);
- dst_release(t->dst_cache);
- t->dst_cache = dst;
+ if (!t->dst_cache)
+ return;
+
+ ip6_tnl_dst_reset(t);
+ free_percpu(t->dst_cache);
}
-EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy);
+
+int ip6_tnl_dst_init(struct ip6_tnl *t)
+{
+ int i;
+
+ t->dst_cache = alloc_percpu(struct ip6_tnl_dst);
+ if (!t->dst_cache)
+ return -ENOMEM;
+
+ for_each_possible_cpu(i)
+ seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_init);
/**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
@@ -271,6 +327,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
static void ip6_dev_free(struct net_device *dev)
{
+ struct ip6_tnl *t = netdev_priv(dev);
+
+ ip6_tnl_dst_destroy(t);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@ -510,14 +569,14 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
struct ipv6_tlv_tnl_enc_lim *tel;
__u32 mtu;
case ICMPV6_DEST_UNREACH:
- net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
+ t->parms.name);
rel_msg = 1;
break;
case ICMPV6_TIME_EXCEED:
if ((*code) == ICMPV6_EXC_HOPLIMIT) {
- net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
+ t->parms.name);
rel_msg = 1;
}
break;
@@ -529,13 +588,13 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
if (teli && teli == *info - 2) {
tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
if (tel->encap_limit == 0) {
- net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
+ t->parms.name);
rel_msg = 1;
}
} else {
- net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
- t->parms.name);
+ net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
+ t->parms.name);
}
break;
case ICMPV6_PKT_TOOBIG:
@@ -1010,23 +1069,23 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
} else if (!fl6->flowi6_mark)
- dst = ip6_tnl_dst_check(t);
+ dst = ip6_tnl_dst_get(t);
if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
goto tx_err_link_failure;
if (!dst) {
- ndst = ip6_route_output(net, NULL, fl6);
+ dst = ip6_route_output(net, NULL, fl6);
- if (ndst->error)
+ if (dst->error)
goto tx_err_link_failure;
- ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
- if (IS_ERR(ndst)) {
- err = PTR_ERR(ndst);
- ndst = NULL;
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
goto tx_err_link_failure;
}
- dst = ndst;
+ ndst = dst;
}
tdev = dst->dev;
@@ -1072,12 +1131,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
consume_skb(skb);
skb = new_skb;
}
- if (fl6->flowi6_mark) {
- skb_dst_set(skb, dst);
- ndst = NULL;
- } else {
- skb_dst_set_noref(skb, dst);
- }
+
+ if (!fl6->flowi6_mark && ndst)
+ ip6_tnl_dst_set(t, ndst);
+ skb_dst_set(skb, dst);
+
skb->transport_header = skb->network_header;
proto = fl6->flowi6_proto;
@@ -1101,14 +1159,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
ipv6h->saddr = fl6->saddr;
ipv6h->daddr = fl6->daddr;
ip6tunnel_xmit(NULL, skb, dev);
- if (ndst)
- ip6_tnl_dst_store(t, ndst);
return 0;
tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
- dst_release(ndst);
+ dst_release(dst);
return err;
}
@@ -1573,12 +1629,21 @@ static inline int
ip6_tnl_dev_init_gen(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
+ int ret;
t->dev = dev;
t->net = dev_net(dev);
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+
+ ret = ip6_tnl_dst_init(t);
+ if (ret) {
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+ return ret;
+ }
+
return 0;
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 74ceb73c1c9a..0e004cc42a22 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -217,7 +217,6 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
.match = ip6mr_rule_match,
.configure = ip6mr_rule_configure,
.compare = ip6mr_rule_compare,
- .default_pref = fib_default_rule_pref,
.fill = ip6mr_rule_fill,
.nlgroup = RTNLGRP_IPV6_RULE,
.policy = ip6mr_rule_policy,
@@ -550,7 +549,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
if (it->cache == &mrt->mfc6_unres_queue)
spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == mrt->mfc6_cache_array)
+ else if (it->cache == &mrt->mfc6_cache_array[it->ct])
read_unlock(&mrt_lock);
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 96833e4b3193..f6a024e141e5 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -58,6 +58,7 @@ endif # NF_TABLES
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
+ depends on !NF_CONNTRACK || NF_CONNTRACK
help
This option enables the nf_dup_ipv6 core, which duplicates an IPv6
packet to be rerouted to another destination.
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f45cac6f8356..946880ad48ac 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -142,6 +142,9 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
struct net_device *loopback_dev = net->loopback_dev;
int cpu;
+ if (dev == loopback_dev)
+ return;
+
for_each_possible_cpu(cpu) {
struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
struct rt6_info *rt;
@@ -151,14 +154,12 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
struct inet6_dev *rt_idev = rt->rt6i_idev;
struct net_device *rt_dev = rt->dst.dev;
- if (rt_idev && (rt_idev->dev == dev || !dev) &&
- rt_idev->dev != loopback_dev) {
+ if (rt_idev->dev == dev) {
rt->rt6i_idev = in6_dev_get(loopback_dev);
in6_dev_put(rt_idev);
}
- if (rt_dev && (rt_dev == dev || !dev) &&
- rt_dev != loopback_dev) {
+ if (rt_dev == dev) {
rt->dst.dev = loopback_dev;
dev_hold(rt->dst.dev);
dev_put(rt_dev);
@@ -247,12 +248,6 @@ static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
{
}
-static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
- unsigned long old)
-{
- return NULL;
-}
-
static struct dst_ops ip6_dst_blackhole_ops = {
.family = AF_INET6,
.destroy = ip6_dst_destroy,
@@ -261,7 +256,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
.default_advmss = ip6_default_advmss,
.update_pmtu = ip6_rt_blackhole_update_pmtu,
.redirect = ip6_rt_blackhole_redirect,
- .cow_metrics = ip6_rt_blackhole_cow_metrics,
+ .cow_metrics = dst_cow_metrics_generic,
.neigh_lookup = ip6_neigh_lookup,
};
@@ -318,6 +313,15 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
#endif
+static void rt6_info_init(struct rt6_info *rt)
+{
+ struct dst_entry *dst = &rt->dst;
+
+ memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
+ INIT_LIST_HEAD(&rt->rt6i_siblings);
+ INIT_LIST_HEAD(&rt->rt6i_uncached);
+}
+
/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
struct net_device *dev,
@@ -326,13 +330,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
0, DST_OBSOLETE_FORCE_CHK, flags);
- if (rt) {
- struct dst_entry *dst = &rt->dst;
+ if (rt)
+ rt6_info_init(rt);
- memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
- INIT_LIST_HEAD(&rt->rt6i_siblings);
- INIT_LIST_HEAD(&rt->rt6i_uncached);
- }
return rt;
}
@@ -1068,6 +1068,9 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
+ if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+ oif = 0;
+
redo_rt6_select:
rt = rt6_select(fn, oif, strict);
if (rt->rt6i_nsiblings)
@@ -1190,13 +1193,16 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
struct flowi6 *fl6)
{
int flags = 0;
+ bool any_src;
fl6->flowi6_iif = LOOPBACK_IFINDEX;
- if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
+ any_src = ipv6_addr_any(&fl6->saddr);
+ if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
+ (fl6->flowi6_oif && any_src))
flags |= RT6_LOOKUP_F_IFACE;
- if (!ipv6_addr_any(&fl6->saddr))
+ if (!any_src)
flags |= RT6_LOOKUP_F_HAS_SADDR;
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
@@ -1212,24 +1218,20 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
if (rt) {
- new = &rt->dst;
-
- memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
+ rt6_info_init(rt);
+ new = &rt->dst;
new->__use = 1;
new->input = dst_discard;
new->output = dst_discard_sk;
- if (dst_metrics_read_only(&ort->dst))
- new->_metrics = ort->dst._metrics;
- else
- dst_copy_metrics(new, &ort->dst);
+ dst_copy_metrics(new, &ort->dst);
rt->rt6i_idev = ort->rt6i_idev;
if (rt->rt6i_idev)
in6_dev_hold(rt->rt6i_idev);
rt->rt6i_gateway = ort->rt6i_gateway;
- rt->rt6i_flags = ort->rt6i_flags;
+ rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
rt->rt6i_metric = 0;
memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
@@ -1322,8 +1324,7 @@ static void ip6_link_failure(struct sk_buff *skb)
if (rt) {
if (rt->rt6i_flags & RTF_CACHE) {
dst_hold(&rt->dst);
- if (ip6_del_rt(rt))
- dst_free(&rt->dst);
+ ip6_del_rt(rt);
} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
rt->rt6i_node->fn_sernum = -1;
}
@@ -1748,7 +1749,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
return -EINVAL;
}
-int ip6_route_add(struct fib6_config *cfg)
+int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
{
int err;
struct net *net = cfg->fc_nlinfo.nl_net;
@@ -1756,7 +1757,6 @@ int ip6_route_add(struct fib6_config *cfg)
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
struct fib6_table *table;
- struct mx6_config mxc = { .mx = NULL, };
int addr_type;
if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
@@ -1887,9 +1887,11 @@ int ip6_route_add(struct fib6_config *cfg)
rt->dst.input = ip6_pkt_prohibit;
break;
case RTN_THROW:
+ case RTN_UNREACHABLE:
default:
rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
- : -ENETUNREACH;
+ : (cfg->fc_type == RTN_UNREACHABLE)
+ ? -EHOSTUNREACH : -ENETUNREACH;
rt->dst.output = ip6_pkt_discard_out;
rt->dst.input = ip6_pkt_discard;
break;
@@ -1981,6 +1983,32 @@ install_route:
cfg->fc_nlinfo.nl_net = dev_net(dev);
+ *rt_ret = rt;
+
+ return 0;
+out:
+ if (dev)
+ dev_put(dev);
+ if (idev)
+ in6_dev_put(idev);
+ if (rt)
+ dst_free(&rt->dst);
+
+ *rt_ret = NULL;
+
+ return err;
+}
+
+int ip6_route_add(struct fib6_config *cfg)
+{
+ struct mx6_config mxc = { .mx = NULL, };
+ struct rt6_info *rt = NULL;
+ int err;
+
+ err = ip6_route_info_create(cfg, &rt);
+ if (err)
+ goto out;
+
err = ip6_convert_metrics(&mxc, cfg);
if (err)
goto out;
@@ -1988,14 +2016,12 @@ install_route:
err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
kfree(mxc.mx);
+
return err;
out:
- if (dev)
- dev_put(dev);
- if (idev)
- in6_dev_put(idev);
if (rt)
dst_free(&rt->dst);
+
return err;
}
@@ -2005,7 +2031,8 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
struct fib6_table *table;
struct net *net = dev_net(rt->dst.dev);
- if (rt == net->ipv6.ip6_null_entry) {
+ if (rt == net->ipv6.ip6_null_entry ||
+ rt->dst.flags & DST_NOCACHE) {
err = -ENOENT;
goto out;
}
@@ -2492,6 +2519,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
+ rt->dst.flags |= DST_NOCACHE;
atomic_set(&rt->dst.__refcnt, 1);
@@ -2595,7 +2623,8 @@ void rt6_ifdown(struct net *net, struct net_device *dev)
fib6_clean_all(net, fib6_ifdown, &adn);
icmp6_clean_all(fib6_ifdown, &adn);
- rt6_uncached_list_flush_dev(net, dev);
+ if (dev)
+ rt6_uncached_list_flush_dev(net, dev);
}
struct rt6_mtu_change_arg {
@@ -2776,19 +2805,78 @@ errout:
return err;
}
-static int ip6_route_multipath(struct fib6_config *cfg, int add)
+struct rt6_nh {
+ struct rt6_info *rt6_info;
+ struct fib6_config r_cfg;
+ struct mx6_config mxc;
+ struct list_head next;
+};
+
+static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
+{
+ struct rt6_nh *nh;
+
+ list_for_each_entry(nh, rt6_nh_list, next) {
+ pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
+ &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
+ nh->r_cfg.fc_ifindex);
+ }
+}
+
+static int ip6_route_info_append(struct list_head *rt6_nh_list,
+ struct rt6_info *rt, struct fib6_config *r_cfg)
+{
+ struct rt6_nh *nh;
+ struct rt6_info *rtnh;
+ int err = -EEXIST;
+
+ list_for_each_entry(nh, rt6_nh_list, next) {
+ /* check if rt6_info already exists */
+ rtnh = nh->rt6_info;
+
+ if (rtnh->dst.dev == rt->dst.dev &&
+ rtnh->rt6i_idev == rt->rt6i_idev &&
+ ipv6_addr_equal(&rtnh->rt6i_gateway,
+ &rt->rt6i_gateway))
+ return err;
+ }
+
+ nh = kzalloc(sizeof(*nh), GFP_KERNEL);
+ if (!nh)
+ return -ENOMEM;
+ nh->rt6_info = rt;
+ err = ip6_convert_metrics(&nh->mxc, r_cfg);
+ if (err) {
+ kfree(nh);
+ return err;
+ }
+ memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
+ list_add_tail(&nh->next, rt6_nh_list);
+
+ return 0;
+}
+
+static int ip6_route_multipath_add(struct fib6_config *cfg)
{
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
+ struct rt6_info *rt;
+ struct rt6_nh *err_nh;
+ struct rt6_nh *nh, *nh_safe;
int remaining;
int attrlen;
- int err = 0, last_err = 0;
+ int err = 1;
+ int nhn = 0;
+ int replace = (cfg->fc_nlinfo.nlh &&
+ (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
+ LIST_HEAD(rt6_nh_list);
remaining = cfg->fc_mp_len;
-beginning:
rtnh = (struct rtnexthop *)cfg->fc_mp;
- /* Parse a Multipath Entry */
+ /* Parse a Multipath Entry and build a list (rt6_nh_list) of
+ * rt6_info structs per nexthop
+ */
while (rtnh_ok(rtnh, remaining)) {
memcpy(&r_cfg, cfg, sizeof(*cfg));
if (rtnh->rtnh_ifindex)
@@ -2808,22 +2896,32 @@ beginning:
if (nla)
r_cfg.fc_encap_type = nla_get_u16(nla);
}
- err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
+
+ err = ip6_route_info_create(&r_cfg, &rt);
+ if (err)
+ goto cleanup;
+
+ err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
- last_err = err;
- /* If we are trying to remove a route, do not stop the
- * loop when ip6_route_del() fails (because next hop is
- * already gone), we should try to remove all next hops.
- */
- if (add) {
- /* If add fails, we should try to delete all
- * next hops that have been already added.
- */
- add = 0;
- remaining = cfg->fc_mp_len - remaining;
- goto beginning;
- }
+ dst_free(&rt->dst);
+ goto cleanup;
+ }
+
+ rtnh = rtnh_next(rtnh, &remaining);
+ }
+
+ err_nh = NULL;
+ list_for_each_entry(nh, &rt6_nh_list, next) {
+ err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
+ /* nh->rt6_info is used or freed at this point, reset to NULL*/
+ nh->rt6_info = NULL;
+ if (err) {
+ if (replace && nhn)
+ ip6_print_replace_route_err(&rt6_nh_list);
+ err_nh = nh;
+ goto add_errout;
}
+
/* Because each route is added like a single route we remove
* these flags after the first nexthop: if there is a collision,
* we have already failed to add the first nexthop:
@@ -2833,6 +2931,62 @@ beginning:
*/
cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
NLM_F_REPLACE);
+ nhn++;
+ }
+
+ goto cleanup;
+
+add_errout:
+ /* Delete routes that were already added */
+ list_for_each_entry(nh, &rt6_nh_list, next) {
+ if (err_nh == nh)
+ break;
+ ip6_route_del(&nh->r_cfg);
+ }
+
+cleanup:
+ list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
+ if (nh->rt6_info)
+ dst_free(&nh->rt6_info->dst);
+ kfree(nh->mxc.mx);
+ list_del(&nh->next);
+ kfree(nh);
+ }
+
+ return err;
+}
+
+static int ip6_route_multipath_del(struct fib6_config *cfg)
+{
+ struct fib6_config r_cfg;
+ struct rtnexthop *rtnh;
+ int remaining;
+ int attrlen;
+ int err = 1, last_err = 0;
+
+ remaining = cfg->fc_mp_len;
+ rtnh = (struct rtnexthop *)cfg->fc_mp;
+
+ /* Parse a Multipath Entry */
+ while (rtnh_ok(rtnh, remaining)) {
+ memcpy(&r_cfg, cfg, sizeof(*cfg));
+ if (rtnh->rtnh_ifindex)
+ r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
+
+ attrlen = rtnh_attrlen(rtnh);
+ if (attrlen > 0) {
+ struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ if (nla) {
+ nla_memcpy(&r_cfg.fc_gateway, nla, 16);
+ r_cfg.fc_flags |= RTF_GATEWAY;
+ }
+ }
+ err = ip6_route_del(&r_cfg);
+ if (err)
+ last_err = err;
+
rtnh = rtnh_next(rtnh, &remaining);
}
@@ -2849,7 +3003,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
if (cfg.fc_mp)
- return ip6_route_multipath(&cfg, 0);
+ return ip6_route_multipath_del(&cfg);
else
return ip6_route_del(&cfg);
}
@@ -2864,7 +3018,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
if (cfg.fc_mp)
- return ip6_route_multipath(&cfg, 1);
+ return ip6_route_multipath_add(&cfg);
else
return ip6_route_add(&cfg);
}
@@ -3155,7 +3309,8 @@ errout:
return err;
}
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
+void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
+ unsigned int nlm_flags)
{
struct sk_buff *skb;
struct net *net = info->nl_net;
@@ -3170,7 +3325,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
goto errout;
err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
- event, info->portid, seq, 0, 0, 0);
+ event, info->portid, seq, 0, 0, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index f10b9400b6d7..da55e0c85bb8 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -37,6 +37,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
+ fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
if (saddr)
memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));