summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c10
-rw-r--r--net/ipv6/addrconf_core.c6
-rw-r--r--net/ipv6/af_inet6.c3
-rw-r--r--net/ipv6/ip6_fib.c103
-rw-r--r--net/ipv6/ip6_output.c315
-rw-r--r--net/ipv6/ndisc.c8
-rw-r--r--net/ipv6/netfilter.c123
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c43
-rw-r--r--net/ipv6/proc.c4
-rw-r--r--net/ipv6/reassembly.c42
-rw-r--r--net/ipv6/route.c485
11 files changed, 737 insertions, 405 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b51630ddb728..6b673d4f5ca9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2421,9 +2421,9 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
goto out;
for_each_fib6_node_rt_rcu(fn) {
- if (rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex)
+ if (rt->fib6_nh->fib_nh_dev->ifindex != dev->ifindex)
continue;
- if (no_gw && rt->fib6_nh.fib_nh_gw_family)
+ if (no_gw && rt->fib6_nh->fib_nh_gw_family)
continue;
if ((rt->fib6_flags & flags) != flags)
continue;
@@ -6354,16 +6354,16 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
list_for_each_entry(ifa, &idev->addr_list, if_list) {
spin_lock(&ifa->lock);
if (ifa->rt) {
- struct fib6_info *rt = ifa->rt;
+ struct fib6_nh *nh = ifa->rt->fib6_nh;
int cpu;
rcu_read_lock();
ifa->rt->dst_nopolicy = val ? true : false;
- if (rt->rt6i_pcpu) {
+ if (nh->rt6i_pcpu) {
for_each_possible_cpu(cpu) {
struct rt6_info **rtp;
- rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+ rtp = per_cpu_ptr(nh->rt6i_pcpu, cpu);
addrconf_set_nopolicy(*rtp, val);
}
}
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 5b1246635e02..783f3c1466da 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -183,6 +183,11 @@ static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
return -EAFNOSUPPORT;
}
+static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt)
+{
+ return -EAFNOSUPPORT;
+}
+
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.ipv6_route_input = eafnosupport_ipv6_route_input,
@@ -192,6 +197,7 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.fib6_select_path = eafnosupport_fib6_select_path,
.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
.fib6_nh_init = eafnosupport_fib6_nh_init,
+ .ip6_del_rt = eafnosupport_ip6_del_rt,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c04ae282f4e4..cc6f8d0c625a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -926,6 +926,9 @@ static const struct ipv6_stub ipv6_stub_impl = {
.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
.fib6_nh_init = fib6_nh_init,
.fib6_nh_release = fib6_nh_release,
+ .fib6_update_sernum = fib6_update_sernum_stub,
+ .fib6_rt_update = fib6_rt_update,
+ .ip6_del_rt = ip6_del_rt,
.udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 008421b550c6..cdfb8500ccae 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -147,20 +147,18 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
-struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
{
struct fib6_info *f6i;
+ size_t sz = sizeof(*f6i);
- f6i = kzalloc(sizeof(*f6i), gfp_flags);
+ if (with_fib6_nh)
+ sz += sizeof(struct fib6_nh);
+
+ f6i = kzalloc(sz, gfp_flags);
if (!f6i)
return NULL;
- f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
- if (!f6i->rt6i_pcpu) {
- kfree(f6i);
- return NULL;
- }
-
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
@@ -170,36 +168,11 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
void fib6_info_destroy_rcu(struct rcu_head *head)
{
struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
- struct rt6_exception_bucket *bucket;
WARN_ON(f6i->fib6_node);
- bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
- kfree(bucket);
-
- if (f6i->rt6i_pcpu) {
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct rt6_info **ppcpu_rt;
- struct rt6_info *pcpu_rt;
-
- ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
- pcpu_rt = *ppcpu_rt;
- if (pcpu_rt) {
- dst_dev_put(&pcpu_rt->dst);
- dst_release(&pcpu_rt->dst);
- *ppcpu_rt = NULL;
- }
- }
-
- free_percpu(f6i->rt6i_pcpu);
- }
-
- fib6_nh_release(&f6i->fib6_nh);
-
+ fib6_nh_release(f6i->fib6_nh);
ip_fib_metrics_put(f6i->fib6_metrics);
-
kfree(f6i);
}
EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
@@ -393,10 +366,10 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
return call_fib6_notifier(nb, net, event_type, &info.info);
}
-static int call_fib6_entry_notifiers(struct net *net,
- enum fib_event_type event_type,
- struct fib6_info *rt,
- struct netlink_ext_ack *extack)
+int call_fib6_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct fib6_info *rt,
+ struct netlink_ext_ack *extack)
{
struct fib6_entry_notifier_info info = {
.info.extack = extack,
@@ -899,16 +872,14 @@ insert_above:
return ln;
}
-static void fib6_drop_pcpu_from(struct fib6_info *f6i,
- const struct fib6_table *table)
+static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
+ const struct fib6_info *match,
+ const struct fib6_table *table)
{
int cpu;
- /* Make sure rt6_make_pcpu_route() wont add other percpu routes
- * while we are cleaning them here.
- */
- f6i->fib6_destroying = 1;
- mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
+ if (!fib6_nh->rt6i_pcpu)
+ return;
/* release the reference to this fib entry from
* all of its cached pcpu routes
@@ -917,9 +888,15 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
struct rt6_info **ppcpu_rt;
struct rt6_info *pcpu_rt;
- ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
+ ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
pcpu_rt = *ppcpu_rt;
- if (pcpu_rt) {
+
+ /* only dropping the 'from' reference if the cached route
+ * is using 'match'. The cached pcpu_rt->from only changes
+ * from a fib6_info to NULL (ip6_dst_destroy); it can never
+ * change from one fib6_info reference to another
+ */
+ if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
struct fib6_info *from;
from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
@@ -928,13 +905,27 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
}
}
+static void fib6_drop_pcpu_from(struct fib6_info *f6i,
+ const struct fib6_table *table)
+{
+ struct fib6_nh *fib6_nh;
+
+ /* Make sure rt6_make_pcpu_route() wont add other percpu routes
+ * while we are cleaning them here.
+ */
+ f6i->fib6_destroying = 1;
+ mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
+
+ fib6_nh = f6i->fib6_nh;
+ __fib6_drop_pcpu_from(fib6_nh, f6i, table);
+}
+
static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
struct net *net)
{
struct fib6_table *table = rt->fib6_table;
- if (rt->rt6i_pcpu)
- fib6_drop_pcpu_from(rt, table);
+ fib6_drop_pcpu_from(rt, table);
if (refcount_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
@@ -1222,6 +1213,14 @@ void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
}
+/* allow ipv4 to update sernum via ipv6_stub */
+void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i)
+{
+ spin_lock_bh(&f6i->fib6_table->tb6_lock);
+ fib6_update_sernum_upto_root(net, f6i);
+ spin_unlock_bh(&f6i->fib6_table->tb6_lock);
+}
+
/*
* Add routing information to the routing tree.
* <destination addr>/<source addr>
@@ -2306,14 +2305,14 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
- if (rt->fib6_nh.fib_nh_gw_family) {
+ if (rt->fib6_nh->fib_nh_gw_family) {
flags |= RTF_GATEWAY;
- seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6);
+ seq_printf(seq, "%pi6", &rt->fib6_nh->fib_nh_gw6);
} else {
seq_puts(seq, "00000000000000000000000000000000");
}
- dev = rt->fib6_nh.fib_nh_dev;
+ dev = rt->fib6_nh->fib_nh_dev;
seq_printf(seq, " %08x %08x %08x %08x %8s\n",
rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
flags, dev ? dev->name : "");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f9e43323e667..0bb6b6de7962 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -592,6 +592,170 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
+int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
+ u8 nexthdr, __be32 frag_id,
+ struct ip6_fraglist_iter *iter)
+{
+ unsigned int first_len;
+ struct frag_hdr *fh;
+
+ /* BUILD HEADER */
+ *prevhdr = NEXTHDR_FRAGMENT;
+ iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
+ if (!iter->tmp_hdr)
+ return -ENOMEM;
+
+ iter->frag_list = skb_shinfo(skb)->frag_list;
+ iter->frag = iter->frag_list;
+ skb_frag_list_init(skb);
+
+ iter->offset = 0;
+ iter->hlen = hlen;
+ iter->frag_id = frag_id;
+ iter->nexthdr = nexthdr;
+
+ __skb_pull(skb, hlen);
+ fh = __skb_push(skb, sizeof(struct frag_hdr));
+ __skb_push(skb, hlen);
+ skb_reset_network_header(skb);
+ memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
+
+ fh->nexthdr = nexthdr;
+ fh->reserved = 0;
+ fh->frag_off = htons(IP6_MF);
+ fh->identification = frag_id;
+
+ first_len = skb_pagelen(skb);
+ skb->data_len = first_len - skb_headlen(skb);
+ skb->len = first_len;
+ ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+
+ return 0;
+}
+EXPORT_SYMBOL(ip6_fraglist_init);
+
+void ip6_fraglist_prepare(struct sk_buff *skb,
+ struct ip6_fraglist_iter *iter)
+{
+ struct sk_buff *frag = iter->frag;
+ unsigned int hlen = iter->hlen;
+ struct frag_hdr *fh;
+
+ frag->ip_summed = CHECKSUM_NONE;
+ skb_reset_transport_header(frag);
+ fh = __skb_push(frag, sizeof(struct frag_hdr));
+ __skb_push(frag, hlen);
+ skb_reset_network_header(frag);
+ memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
+ iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
+ fh->nexthdr = iter->nexthdr;
+ fh->reserved = 0;
+ fh->frag_off = htons(iter->offset);
+ if (frag->next)
+ fh->frag_off |= htons(IP6_MF);
+ fh->identification = iter->frag_id;
+ ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+ ip6_copy_metadata(frag, skb);
+}
+EXPORT_SYMBOL(ip6_fraglist_prepare);
+
+void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
+ unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
+ u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
+{
+ state->prevhdr = prevhdr;
+ state->nexthdr = nexthdr;
+ state->frag_id = frag_id;
+
+ state->hlen = hlen;
+ state->mtu = mtu;
+
+ state->left = skb->len - hlen; /* Space per frame */
+ state->ptr = hlen; /* Where to start from */
+
+ state->hroom = hdr_room;
+ state->troom = needed_tailroom;
+
+ state->offset = 0;
+}
+EXPORT_SYMBOL(ip6_frag_init);
+
+struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
+{
+ u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
+ struct sk_buff *frag;
+ struct frag_hdr *fh;
+ unsigned int len;
+
+ len = state->left;
+ /* IF: it doesn't fit, use 'mtu' - the data space left */
+ if (len > state->mtu)
+ len = state->mtu;
+ /* IF: we are not sending up to and including the packet end
+ then align the next start on an eight byte boundary */
+ if (len < state->left)
+ len &= ~7;
+
+ /* Allocate buffer */
+ frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
+ state->hroom + state->troom, GFP_ATOMIC);
+ if (!frag)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * Set up data on packet
+ */
+
+ ip6_copy_metadata(frag, skb);
+ skb_reserve(frag, state->hroom);
+ skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
+ skb_reset_network_header(frag);
+ fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
+ frag->transport_header = (frag->network_header + state->hlen +
+ sizeof(struct frag_hdr));
+
+ /*
+ * Charge the memory for the fragment to any owner
+ * it might possess
+ */
+ if (skb->sk)
+ skb_set_owner_w(frag, skb->sk);
+
+ /*
+ * Copy the packet header into the new buffer.
+ */
+ skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
+
+ fragnexthdr_offset = skb_network_header(frag);
+ fragnexthdr_offset += prevhdr - skb_network_header(skb);
+ *fragnexthdr_offset = NEXTHDR_FRAGMENT;
+
+ /*
+ * Build fragment header.
+ */
+ fh->nexthdr = state->nexthdr;
+ fh->reserved = 0;
+ fh->identification = state->frag_id;
+
+ /*
+ * Copy a block of the IP datagram.
+ */
+ BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
+ len));
+ state->left -= len;
+
+ fh->frag_off = htons(state->offset);
+ if (state->left > 0)
+ fh->frag_off |= htons(IP6_MF);
+ ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+
+ state->ptr += len;
+ state->offset += len;
+
+ return frag;
+}
+EXPORT_SYMBOL(ip6_frag_next);
+
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
@@ -599,12 +763,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
- struct ipv6hdr *tmp_hdr;
- struct frag_hdr *fh;
- unsigned int mtu, hlen, left, len, nexthdr_offset;
- int hroom, troom;
+ struct ip6_frag_state state;
+ unsigned int mtu, hlen, nexthdr_offset;
+ int hroom, err = 0;
__be32 frag_id;
- int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0;
err = ip6_find_1stfragopt(skb, &prevhdr);
@@ -651,6 +813,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) {
unsigned int first_len = skb_pagelen(skb);
+ struct ip6_fraglist_iter iter;
struct sk_buff *frag2;
if (first_len - hlen > mtu ||
@@ -678,74 +841,29 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
skb->truesize -= frag->truesize;
}
- err = 0;
- offset = 0;
- /* BUILD HEADER */
-
- *prevhdr = NEXTHDR_FRAGMENT;
- tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
- if (!tmp_hdr) {
- err = -ENOMEM;
+ err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
+ &iter);
+ if (err < 0)
goto fail;
- }
- frag = skb_shinfo(skb)->frag_list;
- skb_frag_list_init(skb);
-
- __skb_pull(skb, hlen);
- fh = __skb_push(skb, sizeof(struct frag_hdr));
- __skb_push(skb, hlen);
- skb_reset_network_header(skb);
- memcpy(skb_network_header(skb), tmp_hdr, hlen);
-
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->frag_off = htons(IP6_MF);
- fh->identification = frag_id;
-
- first_len = skb_pagelen(skb);
- skb->data_len = first_len - skb_headlen(skb);
- skb->len = first_len;
- ipv6_hdr(skb)->payload_len = htons(first_len -
- sizeof(struct ipv6hdr));
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
- if (frag) {
- frag->ip_summed = CHECKSUM_NONE;
- skb_reset_transport_header(frag);
- fh = __skb_push(frag, sizeof(struct frag_hdr));
- __skb_push(frag, hlen);
- skb_reset_network_header(frag);
- memcpy(skb_network_header(frag), tmp_hdr,
- hlen);
- offset += skb->len - hlen - sizeof(struct frag_hdr);
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->frag_off = htons(offset);
- if (frag->next)
- fh->frag_off |= htons(IP6_MF);
- fh->identification = frag_id;
- ipv6_hdr(frag)->payload_len =
- htons(frag->len -
- sizeof(struct ipv6hdr));
- ip6_copy_metadata(frag, skb);
- }
+ if (iter.frag)
+ ip6_fraglist_prepare(skb, &iter);
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES);
- if (err || !frag)
+ if (err || !iter.frag)
break;
- skb = frag;
- frag = skb->next;
- skb_mark_not_on_list(skb);
+ skb = ip6_fraglist_next(&iter);
}
- kfree(tmp_hdr);
+ kfree(iter.tmp_hdr);
if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -753,7 +871,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
return 0;
}
- kfree_skb_list(frag);
+ kfree_skb_list(iter.frag_list);
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
@@ -770,91 +888,26 @@ slow_path_clean:
}
slow_path:
- left = skb->len - hlen; /* Space per frame */
- ptr = hlen; /* Where to start from */
-
/*
* Fragment the datagram.
*/
- troom = rt->dst.dev->needed_tailroom;
+ ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
+ LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
+ &state);
/*
* Keep copying data until we run out.
*/
- while (left > 0) {
- u8 *fragnexthdr_offset;
-
- len = left;
- /* IF: it doesn't fit, use 'mtu' - the data space left */
- if (len > mtu)
- len = mtu;
- /* IF: we are not sending up to and including the packet end
- then align the next start on an eight byte boundary */
- if (len < left) {
- len &= ~7;
- }
- /* Allocate buffer */
- frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
- hroom + troom, GFP_ATOMIC);
- if (!frag) {
- err = -ENOMEM;
+ while (state.left > 0) {
+ frag = ip6_frag_next(skb, &state);
+ if (IS_ERR(frag)) {
+ err = PTR_ERR(frag);
goto fail;
}
/*
- * Set up data on packet
- */
-
- ip6_copy_metadata(frag, skb);
- skb_reserve(frag, hroom);
- skb_put(frag, len + hlen + sizeof(struct frag_hdr));
- skb_reset_network_header(frag);
- fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
- frag->transport_header = (frag->network_header + hlen +
- sizeof(struct frag_hdr));
-
- /*
- * Charge the memory for the fragment to any owner
- * it might possess
- */
- if (skb->sk)
- skb_set_owner_w(frag, skb->sk);
-
- /*
- * Copy the packet header into the new buffer.
- */
- skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
-
- fragnexthdr_offset = skb_network_header(frag);
- fragnexthdr_offset += prevhdr - skb_network_header(skb);
- *fragnexthdr_offset = NEXTHDR_FRAGMENT;
-
- /*
- * Build fragment header.
- */
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->identification = frag_id;
-
- /*
- * Copy a block of the IP datagram.
- */
- BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
- len));
- left -= len;
-
- fh->frag_off = htons(offset);
- if (left > 0)
- fh->frag_off |= htons(IP6_MF);
- ipv6_hdr(frag)->payload_len = htons(frag->len -
- sizeof(struct ipv6hdr));
-
- ptr += len;
- offset += len;
-
- /*
* Put this fragment into the sending queue.
*/
err = output(net, sk, frag);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 4c8e2ea8bf19..f874dde1ee85 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1293,8 +1293,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
if (rt) {
- neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6,
- rt->fib6_nh.fib_nh_dev, NULL,
+ neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
+ rt->fib6_nh->fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
@@ -1323,8 +1323,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
- neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6,
- rt->fib6_nh.fib_nh_dev, NULL,
+ neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
+ rt->fib6_nh->fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 1240ccd57f39..9530cc280953 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -16,6 +16,9 @@
#include <net/ip6_route.h>
#include <net/xfrm.h>
#include <net/netfilter/nf_queue.h>
+#include <net/netfilter/nf_conntrack_bridge.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include "../bridge/br_private.h"
int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
{
@@ -109,6 +112,122 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst,
}
EXPORT_SYMBOL_GPL(__nf_ip6_route);
+int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ struct nf_ct_bridge_frag_data *data,
+ int (*output)(struct net *, struct sock *sk,
+ const struct nf_ct_bridge_frag_data *data,
+ struct sk_buff *))
+{
+ int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+ struct ip6_frag_state state;
+ u8 *prevhdr, nexthdr = 0;
+ unsigned int mtu, hlen;
+ int hroom, err = 0;
+ __be32 frag_id;
+
+ err = ip6_find_1stfragopt(skb, &prevhdr);
+ if (err < 0)
+ goto blackhole;
+ hlen = err;
+ nexthdr = *prevhdr;
+
+ mtu = skb->dev->mtu;
+ if (frag_max_size > mtu ||
+ frag_max_size < IPV6_MIN_MTU)
+ goto blackhole;
+
+ mtu = frag_max_size;
+ if (mtu < hlen + sizeof(struct frag_hdr) + 8)
+ goto blackhole;
+ mtu -= hlen + sizeof(struct frag_hdr);
+
+ frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ (err = skb_checksum_help(skb)))
+ goto blackhole;
+
+ hroom = LL_RESERVED_SPACE(skb->dev);
+ if (skb_has_frag_list(skb)) {
+ unsigned int first_len = skb_pagelen(skb);
+ struct ip6_fraglist_iter iter;
+ struct sk_buff *frag2;
+
+ if (first_len - hlen > mtu ||
+ skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
+ goto blackhole;
+
+ if (skb_cloned(skb))
+ goto slow_path;
+
+ skb_walk_frags(skb, frag2) {
+ if (frag2->len > mtu ||
+ skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
+ goto blackhole;
+
+ /* Partially cloned skb? */
+ if (skb_shared(frag2))
+ goto slow_path;
+ }
+
+ err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
+ &iter);
+ if (err < 0)
+ goto blackhole;
+
+ for (;;) {
+ /* Prepare header of the next frame,
+ * before previous one went down.
+ */
+ if (iter.frag)
+ ip6_fraglist_prepare(skb, &iter);
+
+ err = output(net, sk, data, skb);
+ if (err || !iter.frag)
+ break;
+
+ skb = ip6_fraglist_next(&iter);
+ }
+
+ kfree(iter.tmp_hdr);
+ if (!err)
+ return 0;
+
+ kfree_skb_list(iter.frag_list);
+ return err;
+ }
+slow_path:
+ /* This is a linearized skbuff, the original geometry is lost for us.
+ * This may also be a clone skbuff, we could preserve the geometry for
+ * the copies but probably not worth the effort.
+ */
+ ip6_frag_init(skb, hlen, mtu, skb->dev->needed_tailroom,
+ LL_RESERVED_SPACE(skb->dev), prevhdr, nexthdr, frag_id,
+ &state);
+
+ while (state.left > 0) {
+ struct sk_buff *skb2;
+
+ skb2 = ip6_frag_next(skb, &state);
+ if (IS_ERR(skb2)) {
+ err = PTR_ERR(skb2);
+ goto blackhole;
+ }
+
+ err = output(net, sk, data, skb2);
+ if (err)
+ goto blackhole;
+ }
+ consume_skb(skb);
+ return err;
+
+blackhole:
+ kfree_skb(skb);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(br_ip6_fragment);
+
static const struct nf_ipv6_ops ipv6ops = {
#if IS_MODULE(CONFIG_IPV6)
.chk_addr = ipv6_chk_addr,
@@ -119,6 +238,10 @@ static const struct nf_ipv6_ops ipv6ops = {
.route_input = ip6_route_input,
.fragment = ip6_fragment,
.reroute = nf_ip6_reroute,
+#if IS_MODULE(CONFIG_IPV6)
+ .br_defrag = nf_ct_frag6_gather,
+ .br_fragment = br_ip6_fragment,
+#endif
};
int __init ipv6_netfilter_init(void)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 3de0e9b0a482..c5d59fa568d6 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -58,26 +58,21 @@ static struct inet_frags nf_frags;
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_timeout",
- .data = &init_net.nf_frag.frags.timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "nf_conntrack_frag6_low_thresh",
- .data = &init_net.nf_frag.frags.low_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
- .data = &init_net.nf_frag.frags.high_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
};
@@ -93,15 +88,15 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
GFP_KERNEL);
if (table == NULL)
goto err_alloc;
-
- table[0].data = &net->nf_frag.frags.timeout;
- table[1].data = &net->nf_frag.frags.low_thresh;
- table[1].extra2 = &net->nf_frag.frags.high_thresh;
- table[2].data = &net->nf_frag.frags.high_thresh;
- table[2].extra1 = &net->nf_frag.frags.low_thresh;
- table[2].extra2 = &init_net.nf_frag.frags.high_thresh;
}
+ table[0].data = &net->nf_frag.fqdir->timeout;
+ table[1].data = &net->nf_frag.fqdir->low_thresh;
+ table[1].extra2 = &net->nf_frag.fqdir->high_thresh;
+ table[2].data = &net->nf_frag.fqdir->high_thresh;
+ table[2].extra1 = &net->nf_frag.fqdir->low_thresh;
+ table[2].extra2 = &init_net.nf_frag.fqdir->high_thresh;
+
hdr = register_net_sysctl(net, "net/netfilter", table);
if (hdr == NULL)
goto err_reg;
@@ -148,12 +143,10 @@ static void nf_ct_frag6_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
- struct net *net;
fq = container_of(frag, struct frag_queue, q);
- net = container_of(fq->q.net, struct net, nf_frag.frags);
- ip6frag_expire_frag_queue(net, fq);
+ ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
}
/* Creation primitives. */
@@ -169,7 +162,7 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
};
struct inet_frag_queue *q;
- q = inet_frag_find(&net->nf_frag.frags, &key);
+ q = inet_frag_find(net->nf_frag.fqdir, &key);
if (!q)
return NULL;
@@ -276,7 +269,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
- add_frag_mem_limit(fq->q.net, skb->truesize);
+ add_frag_mem_limit(fq->q.fqdir, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -496,24 +489,24 @@ static int nf_ct_net_init(struct net *net)
{
int res;
- net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
- net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
- net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- net->nf_frag.frags.f = &nf_frags;
-
- res = inet_frags_init_net(&net->nf_frag.frags);
+ res = fqdir_init(&net->nf_frag.fqdir, &nf_frags, net);
if (res < 0)
return res;
+
+ net->nf_frag.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->nf_frag.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->nf_frag.fqdir->timeout = IPV6_FRAG_TIMEOUT;
+
res = nf_ct_frag6_sysctl_register(net);
if (res < 0)
- inet_frags_exit_net(&net->nf_frag.frags);
+ fqdir_exit(net->nf_frag.fqdir);
return res;
}
static void nf_ct_net_exit(struct net *net)
{
nf_ct_frags6_sysctl_unregister(net);
- inet_frags_exit_net(&net->nf_frag.frags);
+ fqdir_exit(net->nf_frag.fqdir);
}
static struct pernet_operations nf_ct_net_ops = {
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 2356b4af7309..0bbefc440bcd 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -48,8 +48,8 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
- atomic_read(&net->ipv6.frags.rhashtable.nelems),
- frag_mem_limit(&net->ipv6.frags));
+ atomic_read(&net->ipv6.fqdir->rhashtable.nelems),
+ frag_mem_limit(net->ipv6.fqdir));
return 0;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 1a832f5e190b..ff5b6d8de2c6 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -76,12 +76,10 @@ static void ip6_frag_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
- struct net *net;
fq = container_of(frag, struct frag_queue, q);
- net = container_of(fq->q.net, struct net, ipv6.frags);
- ip6frag_expire_frag_queue(net, fq);
+ ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
}
static struct frag_queue *
@@ -100,7 +98,7 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
IPV6_ADDR_LINKLOCAL)))
key.iif = 0;
- q = inet_frag_find(&net->ipv6.frags, &key);
+ q = inet_frag_find(net->ipv6.fqdir, &key);
if (!q)
return NULL;
@@ -200,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
- add_frag_mem_limit(fq->q.net, skb->truesize);
+ add_frag_mem_limit(fq->q.fqdir, skb->truesize);
fragsize = -skb_network_offset(skb) + skb->len;
if (fragsize > fq->q.max_size)
@@ -254,7 +252,7 @@ err:
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
struct sk_buff *prev_tail, struct net_device *dev)
{
- struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
+ struct net *net = fq->q.fqdir->net;
unsigned int nhoff;
void *reasm_data;
int payload_len;
@@ -401,23 +399,18 @@ static const struct inet6_protocol frag_protocol = {
static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.procname = "ip6frag_high_thresh",
- .data = &init_net.ipv6.frags.high_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &init_net.ipv6.frags.low_thresh
},
{
.procname = "ip6frag_low_thresh",
- .data = &init_net.ipv6.frags.low_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra2 = &init_net.ipv6.frags.high_thresh
},
{
.procname = "ip6frag_time",
- .data = &init_net.ipv6.frags.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -449,12 +442,12 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
if (!table)
goto err_alloc;
- table[0].data = &net->ipv6.frags.high_thresh;
- table[0].extra1 = &net->ipv6.frags.low_thresh;
- table[1].data = &net->ipv6.frags.low_thresh;
- table[1].extra2 = &net->ipv6.frags.high_thresh;
- table[2].data = &net->ipv6.frags.timeout;
}
+ table[0].data = &net->ipv6.fqdir->high_thresh;
+ table[0].extra1 = &net->ipv6.fqdir->low_thresh;
+ table[1].data = &net->ipv6.fqdir->low_thresh;
+ table[1].extra2 = &net->ipv6.fqdir->high_thresh;
+ table[2].data = &net->ipv6.fqdir->timeout;
hdr = register_net_sysctl(net, "net/ipv6", table);
if (!hdr)
@@ -517,25 +510,24 @@ static int __net_init ipv6_frags_init_net(struct net *net)
{
int res;
- net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
- net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
- net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
- net->ipv6.frags.f = &ip6_frags;
-
- res = inet_frags_init_net(&net->ipv6.frags);
+ res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net);
if (res < 0)
return res;
+ net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT;
+
res = ip6_frags_ns_sysctl_register(net);
if (res < 0)
- inet_frags_exit_net(&net->ipv6.frags);
+ fqdir_exit(net->ipv6.fqdir);
return res;
}
static void __net_exit ipv6_frags_exit_net(struct net *net)
{
ip6_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&net->ipv6.frags);
+ fqdir_exit(net->ipv6.fqdir);
}
static struct pernet_operations ip6_frags_ops = {
@@ -591,8 +583,8 @@ err_protocol:
void ipv6_frag_exit(void)
{
- inet_frags_fini(&ip6_frags);
ip6_frags_sysctl_unregister();
unregister_pernet_subsys(&ip6_frags_ops);
inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ inet_frags_fini(&ip6_frags);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 848e944f07df..fada5a13bcb2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -441,12 +441,12 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
if (!fl6->mp_hash)
fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
- if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
+ if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
goto out;
list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
fib6_siblings) {
- const struct fib6_nh *nh = &sibling->fib6_nh;
+ const struct fib6_nh *nh = sibling->fib6_nh;
int nh_upper_bound;
nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
@@ -460,7 +460,7 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
out:
res->f6i = match;
- res->nh = &match->fib6_nh;
+ res->nh = match->fib6_nh;
}
/*
@@ -496,13 +496,13 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
struct fib6_nh *nh;
if (!oif && ipv6_addr_any(saddr)) {
- nh = &f6i->fib6_nh;
+ nh = f6i->fib6_nh;
if (!(nh->fib_nh_flags & RTNH_F_DEAD))
goto out;
}
for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
- nh = &spf6i->fib6_nh;
+ nh = spf6i->fib6_nh;
if (__rt6_device_match(net, nh, saddr, oif, flags)) {
res->f6i = spf6i;
goto out;
@@ -511,14 +511,14 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
if (oif && flags & RT6_LOOKUP_F_IFACE) {
res->f6i = net->ipv6.fib6_null_entry;
- nh = &res->f6i->fib6_nh;
+ nh = res->f6i->fib6_nh;
goto out;
}
- nh = &f6i->fib6_nh;
+ nh = f6i->fib6_nh;
if (nh->fib_nh_flags & RTNH_F_DEAD) {
res->f6i = net->ipv6.fib6_null_entry;
- nh = &res->f6i->fib6_nh;
+ nh = res->f6i->fib6_nh;
}
out:
res->nh = nh;
@@ -714,7 +714,7 @@ static void __find_rr_leaf(struct fib6_info *f6i_start,
if (fib6_check_expired(f6i))
continue;
- nh = &f6i->fib6_nh;
+ nh = f6i->fib6_nh;
if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
res->f6i = f6i;
res->nh = nh;
@@ -796,7 +796,7 @@ static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
out:
if (!res->f6i) {
res->f6i = net->ipv6.fib6_null_entry;
- res->nh = &res->f6i->fib6_nh;
+ res->nh = res->f6i->fib6_nh;
res->fib6_flags = res->f6i->fib6_flags;
res->fib6_type = res->f6i->fib6_type;
}
@@ -1270,7 +1270,7 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
{
struct rt6_info *pcpu_rt, **p;
- p = this_cpu_ptr(res->f6i->rt6i_pcpu);
+ p = this_cpu_ptr(res->nh->rt6i_pcpu);
pcpu_rt = *p;
if (pcpu_rt)
@@ -1291,7 +1291,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
}
dst_hold(&pcpu_rt->dst);
- p = this_cpu_ptr(res->f6i->rt6i_pcpu);
+ p = this_cpu_ptr(res->nh->rt6i_pcpu);
prev = cmpxchg(p, NULL, pcpu_rt);
BUG_ON(prev);
@@ -1461,25 +1461,74 @@ static unsigned int fib6_mtu(const struct fib6_result *res)
return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
}
+#define FIB6_EXCEPTION_BUCKET_FLUSHED 0x1UL
+
+/* used when the flushed bit is not relevant, only access to the bucket
+ * (ie., all bucket users except rt6_insert_exception);
+ *
+ * called under rcu lock; sometimes called with rt6_exception_lock held
+ */
+static
+struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
+ spinlock_t *lock)
+{
+ struct rt6_exception_bucket *bucket;
+
+ if (lock)
+ bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+ lockdep_is_held(lock));
+ else
+ bucket = rcu_dereference(nh->rt6i_exception_bucket);
+
+ /* remove bucket flushed bit if set */
+ if (bucket) {
+ unsigned long p = (unsigned long)bucket;
+
+ p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
+ bucket = (struct rt6_exception_bucket *)p;
+ }
+
+ return bucket;
+}
+
+static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
+{
+ unsigned long p = (unsigned long)bucket;
+
+ return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
+}
+
+/* called with rt6_exception_lock held */
+static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
+ spinlock_t *lock)
+{
+ struct rt6_exception_bucket *bucket;
+ unsigned long p;
+
+ bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+ lockdep_is_held(lock));
+
+ p = (unsigned long)bucket;
+ p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
+ bucket = (struct rt6_exception_bucket *)p;
+ rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
+}
+
static int rt6_insert_exception(struct rt6_info *nrt,
const struct fib6_result *res)
{
struct net *net = dev_net(nrt->dst.dev);
struct rt6_exception_bucket *bucket;
+ struct fib6_info *f6i = res->f6i;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
- struct fib6_info *f6i = res->f6i;
+ struct fib6_nh *nh = res->nh;
int err = 0;
spin_lock_bh(&rt6_exception_lock);
- if (f6i->exception_bucket_flushed) {
- err = -EINVAL;
- goto out;
- }
-
- bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
+ bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
if (!bucket) {
bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
GFP_ATOMIC);
@@ -1487,7 +1536,10 @@ static int rt6_insert_exception(struct rt6_info *nrt,
err = -ENOMEM;
goto out;
}
- rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
+ rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
+ } else if (fib6_nh_excptn_bucket_flushed(bucket)) {
+ err = -EINVAL;
+ goto out;
}
#ifdef CONFIG_IPV6_SUBTREES
@@ -1542,7 +1594,7 @@ out:
return err;
}
-void rt6_flush_exceptions(struct fib6_info *rt)
+static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
@@ -1550,25 +1602,33 @@ void rt6_flush_exceptions(struct fib6_info *rt)
int i;
spin_lock_bh(&rt6_exception_lock);
- /* Prevent rt6_insert_exception() to recreate the bucket list */
- rt->exception_bucket_flushed = 1;
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
if (!bucket)
goto out;
+ /* Prevent rt6_insert_exception() to recreate the bucket list */
+ if (!from)
+ fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
+
for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
- hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
- rt6_remove_exception(bucket, rt6_ex);
- WARN_ON_ONCE(bucket->depth);
+ hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
+ if (!from ||
+ rcu_access_pointer(rt6_ex->rt6i->from) == from)
+ rt6_remove_exception(bucket, rt6_ex);
+ }
+ WARN_ON_ONCE(!from && bucket->depth);
bucket++;
}
-
out:
spin_unlock_bh(&rt6_exception_lock);
}
+void rt6_flush_exceptions(struct fib6_info *f6i)
+{
+ fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
+}
+
/* Find cached rt in the hash table inside passed in rt
* Caller has to hold rcu_read_lock()
*/
@@ -1597,7 +1657,7 @@ static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
src_key = saddr;
find_ex:
#endif
- bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
+ bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
@@ -1615,25 +1675,20 @@ find_ex:
}
/* Remove the passed in cached rt from the hash table that contains it */
-static int rt6_remove_exception_rt(struct rt6_info *rt)
+static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
+ const struct rt6_info *rt)
{
+ const struct in6_addr *src_key = NULL;
struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
- struct fib6_info *from;
int err;
- from = rcu_dereference(rt->from);
- if (!from ||
- !(rt->rt6i_flags & RTF_CACHE))
- return -EINVAL;
-
- if (!rcu_access_pointer(from->rt6i_exception_bucket))
+ if (!rcu_access_pointer(nh->rt6i_exception_bucket))
return -ENOENT;
spin_lock_bh(&rt6_exception_lock);
- bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
+
#ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates 'from' is in subtree
* and exception table is indexed by a hash of
@@ -1641,7 +1696,7 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->fib6_src.plen)
+ if (plen)
src_key = &rt->rt6i_src.addr;
#endif
rt6_ex = __rt6_find_exception_spinlock(&bucket,
@@ -1658,23 +1713,29 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
return err;
}
-/* Find rt6_ex which contains the passed in rt cache and
- * refresh its stamp
- */
-static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+static int rt6_remove_exception_rt(struct rt6_info *rt)
{
- struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
- struct rt6_exception *rt6_ex;
struct fib6_info *from;
- rcu_read_lock();
from = rcu_dereference(rt->from);
if (!from || !(rt->rt6i_flags & RTF_CACHE))
- goto unlock;
+ return -EINVAL;
- bucket = rcu_dereference(from->rt6i_exception_bucket);
+ return fib6_nh_remove_exception(from->fib6_nh,
+ from->fib6_src.plen, rt);
+}
+/* Find rt6_ex which contains the passed in rt cache and
+ * refresh its stamp
+ */
+static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
+ const struct rt6_info *rt)
+{
+ const struct in6_addr *src_key = NULL;
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+
+ bucket = fib6_nh_get_excptn_bucket(nh, NULL);
#ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates 'from' is in subtree
* and exception table is indexed by a hash of
@@ -1682,15 +1743,25 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->fib6_src.plen)
+ if (plen)
src_key = &rt->rt6i_src.addr;
#endif
- rt6_ex = __rt6_find_exception_rcu(&bucket,
- &rt->rt6i_dst.addr,
- src_key);
+ rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
if (rt6_ex)
rt6_ex->stamp = jiffies;
+}
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+ struct fib6_info *from;
+
+ rcu_read_lock();
+
+ from = rcu_dereference(rt->from);
+ if (!from || !(rt->rt6i_flags & RTF_CACHE))
+ goto unlock;
+
+ fib6_nh_update_exception(from->fib6_nh, from->fib6_src.plen, rt);
unlock:
rcu_read_unlock();
}
@@ -1718,15 +1789,13 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
}
static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
- struct fib6_info *rt, int mtu)
+ const struct fib6_nh *nh, int mtu)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
int i;
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
-
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
if (!bucket)
return;
@@ -1748,21 +1817,19 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
-static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
- struct in6_addr *gateway)
+static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
+ const struct in6_addr *gateway)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct hlist_node *tmp;
int i;
- if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ if (!rcu_access_pointer(nh->rt6i_exception_bucket))
return;
spin_lock_bh(&rt6_exception_lock);
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
-
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
if (bucket) {
for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
hlist_for_each_entry_safe(rt6_ex, tmp,
@@ -1827,23 +1894,21 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
gc_args->more++;
}
-void rt6_age_exceptions(struct fib6_info *rt,
- struct fib6_gc_args *gc_args,
- unsigned long now)
+static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct hlist_node *tmp;
int i;
- if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ if (!rcu_access_pointer(nh->rt6i_exception_bucket))
return;
rcu_read_lock_bh();
spin_lock(&rt6_exception_lock);
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
-
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
if (bucket) {
for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
hlist_for_each_entry_safe(rt6_ex, tmp,
@@ -1858,6 +1923,13 @@ void rt6_age_exceptions(struct fib6_info *rt,
rcu_read_unlock_bh();
}
+void rt6_age_exceptions(struct fib6_info *f6i,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
+}
+
/* must be called with rcu lock held */
int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
struct flowi6 *fl6, struct fib6_result *res, int strict)
@@ -2384,7 +2456,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
rcu_read_unlock();
return;
}
- res.nh = &res.f6i->fib6_nh;
+ res.nh = res.f6i->fib6_nh;
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
@@ -2533,7 +2605,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
restart:
for_each_fib6_node_rt_rcu(fn) {
res.f6i = rt;
- res.nh = &rt->fib6_nh;
+ res.nh = rt->fib6_nh;
if (fib6_check_expired(rt))
continue;
@@ -2557,7 +2629,7 @@ restart:
}
res.f6i = rt;
- res.nh = &rt->fib6_nh;
+ res.nh = rt->fib6_nh;
out:
if (ret) {
ip6_hold_safe(net, &ret);
@@ -3074,6 +3146,12 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
!netif_carrier_ok(dev))
fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+ fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
+ if (!fib6_nh->rt6i_pcpu) {
+ err = -ENOMEM;
+ goto out;
+ }
+
err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
cfg->fc_encap_type, cfg, gfp_flags, extack);
if (err)
@@ -3098,6 +3176,38 @@ out:
void fib6_nh_release(struct fib6_nh *fib6_nh)
{
+ struct rt6_exception_bucket *bucket;
+
+ rcu_read_lock();
+
+ fib6_nh_flush_exceptions(fib6_nh, NULL);
+ bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
+ if (bucket) {
+ rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
+ kfree(bucket);
+ }
+
+ rcu_read_unlock();
+
+ if (fib6_nh->rt6i_pcpu) {
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **ppcpu_rt;
+ struct rt6_info *pcpu_rt;
+
+ ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
+ pcpu_rt = *ppcpu_rt;
+ if (pcpu_rt) {
+ dst_dev_put(&pcpu_rt->dst);
+ dst_release(&pcpu_rt->dst);
+ *ppcpu_rt = NULL;
+ }
+ }
+
+ free_percpu(fib6_nh->rt6i_pcpu);
+ }
+
fib_nh_common_release(&fib6_nh->nh_common);
}
@@ -3160,7 +3270,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
err = -ENOMEM;
- rt = fib6_info_alloc(gfp_flags);
+ rt = fib6_info_alloc(gfp_flags, true);
if (!rt)
goto out;
@@ -3200,7 +3310,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->fib6_src.plen = cfg->fc_src_len;
#endif
- err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
+ err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
if (err)
goto out;
@@ -3208,7 +3318,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
* they would result in kernel looping; promote them to reject routes
*/
addr_type = ipv6_addr_type(&cfg->fc_dst);
- if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
+ if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev, addr_type))
rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
@@ -3326,7 +3436,7 @@ out_put:
return err;
}
-static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
+static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
{
int rc = -ESRCH;
@@ -3342,10 +3452,25 @@ out:
return rc;
}
+static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
+ struct fib6_nh *nh)
+{
+ struct fib6_result res = {
+ .f6i = rt,
+ .nh = nh,
+ };
+ struct rt6_info *rt_cache;
+
+ rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
+ if (rt_cache)
+ return __ip6_del_cached_rt(rt_cache, cfg);
+
+ return 0;
+}
+
static int ip6_route_del(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
- struct rt6_info *rt_cache;
struct fib6_table *table;
struct fib6_info *rt;
struct fib6_node *fn;
@@ -3368,26 +3493,18 @@ static int ip6_route_del(struct fib6_config *cfg,
for_each_fib6_node_rt_rcu(fn) {
struct fib6_nh *nh;
+ nh = rt->fib6_nh;
if (cfg->fc_flags & RTF_CACHE) {
- struct fib6_result res = {
- .f6i = rt,
- };
int rc;
- rt_cache = rt6_find_cached_rt(&res,
- &cfg->fc_dst,
- &cfg->fc_src);
- if (rt_cache) {
- rc = ip6_del_cached_rt(rt_cache, cfg);
- if (rc != -ESRCH) {
- rcu_read_unlock();
- return rc;
- }
+ rc = ip6_del_cached_rt(cfg, rt, nh);
+ if (rc != -ESRCH) {
+ rcu_read_unlock();
+ return rc;
}
continue;
}
- nh = &rt->fib6_nh;
if (cfg->fc_ifindex &&
(!nh->fib_nh_dev ||
nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
@@ -3509,7 +3626,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
if (!res.f6i)
goto out;
- res.nh = &res.f6i->fib6_nh;
+ res.nh = res.f6i->fib6_nh;
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
@@ -3561,12 +3678,12 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
goto out;
for_each_fib6_node_rt_rcu(fn) {
- if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
+ if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
continue;
if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
- !rt->fib6_nh.fib_nh_gw_family)
+ !rt->fib6_nh->fib_nh_gw_family)
continue;
- if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
+ if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
continue;
if (!fib6_info_hold_safe(rt))
continue;
@@ -3624,7 +3741,7 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
rcu_read_lock();
for_each_fib6_node_rt_rcu(&table->tb6_root) {
- struct fib6_nh *nh = &rt->fib6_nh;
+ struct fib6_nh *nh = rt->fib6_nh;
if (dev == nh->fib_nh_dev &&
((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
@@ -3876,7 +3993,7 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
- if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
+ if (((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
rt != net->ipv6.fib6_null_entry &&
ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
spin_lock_bh(&rt6_exception_lock);
@@ -3904,18 +4021,17 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
{
struct in6_addr *gateway = (struct in6_addr *)arg;
+ struct fib6_nh *nh = rt->fib6_nh;
if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
- rt->fib6_nh.fib_nh_gw_family &&
- ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
+ nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
return -1;
- }
/* Further clean up cached routes in exception table.
* This is needed because cached route may have a different
* gateway than its 'parent' in the case of an ip redirect.
*/
- rt6_exceptions_clean_tohost(rt, gateway);
+ fib6_nh_exceptions_clean_tohost(nh, gateway);
return 0;
}
@@ -3955,9 +4071,9 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
static bool rt6_is_dead(const struct fib6_info *rt)
{
- if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
- (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
- ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
+ if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
+ (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
+ ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
return true;
return false;
@@ -3969,11 +4085,11 @@ static int rt6_multipath_total_weight(const struct fib6_info *rt)
int total = 0;
if (!rt6_is_dead(rt))
- total += rt->fib6_nh.fib_nh_weight;
+ total += rt->fib6_nh->fib_nh_weight;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
if (!rt6_is_dead(iter))
- total += iter->fib6_nh.fib_nh_weight;
+ total += iter->fib6_nh->fib_nh_weight;
}
return total;
@@ -3984,11 +4100,11 @@ static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
int upper_bound = -1;
if (!rt6_is_dead(rt)) {
- *weight += rt->fib6_nh.fib_nh_weight;
+ *weight += rt->fib6_nh->fib_nh_weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
total) - 1;
}
- atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
+ atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
}
static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
@@ -4032,8 +4148,8 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg)
struct net *net = dev_net(arg->dev);
if (rt != net->ipv6.fib6_null_entry &&
- rt->fib6_nh.fib_nh_dev == arg->dev) {
- rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
+ rt->fib6_nh->fib_nh_dev == arg->dev) {
+ rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
fib6_update_sernum_upto_root(net, rt);
rt6_multipath_rebalance(rt);
}
@@ -4061,10 +4177,10 @@ static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
{
struct fib6_info *iter;
- if (rt->fib6_nh.fib_nh_dev == dev)
+ if (rt->fib6_nh->fib_nh_dev == dev)
return true;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.fib_nh_dev == dev)
+ if (iter->fib6_nh->fib_nh_dev == dev)
return true;
return false;
@@ -4085,12 +4201,12 @@ static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
struct fib6_info *iter;
unsigned int dead = 0;
- if (rt->fib6_nh.fib_nh_dev == down_dev ||
- rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
+ if (rt->fib6_nh->fib_nh_dev == down_dev ||
+ rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
dead++;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.fib_nh_dev == down_dev ||
- iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
+ if (iter->fib6_nh->fib_nh_dev == down_dev ||
+ iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
dead++;
return dead;
@@ -4102,11 +4218,11 @@ static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
{
struct fib6_info *iter;
- if (rt->fib6_nh.fib_nh_dev == dev)
- rt->fib6_nh.fib_nh_flags |= nh_flags;
+ if (rt->fib6_nh->fib_nh_dev == dev)
+ rt->fib6_nh->fib_nh_flags |= nh_flags;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.fib_nh_dev == dev)
- iter->fib6_nh.fib_nh_flags |= nh_flags;
+ if (iter->fib6_nh->fib_nh_dev == dev)
+ iter->fib6_nh->fib_nh_flags |= nh_flags;
}
/* called with write lock held for table with rt */
@@ -4121,12 +4237,12 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
switch (arg->event) {
case NETDEV_UNREGISTER:
- return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
+ return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
case NETDEV_DOWN:
if (rt->should_flush)
return -1;
if (!rt->fib6_nsiblings)
- return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
+ return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
if (rt6_multipath_uses_dev(rt, dev)) {
unsigned int count;
@@ -4142,10 +4258,10 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
}
return -2;
case NETDEV_CHANGE:
- if (rt->fib6_nh.fib_nh_dev != dev ||
+ if (rt->fib6_nh->fib_nh_dev != dev ||
rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
break;
- rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
+ rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
rt6_multipath_rebalance(rt);
break;
}
@@ -4179,9 +4295,36 @@ void rt6_disable_ip(struct net_device *dev, unsigned long event)
struct rt6_mtu_change_arg {
struct net_device *dev;
unsigned int mtu;
+ struct fib6_info *f6i;
};
-static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
+static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
+{
+ struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
+ struct fib6_info *f6i = arg->f6i;
+
+ /* For administrative MTU increase, there is no way to discover
+ * IPv6 PMTU increase, so PMTU increase should be updated here.
+ * Since RFC 1981 doesn't include administrative MTU increase
+ * update PMTU increase is a MUST. (i.e. jumbo frame)
+ */
+ if (nh->fib_nh_dev == arg->dev) {
+ struct inet6_dev *idev = __in6_dev_get(arg->dev);
+ u32 mtu = f6i->fib6_pmtu;
+
+ if (mtu >= arg->mtu ||
+ (mtu < arg->mtu && mtu == idev->cnf.mtu6))
+ fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
+
+ spin_lock_bh(&rt6_exception_lock);
+ rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
+ spin_unlock_bh(&rt6_exception_lock);
+ }
+
+ return 0;
+}
+
+static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
{
struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
struct inet6_dev *idev;
@@ -4196,24 +4339,11 @@ static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
if (!idev)
return 0;
- /* For administrative MTU increase, there is no way to discover
- IPv6 PMTU increase, so PMTU increase should be updated here.
- Since RFC 1981 doesn't include administrative MTU increase
- update PMTU increase is a MUST. (i.e. jumbo frame)
- */
- if (rt->fib6_nh.fib_nh_dev == arg->dev &&
- !fib6_metric_locked(rt, RTAX_MTU)) {
- u32 mtu = rt->fib6_pmtu;
-
- if (mtu >= arg->mtu ||
- (mtu < arg->mtu && mtu == idev->cnf.mtu6))
- fib6_metric_set(rt, RTAX_MTU, arg->mtu);
+ if (fib6_metric_locked(f6i, RTAX_MTU))
+ return 0;
- spin_lock_bh(&rt6_exception_lock);
- rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
- spin_unlock_bh(&rt6_exception_lock);
- }
- return 0;
+ arg->f6i = f6i;
+ return fib6_nh_mtu_change(f6i->fib6_nh, arg);
}
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
@@ -4227,6 +4357,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
}
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
+ [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 },
[RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
[RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
[RTA_OIF] = { .type = NLA_U32 },
@@ -4492,7 +4623,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
goto cleanup;
}
- rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
+ rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
rt, &r_cfg);
@@ -4659,7 +4790,7 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ NLA_ALIGN(sizeof(struct rtnexthop))
+ nla_total_size(16) /* RTA_GATEWAY */
- + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
+ + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws);
nexthop_len *= rt->fib6_nsiblings;
}
@@ -4677,7 +4808,7 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+ nla_total_size(1) /* RTA_PREF */
- + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
+ + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws)
+ nexthop_len;
}
@@ -4797,14 +4928,14 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
if (!mp)
goto nla_put_failure;
- if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
- rt->fib6_nh.fib_nh_weight) < 0)
+ if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
+ rt->fib6_nh->fib_nh_weight) < 0)
goto nla_put_failure;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings) {
- if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
- sibling->fib6_nh.fib_nh_weight) < 0)
+ if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
+ sibling->fib6_nh->fib_nh_weight) < 0)
goto nla_put_failure;
}
@@ -4812,7 +4943,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
} else {
unsigned char nh_flags = 0;
- if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
+ if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common,
&nh_flags, false) < 0)
goto nla_put_failure;
@@ -4842,7 +4973,7 @@ nla_put_failure:
static bool fib6_info_uses_dev(const struct fib6_info *f6i,
const struct net_device *dev)
{
- if (f6i->fib6_nh.fib_nh_dev == dev)
+ if (f6i->fib6_nh->fib_nh_dev == dev)
return true;
if (f6i->fib6_nsiblings) {
@@ -4850,7 +4981,7 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i,
list_for_each_entry_safe(sibling, next_sibling,
&f6i->fib6_siblings, fib6_siblings) {
- if (sibling->fib6_nh.fib_nh_dev == dev)
+ if (sibling->fib6_nh->fib_nh_dev == dev)
return true;
}
}
@@ -5129,6 +5260,38 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
+void fib6_rt_update(struct net *net, struct fib6_info *rt,
+ struct nl_info *info)
+{
+ u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ /* call_fib6_entry_notifiers will be removed when in-kernel notifier
+ * is implemented and supported for nexthop objects
+ */
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
+
+ skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
+ if (!skb)
+ goto errout;
+
+ err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
+ RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
+ info->nlh, gfp_any());
+ return;
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+}
+
static int ip6_route_dev_notify(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -5139,7 +5302,7 @@ static int ip6_route_dev_notify(struct notifier_block *this,
return NOTIFY_OK;
if (event == NETDEV_REGISTER) {
- net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
+ net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
net->ipv6.ip6_null_entry->dst.dev = dev;
net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -5333,11 +5496,11 @@ static int __net_init ip6_route_net_init(struct net *net)
if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
goto out_ip6_dst_ops;
- net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
- sizeof(*net->ipv6.fib6_null_entry),
- GFP_KERNEL);
+ net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
if (!net->ipv6.fib6_null_entry)
goto out_ip6_dst_entries;
+ memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
+ sizeof(*net->ipv6.fib6_null_entry));
net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
sizeof(*net->ipv6.ip6_null_entry),
@@ -5474,7 +5637,7 @@ void __init ip6_route_init_special_entries(void)
/* Registering of the loopback is done before this portion of code,
* the loopback reference in rt6_info will not be taken, do it
* manually for init_net */
- init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
+ init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES