diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2013-05-01 17:47:44 +0200 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2013-05-01 17:47:44 +0200 |
commit | bf61c8840efe60fd8f91446860b63338fb424158 (patch) | |
tree | 7a71832407a4f0d6346db773343f4c3ae2257b19 /net/ipv6/route.c | |
parent | Input: wacom - fix "can not retrieve extra class descriptor" for DTH2242 (diff) | |
parent | Input: trackpoint - Optimize trackpoint init to use power-on reset (diff) | |
download | linux-bf61c8840efe60fd8f91446860b63338fb424158.tar.xz linux-bf61c8840efe60fd8f91446860b63338fb424158.zip |
Merge branch 'next' into for-linus
Prepare first set of updates for 3.10 merge window.
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r-- | net/ipv6/route.c | 394 |
1 files changed, 212 insertions, 182 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b1e6cf0b95fd..e5fe0041adfa 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -57,6 +57,7 @@ #include <net/xfrm.h> #include <net/netevent.h> #include <net/netlink.h> +#include <net/nexthop.h> #include <asm/uaccess.h> @@ -144,25 +145,12 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, struct neighbour *n; daddr = choose_neigh_daddr(rt, skb, daddr); - n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); + n = __ipv6_neigh_lookup(dst->dev, daddr); if (n) return n; return neigh_create(&nd_tbl, daddr, dst->dev); } -static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev) -{ - struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway); - if (!n) { - n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev); - if (IS_ERR(n)) - return PTR_ERR(n); - } - rt->n = n; - - return 0; -} - static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .protocol = cpu_to_be16(ETH_P_IPV6), @@ -289,6 +277,8 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); rt->rt6i_genid = rt_genid(net); + INIT_LIST_HEAD(&rt->rt6i_siblings); + rt->rt6i_nsiblings = 0; } return rt; } @@ -297,9 +287,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - - if (rt->n) - neigh_release(rt->n); + struct dst_entry *from = dst->from; if (!(rt->dst.flags & DST_HOST)) dst_destroy_metrics_generic(dst); @@ -309,8 +297,8 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } - if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from) - dst_release(dst->from); + dst->from = NULL; + dst_release(from); if (rt6_has_peer(rt)) { struct inet_peer *peer = rt6_peer_ptr(rt); @@ -318,13 +306,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) } } -static atomic_t __rt6_peer_genid = ATOMIC_INIT(0); - -static u32 rt6_peer_genid(void) -{ - return atomic_read(&__rt6_peer_genid); -} - void rt6_bind_peer(struct rt6_info *rt, int create) { struct inet_peer_base *base; @@ -338,8 +319,6 @@ void rt6_bind_peer(struct rt6_info *rt, int create) if (peer) { if (!rt6_set_peer(rt, peer)) inet_putpeer(peer); - else - rt->rt6i_peer_genid = rt6_peer_genid(); } } @@ -360,11 +339,6 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, in6_dev_put(idev); } } - if (rt->n && rt->n->dev == dev) { - rt->n->dev = loopback_dev; - dev_hold(loopback_dev); - dev_put(dev); - } } } @@ -385,6 +359,62 @@ static bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } +/* Multipath route selection: + * Hash based function using packet header and flowlabel. + * Adapted from fib_info_hashfn() + */ +static int rt6_info_hash_nhsfn(unsigned int candidate_count, + const struct flowi6 *fl6) +{ + unsigned int val = fl6->flowi6_proto; + + val ^= ipv6_addr_hash(&fl6->daddr); + val ^= ipv6_addr_hash(&fl6->saddr); + + /* Work only if this not encapsulated */ + switch (fl6->flowi6_proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_SCTP: + val ^= (__force u16)fl6->fl6_sport; + val ^= (__force u16)fl6->fl6_dport; + break; + + case IPPROTO_ICMPV6: + val ^= (__force u16)fl6->fl6_icmp_type; + val ^= (__force u16)fl6->fl6_icmp_code; + break; + } + /* RFC6438 recommands to use flowlabel */ + val ^= (__force u32)fl6->flowlabel; + + /* Perhaps, we need to tune, this function? */ + val = val ^ (val >> 7) ^ (val >> 12); + return val % candidate_count; +} + +static struct rt6_info *rt6_multipath_select(struct rt6_info *match, + struct flowi6 *fl6) +{ + struct rt6_info *sibling, *next_sibling; + int route_choosen; + + route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6); + /* Don't change the route, if route_choosen == 0 + * (siblings does not include ourself) + */ + if (route_choosen) + list_for_each_entry_safe(sibling, next_sibling, + &match->rt6i_siblings, rt6i_siblings) { + route_choosen--; + if (route_choosen == 0) { + match = sibling; + break; + } + } + return match; +} + /* * Route lookup. Any table->tb6_lock is implied. */ @@ -448,24 +478,34 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - neigh = rt ? rt->n : NULL; - if (!neigh || (neigh->nud_state & NUD_VALID)) + if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) return; - read_lock_bh(&neigh->lock); - if (!(neigh->nud_state & NUD_VALID) && + rcu_read_lock_bh(); + neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + if (neigh) { + write_lock(&neigh->lock); + if (neigh->nud_state & NUD_VALID) + goto out; + } + + if (!neigh || time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { struct in6_addr mcaddr; struct in6_addr *target; - neigh->updated = jiffies; - read_unlock_bh(&neigh->lock); + if (neigh) { + neigh->updated = jiffies; + write_unlock(&neigh->lock); + } - target = (struct in6_addr *)&neigh->primary_key; + target = (struct in6_addr *)&rt->rt6i_gateway; addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); } else { - read_unlock_bh(&neigh->lock); +out: + write_unlock(&neigh->lock); } + rcu_read_unlock_bh(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -487,35 +527,36 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) return 0; } -static inline int rt6_check_neigh(struct rt6_info *rt) +static inline bool rt6_check_neigh(struct rt6_info *rt) { struct neighbour *neigh; - int m; + bool ret = false; - neigh = rt->n; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) - m = 1; - else if (neigh) { - read_lock_bh(&neigh->lock); + return true; + + rcu_read_lock_bh(); + neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + if (neigh) { + read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) - m = 2; + ret = true; #ifdef CONFIG_IPV6_ROUTER_PREF - else if (neigh->nud_state & NUD_FAILED) - m = 0; + else if (!(neigh->nud_state & NUD_FAILED)) + ret = true; #endif - else - m = 1; - read_unlock_bh(&neigh->lock); - } else - m = 0; - return m; + read_unlock(&neigh->lock); + } + rcu_read_unlock_bh(); + + return ret; } static int rt6_score_route(struct rt6_info *rt, int oif, int strict) { - int m, n; + int m; m = rt6_check_dev(rt, oif); if (!m && (strict & RT6_LOOKUP_F_IFACE)) @@ -523,8 +564,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, #ifdef CONFIG_IPV6_ROUTER_PREF m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; #endif - n = rt6_check_neigh(rt); - if (!n && (strict & RT6_LOOKUP_F_REACHABLE)) + if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE)) return -1; return m; } @@ -666,7 +706,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, else rt6_set_expires(rt, jiffies + HZ * lifetime); - dst_release(&rt->dst); + ip6_rt_put(rt); } return 0; } @@ -702,6 +742,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, restart: rt = fn->leaf; rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); + if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) + rt = rt6_multipath_select(rt, fl6); BACKTRACK(net, &fl6->saddr); out: dst_use(&rt->dst, jiffies); @@ -783,8 +825,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, rt = ip6_rt_copy(ort, daddr); if (rt) { - int attempts = !in_softirq(); - if (!(rt->rt6i_flags & RTF_GATEWAY)) { if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) @@ -800,32 +840,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, rt->rt6i_src.plen = 128; } #endif - - retry: - if (rt6_bind_neighbour(rt, rt->dst.dev)) { - struct net *net = dev_net(rt->dst.dev); - int saved_rt_min_interval = - net->ipv6.sysctl.ip6_rt_gc_min_interval; - int saved_rt_elasticity = - net->ipv6.sysctl.ip6_rt_gc_elasticity; - - if (attempts-- > 0) { - net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; - net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; - - ip6_dst_gc(&net->ipv6.ip6_dst_ops); - - net->ipv6.sysctl.ip6_rt_gc_elasticity = - saved_rt_elasticity; - net->ipv6.sysctl.ip6_rt_gc_min_interval = - saved_rt_min_interval; - goto retry; - } - - net_warn_ratelimited("Neighbour table overflow\n"); - dst_free(&rt->dst); - return NULL; - } } return rt; @@ -836,10 +850,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, { struct rt6_info *rt = ip6_rt_copy(ort, daddr); - if (rt) { + if (rt) rt->rt6i_flags |= RTF_CACHE; - rt->n = neigh_clone(ort->n); - } return rt; } @@ -863,7 +875,8 @@ restart_2: restart: rt = rt6_select(fn, oif, strict | reachable); - + if (rt->rt6i_nsiblings && oif == 0) + rt = rt6_multipath_select(rt, fl6); BACKTRACK(net, &fl6->saddr); if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) @@ -872,14 +885,14 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); else goto out2; - dst_release(&rt->dst); + ip6_rt_put(rt); rt = nrt ? : net->ipv6.ip6_null_entry; dst_hold(&rt->dst); @@ -896,7 +909,7 @@ restart: * Race condition! In the gap, when table->tb6_lock was * released someone could insert this route. Relookup. */ - dst_release(&rt->dst); + ip6_rt_put(rt); goto relookup; out: @@ -938,7 +951,7 @@ void ip6_route_input(struct sk_buff *skb) .flowi6_iif = skb->dev->ifindex, .daddr = iph->daddr, .saddr = iph->saddr, - .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, + .flowlabel = ip6_flowinfo(iph), .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, }; @@ -998,7 +1011,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags; - rt6_clean_expires(rt); rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); @@ -1030,14 +1042,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) return NULL; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { - if (rt->rt6i_peer_genid != rt6_peer_genid()) { - if (!rt6_has_peer(rt)) - rt6_bind_peer(rt, 0); - rt->rt6i_peer_genid = rt6_peer_genid(); - } + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) return dst; - } + return NULL; } @@ -1108,7 +1115,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; - fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + fl6.flowlabel = ip6_flowinfo(iph); dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1136,7 +1143,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; - fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + fl6.flowlabel = ip6_flowinfo(iph); dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1196,7 +1203,6 @@ static struct dst_entry *icmp6_dst_gc_list; static DEFINE_SPINLOCK(icmp6_dst_lock); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, - struct neighbour *neigh, struct flowi6 *fl6) { struct dst_entry *dst; @@ -1214,20 +1220,8 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, goto out; } - if (neigh) - neigh_hold(neigh); - else { - neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr); - if (IS_ERR(neigh)) { - in6_dev_put(idev); - dst_free(&rt->dst); - return ERR_CAST(neigh); - } - } - rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; - rt->n = neigh; atomic_set(&rt->dst.__refcnt, 1); rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; @@ -1316,12 +1310,6 @@ out: return entries > rt_max_size; } -/* Clean host part of a prefix. Not necessary in radix tree, - but results in cleaner routing tables. - - Remove it only when all the things will work! - */ - int ip6_dst_hoplimit(struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); @@ -1507,7 +1495,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; if (dev) { if (dev != grt->dst.dev) { - dst_release(&grt->dst); + ip6_rt_put(grt); goto out; } } else { @@ -1518,7 +1506,7 @@ int ip6_route_add(struct fib6_config *cfg) } if (!(grt->rt6i_flags & RTF_GATEWAY)) err = 0; - dst_release(&grt->dst); + ip6_rt_put(grt); if (err) goto out; @@ -1542,12 +1530,6 @@ int ip6_route_add(struct fib6_config *cfg) } else rt->rt6i_prefsrc.plen = 0; - if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { - err = rt6_bind_neighbour(rt, dev); - if (err) - goto out; - } - rt->rt6i_flags = cfg->fc_flags; install_route: @@ -1604,7 +1586,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) write_unlock_bh(&table->tb6_lock); out: - dst_release(&rt->dst); + ip6_rt_put(rt); return err; } @@ -1660,37 +1642,32 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; struct rt6_info *rt, *nrt = NULL; - const struct in6_addr *target; struct ndisc_options ndopts; - const struct in6_addr *dest; - struct neighbour *old_neigh; struct inet6_dev *in6_dev; struct neighbour *neigh; - struct icmp6hdr *icmph; + struct rd_msg *msg; int optlen, on_link; u8 *lladdr; optlen = skb->tail - skb->transport_header; - optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + optlen -= sizeof(*msg); if (optlen < 0) { net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); return; } - icmph = icmp6_hdr(skb); - target = (const struct in6_addr *) (icmph + 1); - dest = target + 1; + msg = (struct rd_msg *)icmp6_hdr(skb); - if (ipv6_addr_is_multicast(dest)) { + if (ipv6_addr_is_multicast(&msg->dest)) { net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); return; } on_link = 0; - if (ipv6_addr_equal(dest, target)) { + if (ipv6_addr_equal(&msg->dest, &msg->target)) { on_link = 1; - } else if (ipv6_addr_type(target) != + } else if (ipv6_addr_type(&msg->target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); return; @@ -1707,7 +1684,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu * first-hop router for the specified ICMP Destination Address. */ - if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { + if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) { net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); return; } @@ -1734,15 +1711,10 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu */ dst_confirm(&rt->dst); - neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); + neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); if (!neigh) return; - /* Duplicate redirect: silently ignore. */ - old_neigh = rt->n; - if (neigh == old_neigh) - goto out; - /* * We have finally decided to accept it. */ @@ -1754,7 +1726,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NEIGH_UPDATE_F_ISROUTER)) ); - nrt = ip6_rt_copy(rt, dest); + nrt = ip6_rt_copy(rt, &msg->dest); if (!nrt) goto out; @@ -1763,16 +1735,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_flags &= ~RTF_GATEWAY; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - nrt->n = neigh_clone(neigh); if (ip6_ins_rt(nrt)) goto out; netevent.old = &rt->dst; - netevent.old_neigh = old_neigh; netevent.new = &nrt->dst; - netevent.new_neigh = neigh; - netevent.daddr = dest; + netevent.daddr = &msg->dest; + netevent.neigh = neigh; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags & RTF_CACHE) { @@ -1814,8 +1784,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == (RTF_DEFAULT | RTF_ADDRCONF)) rt6_set_from(rt, ort); - else - rt6_clean_expires(rt); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES @@ -1947,7 +1915,8 @@ void rt6_purge_dflt_routers(struct net *net) restart: read_lock_bh(&table->tb6_lock); for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { + if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); ip6_del_rt(rt); @@ -1987,7 +1956,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) switch(cmd) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; err = copy_from_user(&rtmsg, arg, sizeof(struct in6_rtmsg)); @@ -2078,7 +2047,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); - int err; if (!rt) { net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); @@ -2097,11 +2065,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_ANYCAST; else rt->rt6i_flags |= RTF_LOCAL; - err = rt6_bind_neighbour(rt, rt->dst.dev); - if (err) { - dst_free(&rt->dst); - return ERR_PTR(err); - } rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; @@ -2249,6 +2212,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2326,11 +2290,71 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_TABLE]) cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); + if (tb[RTA_MULTIPATH]) { + cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); + cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); + } + err = 0; errout: return err; } +static int ip6_route_multipath(struct fib6_config *cfg, int add) +{ + struct fib6_config r_cfg; + struct rtnexthop *rtnh; + int remaining; + int attrlen; + int err = 0, last_err = 0; + +beginning: + rtnh = (struct rtnexthop *)cfg->fc_mp; + remaining = cfg->fc_mp_len; + + /* Parse a Multipath Entry */ + while (rtnh_ok(rtnh, remaining)) { + memcpy(&r_cfg, cfg, sizeof(*cfg)); + if (rtnh->rtnh_ifindex) + r_cfg.fc_ifindex = rtnh->rtnh_ifindex; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + if (nla) { + nla_memcpy(&r_cfg.fc_gateway, nla, 16); + r_cfg.fc_flags |= RTF_GATEWAY; + } + } + err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg); + if (err) { + last_err = err; + /* If we are trying to remove a route, do not stop the + * loop when ip6_route_del() fails (because next hop is + * already gone), we should try to remove all next hops. + */ + if (add) { + /* If add fails, we should try to delete all + * next hops that have been already added. + */ + add = 0; + goto beginning; + } + } + /* Because each route is added like a single route we remove + * this flag after the first nexthop (if there is a collision, + * we have already fail to add the first nexthop: + * fib6_add_rt2node() has reject it). + */ + cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL; + rtnh = rtnh_next(rtnh, &remaining); + } + + return last_err; +} + static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { struct fib6_config cfg; @@ -2340,7 +2364,10 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a if (err < 0) return err; - return ip6_route_del(&cfg); + if (cfg.fc_mp) + return ip6_route_multipath(&cfg, 0); + else + return ip6_route_del(&cfg); } static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) @@ -2352,7 +2379,10 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a if (err < 0) return err; - return ip6_route_add(&cfg); + if (cfg.fc_mp) + return ip6_route_multipath(&cfg, 1); + else + return ip6_route_add(&cfg); } static inline size_t rt6_nlmsg_size(void) @@ -2380,7 +2410,6 @@ static int rt6_fill_node(struct net *net, struct nlmsghdr *nlh; long expires; u32 table; - struct neighbour *n; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2493,9 +2522,8 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - n = rt->n; - if (n) { - if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) + if (rt->rt6i_flags & RTF_GATEWAY) { + if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0) goto nla_put_failure; } @@ -2596,7 +2624,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { - dst_release(&rt->dst); + ip6_rt_put(rt); err = -ENOBUFS; goto errout; } @@ -2690,7 +2718,6 @@ struct rt6_proc_arg static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; - struct neighbour *n; seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); @@ -2699,9 +2726,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif - n = rt->n; - if (n) { - seq_printf(m, "%pi6", n->primary_key); + if (rt->rt6i_flags & RTF_GATEWAY) { + seq_printf(m, "%pi6", &rt->rt6i_gateway); } else { seq_puts(m, "00000000000000000000000000000000"); } @@ -2873,6 +2899,10 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; + + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + table[0].procname = NULL; } return table; @@ -2964,8 +2994,8 @@ static void __net_exit ip6_route_net_exit(struct net *net) static int __net_init ip6_route_net_init_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); - proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); + proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops); + proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops); #endif return 0; } @@ -2973,8 +3003,8 @@ static int __net_init ip6_route_net_init_late(struct net *net) static void __net_exit ip6_route_net_exit_late(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(net, "ipv6_route"); - proc_net_remove(net, "rt6_stats"); + remove_proc_entry("ipv6_route", net->proc_net); + remove_proc_entry("rt6_stats", net->proc_net); #endif } |