diff options
author | Peter Nørlund <pch@ordbogen.com> | 2015-09-30 10:12:22 +0200 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-10-05 12:00:04 +0200 |
commit | 79a131592dbb81a2dba208622a2ffbfc53f28bc0 (patch) | |
tree | 16eb6cd39f9475f87223cbca93da02f1853570bd /net/ipv4/route.c | |
parent | ipv4: L3 hash-based multipath (diff) | |
download | linux-79a131592dbb81a2dba208622a2ffbfc53f28bc0.tar.xz linux-79a131592dbb81a2dba208622a2ffbfc53f28bc0.zip |
ipv4: ICMP packet inspection for multipath
ICMP packets are inspected to let them route together with the flow they
belong to, minimizing the chance that a problematic path will affect flows
on other paths, and so that anycast environments can work with ECMP.
Signed-off-by: Peter Nørlund <pch@ordbogen.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 59 |
1 files changed, 52 insertions, 7 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0cca44476b1e..54297d3a0559 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1651,6 +1651,48 @@ out: return err; } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +/* To make ICMP packets follow the right flow, the multipath hash is + * calculated from the inner IP addresses in reverse order. + */ +static int ip_multipath_icmp_hash(struct sk_buff *skb) +{ + const struct iphdr *outer_iph = ip_hdr(skb); + struct icmphdr _icmph; + const struct icmphdr *icmph; + struct iphdr _inner_iph; + const struct iphdr *inner_iph; + + if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) + goto standard_hash; + + icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), + &_icmph); + if (!icmph) + goto standard_hash; + + if (icmph->type != ICMP_DEST_UNREACH && + icmph->type != ICMP_REDIRECT && + icmph->type != ICMP_TIME_EXCEEDED && + icmph->type != ICMP_PARAMETERPROB) { + goto standard_hash; + } + + inner_iph = skb_header_pointer(skb, + outer_iph->ihl * 4 + sizeof(_icmph), + sizeof(_inner_iph), &_inner_iph); + if (!inner_iph) + goto standard_hash; + + return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr); + +standard_hash: + return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr); +} + +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ + static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, const struct flowi4 *fl4, @@ -1661,7 +1703,10 @@ static int ip_mkroute_input(struct sk_buff *skb, if (res->fi && res->fi->fib_nhs > 1) { int h; - h = fib_multipath_hash(saddr, daddr); + if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP)) + h = ip_multipath_icmp_hash(skb); + else + h = fib_multipath_hash(saddr, daddr); fib_select_multipath(res, h); } #endif @@ -2030,7 +2075,8 @@ add: * Major route resolver routine. */ -struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) +struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, + int mp_hash) { struct net_device *dev_out = NULL; __u8 tos = RT_FL_TOS(fl4); @@ -2194,10 +2240,9 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { - int h; - - h = fib_multipath_hash(fl4->saddr, fl4->daddr); - fib_select_multipath(&res, h); + if (mp_hash < 0) + mp_hash = fib_multipath_hash(fl4->saddr, fl4->daddr); + fib_select_multipath(&res, mp_hash); } else #endif @@ -2220,7 +2265,7 @@ out: rcu_read_unlock(); return rth; } -EXPORT_SYMBOL_GPL(__ip_route_output_key); +EXPORT_SYMBOL_GPL(__ip_route_output_key_hash); static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) { |