diff options
-rw-r--r-- | include/net/ip_vs.h | 4 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_xmit.c | 171 |
2 files changed, 96 insertions, 79 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 52fbe2308c38..6e8a6192e574 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -529,6 +529,10 @@ struct ip_vs_dest { spinlock_t dst_lock; /* lock of dst_cache */ struct dst_entry *dst_cache; /* destination cache entry */ u32 dst_rtos; /* RT_TOS(tos) for dst */ + u32 dst_cookie; +#ifdef CONFIG_IP_VS_IPV6 + struct in6_addr dst_saddr; +#endif /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 8817afa34e6a..b0bd8afbf368 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -26,6 +26,7 @@ #include <net/route.h> /* for ip_route_output */ #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/addrconf.h> #include <linux/icmpv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> @@ -37,26 +38,27 @@ * Destination cache to speed up outgoing route lookup */ static inline void -__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst) +__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst, + u32 dst_cookie) { struct dst_entry *old_dst; old_dst = dest->dst_cache; dest->dst_cache = dst; dest->dst_rtos = rtos; + dest->dst_cookie = dst_cookie; dst_release(old_dst); } static inline struct dst_entry * -__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie) +__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) { struct dst_entry *dst = dest->dst_cache; if (!dst) return NULL; - if ((dst->obsolete - || (dest->af == AF_INET && rtos != dest->dst_rtos)) && - dst->ops->check(dst, cookie) == NULL) { + if ((dst->obsolete || rtos != dest->dst_rtos) && + dst->ops->check(dst, dest->dst_cookie) == NULL) { dest->dst_cache = NULL; dst_release(dst); return NULL; @@ -66,15 +68,16 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie) } static struct rtable * -__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) +__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos) { + struct net *net = dev_net(skb->dev); struct rtable *rt; /* Route to the other host */ struct ip_vs_dest *dest = cp->dest; if (dest) { spin_lock(&dest->dst_lock); if (!(rt = (struct rtable *) - __ip_vs_dst_check(dest, rtos, 0))) { + __ip_vs_dst_check(dest, rtos))) { struct flowi fl = { .oif = 0, .nl_u = { @@ -84,13 +87,13 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) .tos = rtos, } }, }; - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(net, &rt, &fl)) { spin_unlock(&dest->dst_lock); IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &dest->addr.ip); return NULL; } - __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst)); + __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", &dest->addr.ip, atomic_read(&rt->dst.__refcnt), rtos); @@ -106,7 +109,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) .tos = rtos, } }, }; - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(net, &rt, &fl)) { IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &cp->daddr.ip); return NULL; @@ -117,62 +120,79 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) } #ifdef CONFIG_IP_VS_IPV6 + +static struct dst_entry * +__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, + struct in6_addr *ret_saddr, int do_xfrm) +{ + struct dst_entry *dst; + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip6_u = { + .daddr = *daddr, + }, + }, + }; + + dst = ip6_route_output(net, NULL, &fl); + if (dst->error) + goto out_err; + if (!ret_saddr) + return dst; + if (ipv6_addr_any(&fl.fl6_src) && + ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, + &fl.fl6_dst, 0, &fl.fl6_src) < 0) + goto out_err; + if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0) + goto out_err; + ipv6_addr_copy(ret_saddr, &fl.fl6_src); + return dst; + +out_err: + dst_release(dst); + IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr); + return NULL; +} + static struct rt6_info * -__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) +__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct in6_addr *ret_saddr, int do_xfrm) { + struct net *net = dev_net(skb->dev); struct rt6_info *rt; /* Route to the other host */ struct ip_vs_dest *dest = cp->dest; + struct dst_entry *dst; if (dest) { spin_lock(&dest->dst_lock); - rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0); + rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0); if (!rt) { - struct flowi fl = { - .oif = 0, - .nl_u = { - .ip6_u = { - .daddr = dest->addr.in6, - .saddr = { - .s6_addr32 = - { 0, 0, 0, 0 }, - }, - }, - }, - }; + u32 cookie; - rt = (struct rt6_info *)ip6_route_output(&init_net, - NULL, &fl); - if (!rt) { + dst = __ip_vs_route_output_v6(net, &dest->addr.in6, + &dest->dst_saddr, + do_xfrm); + if (!dst) { spin_unlock(&dest->dst_lock); - IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", - &dest->addr.in6); return NULL; } - __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst)); - IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", - &dest->addr.in6, + rt = (struct rt6_info *) dst; + cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); + IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", + &dest->addr.in6, &dest->dst_saddr, atomic_read(&rt->dst.__refcnt)); } + if (ret_saddr) + ipv6_addr_copy(ret_saddr, &dest->dst_saddr); spin_unlock(&dest->dst_lock); } else { - struct flowi fl = { - .oif = 0, - .nl_u = { - .ip6_u = { - .daddr = cp->daddr.in6, - .saddr = { - .s6_addr32 = { 0, 0, 0, 0 }, - }, - }, - }, - }; - - rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); - if (!rt) { - IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", - &cp->daddr.in6); + dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr, + do_xfrm); + if (!dst) return NULL; - } + rt = (struct rt6_info *) dst; } return rt; @@ -248,6 +268,7 @@ int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { + struct net *net = dev_net(skb->dev); struct rtable *rt; /* Route to the other host */ struct iphdr *iph = ip_hdr(skb); u8 tos = iph->tos; @@ -263,7 +284,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(net, &rt, &fl)) { IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n", __func__, &iph->daddr); goto tx_error_icmp; @@ -313,25 +334,18 @@ int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { + struct net *net = dev_net(skb->dev); + struct dst_entry *dst; struct rt6_info *rt; /* Route to the other host */ struct ipv6hdr *iph = ipv6_hdr(skb); int mtu; - struct flowi fl = { - .oif = 0, - .nl_u = { - .ip6_u = { - .daddr = iph->daddr, - .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, - }; EnterFunction(10); - rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); - if (!rt) { - IP_VS_DBG_RL("%s(): ip6_route_output error, dest: %pI6\n", - __func__, &iph->daddr); + dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0); + if (!dst) goto tx_error_icmp; - } + rt = (struct rt6_info *) dst; /* MTU checking */ mtu = dst_mtu(&rt->dst); @@ -397,7 +411,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) + if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) goto tx_error_icmp; /* MTU checking */ @@ -472,7 +486,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - rt = __ip_vs_get_out_rt_v6(cp); + rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); if (!rt) goto tx_error_icmp; @@ -557,7 +571,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct iphdr *old_iph = ip_hdr(skb); u8 tos = old_iph->tos; __be16 df = old_iph->frag_off; - sk_buff_data_t old_transport_header = skb->transport_header; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int mtu; @@ -572,7 +585,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; } - if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos)))) + if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos)))) goto tx_error_icmp; tdev = rt->dst.dev; @@ -616,7 +629,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, old_iph = ip_hdr(skb); } - skb->transport_header = old_transport_header; + skb->transport_header = skb->network_header; /* fix old IP header checksum */ ip_send_check(old_iph); @@ -670,9 +683,9 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { struct rt6_info *rt; /* Route to the other host */ + struct in6_addr saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ struct ipv6hdr *old_iph = ipv6_hdr(skb); - sk_buff_data_t old_transport_header = skb->transport_header; struct ipv6hdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int mtu; @@ -687,17 +700,17 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; } - rt = __ip_vs_get_out_rt_v6(cp); + rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1); if (!rt) goto tx_error_icmp; tdev = rt->dst.dev; mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); - /* TODO IPv6: do we need this check in IPv6? */ - if (mtu < 1280) { + if (mtu < IPV6_MIN_MTU) { dst_release(&rt->dst); - IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__); + IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, + IPV6_MIN_MTU); goto tx_error; } if (skb_dst(skb)) @@ -730,7 +743,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, old_iph = ipv6_hdr(skb); } - skb->transport_header = old_transport_header; + skb->transport_header = skb->network_header; skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); @@ -750,8 +763,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); iph->priority = old_iph->priority; memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); - iph->daddr = rt->rt6i_dst.addr; - iph->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */ + ipv6_addr_copy(&iph->daddr, &cp->daddr.in6); + ipv6_addr_copy(&iph->saddr, &saddr); iph->hop_limit = old_iph->hop_limit; /* Another hack: avoid icmp_send in ip_fragment */ @@ -791,7 +804,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) + if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) goto tx_error_icmp; /* MTU checking */ @@ -843,7 +856,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rt = __ip_vs_get_out_rt_v6(cp); + rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); if (!rt) goto tx_error_icmp; @@ -919,7 +932,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, * mangle and send the packet here (only for VS/NAT) */ - if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos)))) + if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos)))) goto tx_error_icmp; /* MTU checking */ @@ -993,7 +1006,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, * mangle and send the packet here (only for VS/NAT) */ - rt = __ip_vs_get_out_rt_v6(cp); + rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); if (!rt) goto tx_error_icmp; |