From 7586eceb0abc0ea1c2b023e3e5d4dfd4ff40930a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Jun 2012 05:02:19 +0000 Subject: ipv4: tcp: dont cache output dst for syncookies Don't cache output dst for syncookies, as this adds pressure on IP route cache and rcu subsystem for no gain. Signed-off-by: Eric Dumazet Cc: Hans Schillstrom Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp/ipv4.c') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 07f5579ca756..3eb76b5f221a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -504,7 +504,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, struct dst_entry *dst; struct flowi4 fl4; - dst = inet_csk_route_req(sk, &fl4, req); + dst = inet_csk_route_req(sk, &fl4, req, false); if (dst == NULL) goto out; -- cgit v1.2.3 From 55be7a9c6074f749d617a7fc1914c9a23505438c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2012 21:27:49 -0700 Subject: ipv4: Add redirect support to all protocol icmp error handlers. Signed-off-by: David S. 
Miller --- net/dccp/ipv4.c | 11 +++++++++++ net/ipv4/ah4.c | 18 +++++++++++++----- net/ipv4/esp4.c | 18 +++++++++++++----- net/ipv4/ip_gre.c | 9 ++++++++- net/ipv4/ipcomp.c | 18 +++++++++++++----- net/ipv4/ipip.c | 9 +++++++++ net/ipv4/ping.c | 1 + net/ipv4/raw.c | 2 ++ net/ipv4/tcp_ipv4.c | 11 +++++++++++ net/ipv4/udp.c | 3 +++ net/ipv4/xfrm4_policy.c | 10 ++++++++++ net/sctp/input.c | 16 ++++++++++++++++ 12 files changed, 110 insertions(+), 16 deletions(-) (limited to 'net/dccp/ipv4.c') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3eb76b5f221a..8f41a3190858 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -195,6 +195,14 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, } /* else let the usual retransmit timer handle it */ } +static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk) +{ + struct dst_entry *dst = __sk_dst_check(sk, 0); + + if (dst && dst->ops->redirect) + dst->ops->redirect(dst, skb); +} + /* * This routine is called by the ICMP module when it gets some sort of error * condition. If err < 0 then the socket should be closed and the error @@ -259,6 +267,9 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) } switch (type) { + case ICMP_REDIRECT: + dccp_do_redirect(skb, sk); + goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. 
*/ goto out; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 916d5ecaf6c6..a0d8392491c3 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -398,17 +398,25 @@ static void ah4_err(struct sk_buff *skb, u32 info) struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; - if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || - icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) + switch (icmp_hdr(skb)->type) { + case ICMP_DEST_UNREACH: + if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) + return; + case ICMP_REDIRECT: + break; + default: return; + } x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); if (!x) return; - pr_debug("pmtu discovery on SA AH/%08x/%08x\n", - ntohl(ah->spi), ntohl(iph->daddr)); - ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); + + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); + else + ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 7b95b49a36ce..b61e9deb7c7e 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -484,17 +484,25 @@ static void esp4_err(struct sk_buff *skb, u32 info) struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; - if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || - icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) + switch (icmp_hdr(skb)->type) { + case ICMP_DEST_UNREACH: + if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) + return; + case ICMP_REDIRECT: + break; + default: return; + } x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", - ntohl(esph->spi), ntohl(iph->daddr)); - ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); + + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); + else + 
ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 594cec35ac4d..0c3123566d76 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -528,6 +528,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info) if (code != ICMP_EXC_TTL) return; break; + + case ICMP_REDIRECT: + break; } rcu_read_lock(); @@ -543,7 +546,11 @@ static void ipgre_err(struct sk_buff *skb, u32 info) t->parms.link, 0, IPPROTO_GRE, 0); goto out; } - + if (type == ICMP_REDIRECT) { + ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, + IPPROTO_GRE, 0); + goto out; + } if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) goto out; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index b91375482d84..d3ab47e19a89 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -31,18 +31,26 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; - if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || - icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) + switch (icmp_hdr(skb)->type) { + case ICMP_DEST_UNREACH: + if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) + return; + case ICMP_REDIRECT: + break; + default: return; + } spi = htonl(ntohs(ipch->cpi)); x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET); if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n", - spi, &iph->daddr); - ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); + + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); + else + ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 715338a1b205..c2d0e6d8baaf 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -360,6 +360,8 @@ static int ipip_err(struct sk_buff *skb, u32 info) if (code != ICMP_EXC_TTL) return 0; 
break; + case ICMP_REDIRECT: + break; } err = -ENOENT; @@ -376,6 +378,13 @@ static int ipip_err(struct sk_buff *skb, u32 info) goto out; } + if (type == ICMP_REDIRECT) { + ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + IPPROTO_IPIP, 0); + err = 0; + goto out; + } + if (t->parms.iph.daddr == 0) goto out; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 340fcf29a966..6232d476f37e 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -387,6 +387,7 @@ void ping_err(struct sk_buff *skb, u32 info) break; case ICMP_REDIRECT: /* See ICMP_SOURCE_QUENCH */ + ipv4_sk_redirect(skb, sk); err = EREMOTEIO; break; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 659ddfb10947..ff0f071969ea 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -218,6 +218,8 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) ipv4_sk_update_pmtu(skb, sk, info); + else if (type == ICMP_REDIRECT) + ipv4_sk_redirect(skb, sk); /* Report error on raw socket, if: 1. User requested ip_recverr. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 01545a3fc0f2..087a8488843f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -321,6 +321,14 @@ static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) } /* else let the usual retransmit timer handle it */ } +static void do_redirect(struct sk_buff *skb, struct sock *sk) +{ + struct dst_entry *dst = __sk_dst_check(sk, 0); + + if (dst && dst->ops->redirect) + dst->ops->redirect(dst, skb); +} + /* * This routine is called by the ICMP module when it gets some * sort of error condition. If err < 0 then the socket should @@ -394,6 +402,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } switch (type) { + case ICMP_REDIRECT: + do_redirect(icmp_skb, sk); + goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. 
*/ goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ee37d47d472e..b4c3582a991f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -630,6 +630,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) err = icmp_err_convert[code].errno; } break; + case ICMP_REDIRECT: + ipv4_sk_redirect(skb, sk); + break; } /* diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 87d3fcc302d4..258ebd7b268b 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -202,6 +202,15 @@ static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) path->ops->update_pmtu(path, mtu); } +static void xfrm4_redirect(struct dst_entry *dst, struct sk_buff *skb) +{ + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + struct dst_entry *path = xdst->route; + + if (path->ops->redirect) + path->ops->redirect(path, skb); +} + static void xfrm4_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -225,6 +234,7 @@ static struct dst_ops xfrm4_dst_ops = { .protocol = cpu_to_be16(ETH_P_IP), .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, + .redirect = xfrm4_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, diff --git a/net/sctp/input.c b/net/sctp/input.c index 80564fe03024..9fb4247f9a99 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -423,6 +423,18 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } +static void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, + struct sk_buff *skb) +{ + struct dst_entry *dst; + + if (!t) + return; + dst = sctp_transport_dst_check(t); + if (dst && dst->ops->redirect) + dst->ops->redirect(dst, skb); +} + /* * SCTP Implementer's Guide, 2.37 ICMP handling procedures * @@ -628,6 +640,10 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) err = EHOSTUNREACH; break; + case ICMP_REDIRECT: + 
sctp_icmp_redirect(sk, transport, skb); + err = 0; + break; default: goto out_unlock; } -- cgit v1.2.3 From 1ed5c48f231cd00eac0b3d2350ac61e3c825063e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2012 00:41:25 -0700 Subject: net: Remove checks for dst_ops->redirect being NULL. No longer necessary. Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/xfrm4_policy.c | 3 +-- net/ipv6/ip6_tunnel.c | 6 ++---- net/ipv6/tcp_ipv6.c | 2 +- net/sctp/input.c | 2 +- 7 files changed, 8 insertions(+), 11 deletions(-) (limited to 'net/dccp/ipv4.c') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8f41a3190858..129ed8f74138 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -199,7 +199,7 @@ static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk) { struct dst_entry *dst = __sk_dst_check(sk, 0); - if (dst && dst->ops->redirect) + if (dst) dst->ops->redirect(dst, skb); } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b4d7d28ce6d2..090c0800ce03 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -133,7 +133,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) { struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst && dst->ops->redirect) + if (dst) dst->ops->redirect(dst, skb); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 087a8488843f..7a0062cb4ed0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -325,7 +325,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) { struct dst_entry *dst = __sk_dst_check(sk, 0); - if (dst && dst->ops->redirect) + if (dst) dst->ops->redirect(dst, skb); } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 258ebd7b268b..737131cef375 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -207,8 +207,7 @@ static void xfrm4_redirect(struct dst_entry *dst, struct sk_buff *skb) struct xfrm_dst *xdst = (struct 
xfrm_dst *)dst; struct dst_entry *path = xdst->route; - if (path->ops->redirect) - path->ops->redirect(path, skb); + path->ops->redirect(path, skb); } static void xfrm4_dst_destroy(struct dst_entry *dst) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0b5b60ec6f4a..61d106597296 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -611,10 +611,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); } - if (rel_type == ICMP_REDIRECT) { - if (skb_dst(skb2)->ops->redirect) - skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2); - } + if (rel_type == ICMP_REDIRECT) + skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2); icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7249e4bb9b8a..3071f377145c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -366,7 +366,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) { struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst && dst->ops->redirect) + if (dst) dst->ops->redirect(dst,skb); } diff --git a/net/sctp/input.c b/net/sctp/input.c index 5943b7d77ddb..f050d45faa98 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -431,7 +431,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, if (!t) return; dst = sctp_transport_dst_check(t); - if (dst && dst->ops->redirect) + if (dst) dst->ops->redirect(dst, skb); } -- cgit v1.2.3 From 80d0a69fc57715dc9080c0567df1ed911b78abea Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 16 Jul 2012 03:28:06 -0700 Subject: ipv4: Add helper inet_csk_update_pmtu(). This abstracts away the call to dst_ops->update_pmtu() so that we can transparently handle the fact that, in the future, the dst itself can be invalidated by the PMTU update (when we have non-host routes cached in sockets). 
So we try to rebuild the socket cached route after the method invocation if necessary. This isn't used by SCTP because it needs to cache dsts per-transport, and thus will need its own local version of this helper. Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 2 ++ net/dccp/ipv4.c | 11 ++------- net/ipv4/inet_connection_sock.c | 46 ++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 11 ++------- 4 files changed, 52 insertions(+), 18 deletions(-) (limited to 'net/dccp/ipv4.c') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 291e7cee14e7..2cf44b4ed2e6 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -337,4 +337,6 @@ extern int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); extern int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); + +extern struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 129ed8f74138..683902fcc8ed 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -161,17 +161,10 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, if (sk->sk_state == DCCP_LISTEN) return; - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) + dst = inet_csk_update_pmtu(sk, mtu); + if (!dst) return; - dst->ops->update_pmtu(dst, mtu); - /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. 
*/ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 76825be3b643..200d21809379 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -803,3 +803,49 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, } EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); #endif + +static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) +{ + struct inet_sock *inet = inet_sk(sk); + struct ip_options_rcu *inet_opt; + __be32 daddr = inet->inet_daddr; + struct flowi4 *fl4; + struct rtable *rt; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + fl4 = &fl->u.ip4; + rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, + inet->inet_saddr, inet->inet_dport, + inet->inet_sport, sk->sk_protocol, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); + if (IS_ERR(rt)) + rt = NULL; + if (rt) + sk_setup_caps(sk, &rt->dst); + rcu_read_unlock(); + + return &rt->dst; +} + +struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) +{ + struct dst_entry *dst = __sk_dst_check(sk, 0); + struct inet_sock *inet = inet_sk(sk); + + if (!dst) { + dst = inet_csk_rebuild_route(sk, &inet->cork.fl); + if (!dst) + goto out; + } + dst->ops->update_pmtu(dst, mtu); + + dst = __sk_dst_check(sk, 0); + if (!dst) + dst = inet_csk_rebuild_route(sk, &inet->cork.fl); +out: + return dst; +} +EXPORT_SYMBOL_GPL(inet_csk_update_pmtu); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7a0062cb4ed0..b8e7e0595407 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -289,17 +289,10 @@ static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) if (sk->sk_state == TCP_LISTEN) return; - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. 
- * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) + dst = inet_csk_update_pmtu(sk, mtu); + if (!dst) return; - dst->ops->update_pmtu(dst, mtu); - /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. */ -- cgit v1.2.3 From 6700c2709c08d74ae2c3c29b84a30da012dbc7f1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 03:29:28 -0700 Subject: net: Pass optional SKB and SK arguments to dst_ops->{update_pmtu,redirect}() This will be used so that we can compose a full flow key. Even though we have a route in this context, we need more. In the future the routes will be without destination address, source address, etc. keying. One ipv4 route will cover entire subnets, etc. In this environment we have to have a way to possess persistent storage for redirects and PMTU information. This persistent storage will exist in the FIB tables, and that's why we'll need to be able to rebuild a full lookup flow key here. Using that flow key will do a fib_lookup() and create/update the persistent entry. Signed-off-by: David S. 
Miller --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 +- include/net/dst_ops.h | 6 ++++-- net/bridge/br_netfilter.c | 6 ++++-- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/decnet/dn_route.c | 12 ++++++++---- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/ip_gre.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/route.c | 21 +++++++++++++-------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/xfrm4_policy.c | 10 ++++++---- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_tunnel.c | 6 +++--- net/ipv6/route.c | 21 +++++++++++++-------- net/ipv6/sit.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/xfrm6_policy.c | 10 ++++++---- net/netfilter/ipvs/ip_vs_xmit.c | 4 ++-- net/sctp/input.c | 2 +- net/sctp/transport.c | 2 +- 21 files changed, 71 insertions(+), 49 deletions(-) (limited to 'net/dccp/ipv4.c') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 014504d8e43c..1ca732201f33 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1397,7 +1397,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, int e = skb_queue_empty(&priv->cm.skb_queue); if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); skb_queue_tail(&priv->cm.skb_queue, skb); if (e) diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 085931fa7ce0..d079fc61c123 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -24,8 +24,10 @@ struct dst_ops { struct net_device *dev, int how); struct dst_entry * (*negative_advice)(struct dst_entry *); void (*link_failure)(struct sk_buff *); - void (*update_pmtu)(struct dst_entry *dst, u32 mtu); - void (*redirect)(struct dst_entry *dst, struct sk_buff *skb); + void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); + void (*redirect)(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); int (*local_out)(struct sk_buff 
*skb); struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, struct sk_buff *skb, diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 81f76c402cf2..68e8f364bbf8 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -111,11 +111,13 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) -static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) +static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void fake_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void fake_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 683902fcc8ed..ab4f44c9bb21 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -193,7 +193,7 @@ static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk) struct dst_entry *dst = __sk_dst_check(sk, 0); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3ee0342e1cec..56840b249f3b 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -134,7 +134,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } if (type == ICMPV6_PKT_TOOBIG) { diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index e9c4e2e864c6..47de90d8fe94 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -117,8 +117,10 @@ static void dn_dst_destroy(struct dst_entry *); static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); -static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); -static void 
dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb , u32 mtu); +static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); @@ -266,7 +268,8 @@ static int dn_dst_gc(struct dst_ops *ops) * We update both the mtu and the advertised mss (i.e. the segment size we * advertise to the other end). */ -static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) +static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct dn_route *rt = (struct dn_route *) dst; struct neighbour *n = rt->n; @@ -294,7 +297,8 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) } } -static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 200d21809379..3ea465286a39 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -840,7 +840,7 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) if (!dst) goto out; } - dst->ops->update_pmtu(dst, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu); dst = __sk_dst_check(sk, 0); if (!dst) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0c3123566d76..42c44b1403c9 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -833,7 +833,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev mtu = skb_dst(skb) ? 
dst_mtu(skb_dst(skb)) : dev->mtu; if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { df |= (old_iph->frag_off&htons(IP_DF)); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index c2d0e6d8baaf..2c2c35bace76 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -519,7 +519,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if ((old_iph->frag_off & htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index aad21819316d..b35d3bfc66cd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -148,8 +148,10 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst); static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); -static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); -static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); +static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); static int rt_garbage_collect(struct dst_ops *ops); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1273,7 +1275,7 @@ static void rt_del(unsigned int hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { __be32 new_gw = icmp_hdr(skb)->un.gateway; __be32 old_gw = ip_hdr(skb)->saddr; @@ -1506,7 +1508,8 @@ out: kfree_skb(skb); return 0; } -static void ip_rt_update_pmtu(struct 
dst_entry *dst, u32 mtu) +static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct rtable *rt = (struct rtable *) dst; @@ -1531,7 +1534,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, iph->daddr, iph->saddr, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_rt_update_pmtu(&rt->dst, mtu); + ip_rt_update_pmtu(&rt->dst, NULL, skb, mtu); ip_rt_put(rt); } } @@ -1559,7 +1562,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, protocol, flow_flags, iph->daddr, iph->saddr, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_do_redirect(&rt->dst, skb); + ip_do_redirect(&rt->dst, NULL, skb); ip_rt_put(rt); } } @@ -2587,11 +2590,13 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b8e7e0595407..d9caf5c07aae 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -319,7 +319,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) struct dst_entry *dst = __sk_dst_check(sk, 0); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 737131cef375..fcf7678bc009 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -194,20 +194,22 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops) return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); } -static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) +static void 
xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, mtu); + path->ops->update_pmtu(path, sk, skb, mtu); } -static void xfrm4_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->redirect(path, skb); + path->ops->redirect(path, sk, skb); } static void xfrm4_dst_destroy(struct dst_entry *dst) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 62539a4b2dc7..4a0c4d2d8b05 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -269,7 +269,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) if (IS_ERR(dst)) return NULL; - dst->ops->update_pmtu(dst, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu); return inet6_csk_route_socket(sk); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 61d106597296..db3284667968 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -609,10 +609,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); + skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); } if (rel_type == ICMP_REDIRECT) - skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2); + skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); @@ -952,7 +952,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu) { *pmtu = mtu; err = -EMSGSIZE; diff --git 
a/net/ipv6/route.c b/net/ipv6/route.c index 2a4c8d48977f..31af1ed6c1dc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -78,8 +78,10 @@ static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); -static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); -static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); +static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, @@ -187,11 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } @@ -1071,7 +1075,8 @@ static void ip6_link_failure(struct sk_buff *skb) } } -static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info*)dst; @@ -1108,7 +1113,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - ip6_rt_update_pmtu(dst, ntohl(mtu)); + ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); @@ -1136,7 +1141,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) dst = ip6_route_output(net, NULL, &fl6); if 
(!dst->error) - rt6_do_redirect(dst, skb); + rt6_do_redirect(dst, NULL, skb); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_redirect); @@ -1639,7 +1644,7 @@ static int ip6_route_del(struct fib6_config *cfg) return err; } -static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index fbf1622fdeef..3bd1bfc01f85 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -807,7 +807,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (tunnel->parms.iph.daddr && skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ecdf241cad02..c9dabdd832d7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -367,7 +367,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) - dst->ops->redirect(dst,skb); + dst->ops->redirect(dst, sk, skb); } if (type == ICMPV6_PKT_TOOBIG) { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f5a9cb8257b9..ef39812107b1 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -207,20 +207,22 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) return dst_entries_get_fast(ops) > ops->gc_thresh * 2; } -static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) +static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, mtu); + path->ops->update_pmtu(path, sk, skb, mtu); } -static void xfrm6_redirect(struct dst_entry *dst, struct 
sk_buff *skb) +static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->redirect(path, skb); + path->ops->redirect(path, sk, skb); } static void xfrm6_dst_destroy(struct dst_entry *dst) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 71d6ecb65926..65b616ae1716 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -797,7 +797,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; } if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); df |= (old_iph->frag_off & htons(IP_DF)); @@ -913,7 +913,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; } if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) && !skb_is_gso(skb)) { diff --git a/net/sctp/input.c b/net/sctp/input.c index a67bc31f49fd..c201b26879a1 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -432,7 +432,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, return; dst = sctp_transport_dst_check(t); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e69e1a2175a4..a6b7ee9ce28a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -249,7 +249,7 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); if (dst) { - dst->ops->update_pmtu(dst, pmtu); + dst->ops->update_pmtu(dst, sk, NULL, pmtu); dst = sctp_transport_dst_check(t); if (!dst) -- cgit v1.2.3 From ba3f7f04ef2b19aace38f855aedd17fe43035d50 Mon Sep 17 00:00:00 2001 
From: "David S. Miller" Date: Tue, 17 Jul 2012 14:02:46 -0700 Subject: ipv4: Kill FLOWI_FLAG_RT_NOCACHE and associated code. Signed-off-by: David S. Miller --- include/net/flow.h | 1 - include/net/inet_connection_sock.h | 3 +-- net/dccp/ipv4.c | 2 +- net/ipv4/inet_connection_sock.c | 5 +---- net/ipv4/route.c | 3 --- net/ipv4/tcp_ipv4.c | 4 ++-- 6 files changed, 5 insertions(+), 13 deletions(-) (limited to 'net/dccp/ipv4.c') diff --git a/include/net/flow.h b/include/net/flow.h index ce9cb7656b47..e1dd5082ec7e 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -21,7 +21,6 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_CAN_SLEEP 0x02 -#define FLOWI_FLAG_RT_NOCACHE 0x04 __u32 flowic_secid; }; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 2cf44b4ed2e6..5ee66f517b4f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -250,8 +250,7 @@ extern int inet_csk_get_port(struct sock *sk, unsigned short snum); extern struct dst_entry* inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, - const struct request_sock *req, - bool nocache); + const struct request_sock *req); extern struct dst_entry* inet_csk_route_child_sock(struct sock *sk, struct sock *newsk, const struct request_sock *req); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ab4f44c9bb21..25428d0c50c9 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -508,7 +508,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, struct dst_entry *dst; struct flowi4 fl4; - dst = inet_csk_route_req(sk, &fl4, req, false); + dst = inet_csk_route_req(sk, &fl4, req); if (dst == NULL) goto out; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0a290d719bc7..db0cf17c00f7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -368,8 +368,7 @@ EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); struct 
dst_entry *inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, - const struct request_sock *req, - bool nocache) + const struct request_sock *req) { struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); @@ -377,8 +376,6 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct net *net = sock_net(sk); int flags = inet_sk_flowi_flags(sk); - if (nocache) - flags |= FLOWI_FLAG_RT_NOCACHE; flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 97cca8a03d94..7e1c0ed0ef70 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1836,9 +1836,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0); - if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE) - rth->dst.flags |= DST_NOCACHE; - return rth; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1d8b75a58981..59110caeb074 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -824,7 +824,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct sk_buff * skb; /* First, grab a route. */ - if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL) + if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; skb = tcp_make_synack(sk, dst, req, rvp); @@ -1378,7 +1378,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && + (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && fl4.daddr == saddr) { if (!tcp_peer_is_proven(req, dst, true)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); -- cgit v1.2.3 From 92101b3b2e3178087127709a556b091dae314e9e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 23 Jul 2012 16:29:00 -0700 Subject: ipv4: Prepare for change of rt->rt_iif encoding. 
Use inet_iif() consistently, and for TCP record the input interface of cached RX dst in inet sock. rt->rt_iif is going to be encoded differently, so that we can legitimately cache input routes in the FIB info more aggressively. When the input interface is "use SKB device index" the rt->rt_iif will be set to zero. This forces us to move the TCP RX dst cache installation into the ipv4 specific code, and as well it should since doing the route caching for ipv6 is pointless at the moment since it is not inspected in the ipv6 input paths yet. Also, remove the unlikely on dst->obsolete, all ipv4 dsts have obsolete set to a non-zero value to force invocation of the check callback. Signed-off-by: David S. Miller --- include/net/inet_sock.h | 1 + net/dccp/ipv4.c | 2 +- net/ipv4/icmp.c | 2 +- net/ipv4/ip_sockglue.c | 5 ++--- net/ipv4/route.c | 2 +- net/ipv4/tcp_input.c | 12 ------------ net/ipv4/tcp_ipv4.c | 24 ++++++++++++++++++------ net/sched/cls_route.c | 2 +- net/sched/em_meta.c | 2 +- net/sctp/protocol.c | 2 +- 10 files changed, 27 insertions(+), 27 deletions(-) (limited to 'net/dccp/ipv4.c') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 924d7b98ab60..613cfa401672 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -172,6 +172,7 @@ struct inet_sock { int uc_index; int mc_index; __be32 mc_addr; + int rx_dst_ifindex; struct ip_mc_socklist __rcu *mc_list; struct inet_cork_full cork; }; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 25428d0c50c9..176ecdba4a22 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -481,7 +481,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct rtable *rt; const struct iphdr *iph = ip_hdr(skb); struct flowi4 fl4 = { - .flowi4_oif = skb_rtable(skb)->rt_iif, + .flowi4_oif = inet_iif(skb), .daddr = iph->saddr, .saddr = iph->daddr, .flowi4_tos = RT_CONN_FLAGS(sk), diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f2a06beffbd3..f2eccd531746 100644 --- 
a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -571,7 +571,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) rcu_read_lock(); if (rt_is_input_route(rt) && net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) - dev = dev_get_by_index_rcu(net, rt->rt_iif); + dev = dev_get_by_index_rcu(net, inet_iif(skb_in)); if (dev) saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index de29f46f68b0..5eea4a811042 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1027,10 +1027,9 @@ e_inval: void ipv4_pktinfo_prepare(struct sk_buff *skb) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); - const struct rtable *rt = skb_rtable(skb); - if (rt) { - pktinfo->ipi_ifindex = rt->rt_iif; + if (skb_rtable(skb)) { + pktinfo->ipi_ifindex = inet_iif(skb); pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { pktinfo->ipi_ifindex = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 34017be87c85..f6be78119396 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -848,7 +848,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) if (log_martians && peer->rate_tokens == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", - &ip_hdr(skb)->saddr, rt->rt_iif, + &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &rt->rt_gateway); #endif } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 21d7f8f3a7a5..3e07a64ca44e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5391,18 +5391,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); - if (sk->sk_rx_dst) { - struct dst_entry *dst = sk->sk_rx_dst; - if (unlikely(dst->obsolete)) { - if (dst->ops->check(dst, 0) == NULL) { - dst_release(dst); - sk->sk_rx_dst = NULL; - } - } - } - if (unlikely(sk->sk_rx_dst == NULL)) - sk->sk_rx_dst = dst_clone(skb_dst(skb)); - /* * Header prediction. 
* The code loosely follows the one in the famous diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bc5432e3c778..3e30548ac32a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1618,6 +1618,20 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ sock_rps_save_rxhash(sk, skb); + if (sk->sk_rx_dst) { + struct dst_entry *dst = sk->sk_rx_dst; + if (dst->ops->check(dst, 0) == NULL) { + dst_release(dst); + sk->sk_rx_dst = NULL; + } + } + if (unlikely(sk->sk_rx_dst == NULL)) { + struct inet_sock *icsk = inet_sk(sk); + struct rtable *rt = skb_rtable(skb); + + sk->sk_rx_dst = dst_clone(&rt->dst); + icsk->rx_dst_ifindex = inet_iif(skb); + } if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; @@ -1700,14 +1714,12 @@ void tcp_v4_early_demux(struct sk_buff *skb) skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { struct dst_entry *dst = sk->sk_rx_dst; + struct inet_sock *icsk = inet_sk(sk); if (dst) dst = dst_check(dst, 0); - if (dst) { - struct rtable *rt = (struct rtable *) dst; - - if (rt->rt_iif == dev->ifindex) - skb_dst_set_noref(skb, dst); - } + if (dst && + icsk->rx_dst_ifindex == dev->ifindex) + skb_dst_set_noref(skb, dst); } } } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 36fec4227401..44f405cb9aaf 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -143,7 +143,7 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (head == NULL) goto old_method; - iif = ((struct rtable *)dst)->rt_iif; + iif = inet_iif(skb); h = route4_fastmap_hash(id, iif); if (id == head->fastmap[h].id && diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 4790c696cbce..4ab6e3325573 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -264,7 +264,7 @@ META_COLLECTOR(int_rtiif) if (unlikely(skb_rtable(skb) == NULL)) *err = -1; else - dst->value = skb_rtable(skb)->rt_iif; + dst->value = 
inet_iif(skb); } /************************************************************************** diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 9c90811d1134..1f89c4e69645 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -568,7 +568,7 @@ static void sctp_v4_get_saddr(struct sctp_sock *sk, /* What interface did this skb arrive on? */ static int sctp_v4_skb_iif(const struct sk_buff *skb) { - return skb_rtable(skb)->rt_iif; + return inet_iif(skb); } /* Was this packet marked by Explicit Congestion Notification? */ -- cgit v1.2.3