diff options
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 178 |
1 files changed, 112 insertions, 66 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 52b0b956508b..1730689f560e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -108,6 +108,7 @@ #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif +#include <net/atmclip.h> #define RT_FL_TOS(oldflp4) \ ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) @@ -184,6 +185,8 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) return p; } +static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr); + static struct dst_ops ipv4_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), @@ -198,6 +201,7 @@ static struct dst_ops ipv4_dst_ops = { .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, .local_out = __ip_local_out, + .neigh_lookup = ipv4_neigh_lookup, }; #define ECN_OR_COST(class) TC_PRIO_##class @@ -411,8 +415,10 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) "HHUptod\tSpecDst"); else { struct rtable *r = v; + struct neighbour *n; int len; + n = dst_get_neighbour(&r->dst); seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", r->dst.dev ? r->dst.dev->name : "*", @@ -425,9 +431,8 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + dst_metric(&r->dst, RTAX_RTTVAR)), r->rt_key_tos, - r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, - r->dst.hh ? (r->dst.hh->hh_output == - dev_queue_xmit) : 0, + -1, + (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0, r->rt_spec_dst, &len); seq_printf(seq, "%*s\n", 127 - len, ""); @@ -1006,6 +1011,37 @@ static int slow_chain_length(const struct rtable *head) return length >> FRACT_BITS; } +static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) +{ + struct neigh_table *tbl = &arp_tbl; + static const __be32 inaddr_any = 0; + struct net_device *dev = dst->dev; + const __be32 *pkey = daddr; + struct neighbour *n; + +#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) + if (dev->type == ARPHRD_ATM) + tbl = clip_tbl_hook; +#endif + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + pkey = &inaddr_any; + + n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey); + if (n) + return n; + return neigh_create(tbl, pkey, dev); +} + +static int rt_bind_neighbour(struct rtable *rt) +{ + struct neighbour *n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); + if (IS_ERR(n)) + return PTR_ERR(n); + dst_set_neighbour(&rt->dst, n); + + return 0; +} + static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, struct sk_buff *skb, int ifindex) { @@ -1042,7 +1078,7 @@ restart: rt->dst.flags |= DST_NOCACHE; if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { - int err = arp_bind_neighbour(&rt->dst); + int err = rt_bind_neighbour(rt); if (err) { if (net_ratelimit()) printk(KERN_WARNING @@ -1138,7 +1174,7 @@ restart: route or unicast forwarding path. */ if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { - int err = arp_bind_neighbour(&rt->dst); + int err = rt_bind_neighbour(rt); if (err) { spin_unlock_bh(rt_hash_lock_addr(hash)); @@ -1316,6 +1352,23 @@ reject_redirect: ; } +static bool peer_pmtu_expired(struct inet_peer *peer) +{ + unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); + + return orig && + time_after_eq(jiffies, orig) && + cmpxchg(&peer->pmtu_expires, orig, 0) == orig; +} + +static bool peer_pmtu_cleaned(struct inet_peer *peer) +{ + unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); + + return orig && + cmpxchg(&peer->pmtu_expires, orig, 0) == orig; +} + static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; @@ -1331,14 +1384,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) rt_genid(dev_net(dst->dev))); rt_del(hash, rt); ret = NULL; - } else if (rt->peer && - rt->peer->pmtu_expires && - time_after_eq(jiffies, rt->peer->pmtu_expires)) { - unsigned long orig = rt->peer->pmtu_expires; - - if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) - dst_metric_set(dst, RTAX_MTU, - rt->peer->pmtu_orig); + } else if (rt->peer && peer_pmtu_expired(rt->peer)) { + dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig); } } return ret; @@ -1428,20 +1475,20 @@ static int ip_error(struct sk_buff *skb) int code; switch (rt->dst.error) { - case EINVAL: - default: - goto out; - case EHOSTUNREACH: - code = ICMP_HOST_UNREACH; - break; - case ENETUNREACH: - code = ICMP_NET_UNREACH; - IP_INC_STATS_BH(dev_net(rt->dst.dev), - IPSTATS_MIB_INNOROUTES); - break; - case EACCES: - code = ICMP_PKT_FILTERED; - break; + case EINVAL: + default: + goto out; + case EHOSTUNREACH: + code = ICMP_HOST_UNREACH; + break; + case ENETUNREACH: + code = ICMP_NET_UNREACH; + IP_INC_STATS_BH(dev_net(rt->dst.dev), + IPSTATS_MIB_INNOROUTES); + break; + case EACCES: + code = ICMP_PKT_FILTERED; + break; } if (!rt->peer) @@ -1531,8 +1578,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) { - unsigned long expires = peer->pmtu_expires; + unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); + if (!expires) + return; if (time_before(jiffies, expires)) { u32 orig_dst_mtu = dst_mtu(dst); if (peer->pmtu_learned < orig_dst_mtu) { @@ -1555,10 +1604,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) rt_bind_peer(rt, rt->rt_dst, 1); peer = rt->peer; if (peer) { + unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; - if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { - unsigned long pmtu_expires; + if (!pmtu_expires || mtu < peer->pmtu_learned) { pmtu_expires = jiffies + ip_rt_mtu_expires; if (!pmtu_expires) @@ -1578,23 +1628,24 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) { struct rtable *rt = (struct rtable *) dst; __be32 orig_gw = rt->rt_gateway; + struct neighbour *n; dst_confirm(&rt->dst); - neigh_release(rt->dst.neighbour); - rt->dst.neighbour = NULL; + neigh_release(dst_get_neighbour(&rt->dst)); + dst_set_neighbour(&rt->dst, NULL); rt->rt_gateway = peer->redirect_learned.a4; - if (arp_bind_neighbour(&rt->dst) || - !(rt->dst.neighbour->nud_state & NUD_VALID)) { - if (rt->dst.neighbour) - neigh_event_send(rt->dst.neighbour, NULL); + rt_bind_neighbour(rt); + n = dst_get_neighbour(&rt->dst); + if (!n || !(n->nud_state & NUD_VALID)) { + if (n) + neigh_event_send(n, NULL); rt->rt_gateway = orig_gw; return -EAGAIN; } else { rt->rt_flags |= RTCF_REDIRECTED; - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, - rt->dst.neighbour); + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } return 0; } @@ -1612,13 +1663,14 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) rt_bind_peer(rt, rt->rt_dst, 0); peer = rt->peer; - if (peer && peer->pmtu_expires) + if (peer) { check_peer_pmtu(dst, peer); - if (peer && peer->redirect_learned.a4 && - peer->redirect_learned.a4 != rt->rt_gateway) { - if (check_peer_redir(dst, peer)) - return NULL; + if (peer->redirect_learned.a4 && + peer->redirect_learned.a4 != rt->rt_gateway) { + if (check_peer_redir(dst, peer)) + return NULL; + } } rt->rt_peer_genid = rt_peer_genid(); @@ -1649,14 +1701,8 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt && - rt->peer && - rt->peer->pmtu_expires) { - unsigned long orig = rt->peer->pmtu_expires; - - if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) - dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); - } + if (rt && rt->peer && peer_pmtu_cleaned(rt->peer)) + dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); } static int ip_rt_bug(struct sk_buff *skb) @@ -1694,7 +1740,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) memset(&fl4, 0, sizeof(fl4)); fl4.daddr = iph->daddr; fl4.saddr = iph->saddr; - fl4.flowi4_tos = iph->tos; + fl4.flowi4_tos = RT_TOS(iph->tos); fl4.flowi4_oif = rt->dst.dev->ifindex; fl4.flowi4_iif = skb->dev->ifindex; fl4.flowi4_mark = skb->mark; @@ -1770,8 +1816,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, sizeof(u32) * RTAX_MAX); dst_init_metrics(&rt->dst, peer->metrics, false); - if (peer->pmtu_expires) - check_peer_pmtu(&rt->dst, peer); + check_peer_pmtu(&rt->dst, peer); if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) { rt->rt_gateway = peer->redirect_learned.a4; @@ -1894,9 +1939,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); rth = rt_intern_hash(hash, rth, skb, dev->ifindex); - err = 0; - if (IS_ERR(rth)) - err = PTR_ERR(rth); + return IS_ERR(rth) ? PTR_ERR(rth) : 0; e_nobufs: return -ENOBUFS; @@ -2702,6 +2745,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .default_advmss = ipv4_default_advmss, .update_pmtu = ipv4_rt_blackhole_update_pmtu, .cow_metrics = ipv4_rt_blackhole_cow_metrics, + .neigh_lookup = ipv4_neigh_lookup, }; struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) @@ -2775,7 +2819,8 @@ static int rt_fill_info(struct net *net, struct rtable *rt = skb_rtable(skb); struct rtmsg *r; struct nlmsghdr *nlh; - long expires; + long expires = 0; + const struct inet_peer *peer = rt->peer; u32 id = 0, ts = 0, tsage = 0, error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); @@ -2823,15 +2868,16 @@ static int rt_fill_info(struct net *net, NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); error = rt->dst.error; - expires = (rt->peer && rt->peer->pmtu_expires) ? - rt->peer->pmtu_expires - jiffies : 0; - if (rt->peer) { + if (peer) { inet_peer_refcheck(rt->peer); - id = atomic_read(&rt->peer->ip_id_count) & 0xffff; - if (rt->peer->tcp_ts_stamp) { - ts = rt->peer->tcp_ts; - tsage = get_seconds() - rt->peer->tcp_ts_stamp; + id = atomic_read(&peer->ip_id_count) & 0xffff; + if (peer->tcp_ts_stamp) { + ts = peer->tcp_ts; + tsage = get_seconds() - peer->tcp_ts_stamp; } + expires = ACCESS_ONCE(peer->pmtu_expires); + if (expires) + expires -= jiffies; } if (rt_is_input_route(rt)) { @@ -3295,7 +3341,7 @@ int __init ip_rt_init(void) xfrm_init(); xfrm4_init(ip_rt_max_size); #endif - rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); |