summaryrefslogtreecommitdiffstats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c178
1 files changed, 112 insertions, 66 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 52b0b956508b..1730689f560e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -108,6 +108,7 @@
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
+#include <net/atmclip.h>
#define RT_FL_TOS(oldflp4) \
((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
@@ -184,6 +185,8 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
return p;
}
+static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr);
+
static struct dst_ops ipv4_dst_ops = {
.family = AF_INET,
.protocol = cpu_to_be16(ETH_P_IP),
@@ -198,6 +201,7 @@ static struct dst_ops ipv4_dst_ops = {
.link_failure = ipv4_link_failure,
.update_pmtu = ip_rt_update_pmtu,
.local_out = __ip_local_out,
+ .neigh_lookup = ipv4_neigh_lookup,
};
#define ECN_OR_COST(class) TC_PRIO_##class
@@ -411,8 +415,10 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
"HHUptod\tSpecDst");
else {
struct rtable *r = v;
+ struct neighbour *n;
int len;
+ n = dst_get_neighbour(&r->dst);
seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
"%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
r->dst.dev ? r->dst.dev->name : "*",
@@ -425,9 +431,8 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
dst_metric(&r->dst, RTAX_RTTVAR)),
r->rt_key_tos,
- r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
- r->dst.hh ? (r->dst.hh->hh_output ==
- dev_queue_xmit) : 0,
+ -1,
+ (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0,
r->rt_spec_dst, &len);
seq_printf(seq, "%*s\n", 127 - len, "");
@@ -1006,6 +1011,37 @@ static int slow_chain_length(const struct rtable *head)
return length >> FRACT_BITS;
}
+static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+{
+ struct neigh_table *tbl = &arp_tbl;
+ static const __be32 inaddr_any = 0;
+ struct net_device *dev = dst->dev;
+ const __be32 *pkey = daddr;
+ struct neighbour *n;
+
+#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
+ if (dev->type == ARPHRD_ATM)
+ tbl = clip_tbl_hook;
+#endif
+ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+ pkey = &inaddr_any;
+
+ n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey);
+ if (n)
+ return n;
+ return neigh_create(tbl, pkey, dev);
+}
+
+static int rt_bind_neighbour(struct rtable *rt)
+{
+ struct neighbour *n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
+ if (IS_ERR(n))
+ return PTR_ERR(n);
+ dst_set_neighbour(&rt->dst, n);
+
+ return 0;
+}
+
static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
struct sk_buff *skb, int ifindex)
{
@@ -1042,7 +1078,7 @@ restart:
rt->dst.flags |= DST_NOCACHE;
if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
- int err = arp_bind_neighbour(&rt->dst);
+ int err = rt_bind_neighbour(rt);
if (err) {
if (net_ratelimit())
printk(KERN_WARNING
@@ -1138,7 +1174,7 @@ restart:
route or unicast forwarding path.
*/
if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
- int err = arp_bind_neighbour(&rt->dst);
+ int err = rt_bind_neighbour(rt);
if (err) {
spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1316,6 +1352,23 @@ reject_redirect:
;
}
+static bool peer_pmtu_expired(struct inet_peer *peer)
+{
+ unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
+
+ return orig &&
+ time_after_eq(jiffies, orig) &&
+ cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
+}
+
+static bool peer_pmtu_cleaned(struct inet_peer *peer)
+{
+ unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
+
+ return orig &&
+ cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
+}
+
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
struct rtable *rt = (struct rtable *)dst;
@@ -1331,14 +1384,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
rt_genid(dev_net(dst->dev)));
rt_del(hash, rt);
ret = NULL;
- } else if (rt->peer &&
- rt->peer->pmtu_expires &&
- time_after_eq(jiffies, rt->peer->pmtu_expires)) {
- unsigned long orig = rt->peer->pmtu_expires;
-
- if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
- dst_metric_set(dst, RTAX_MTU,
- rt->peer->pmtu_orig);
+ } else if (rt->peer && peer_pmtu_expired(rt->peer)) {
+ dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig);
}
}
return ret;
@@ -1428,20 +1475,20 @@ static int ip_error(struct sk_buff *skb)
int code;
switch (rt->dst.error) {
- case EINVAL:
- default:
- goto out;
- case EHOSTUNREACH:
- code = ICMP_HOST_UNREACH;
- break;
- case ENETUNREACH:
- code = ICMP_NET_UNREACH;
- IP_INC_STATS_BH(dev_net(rt->dst.dev),
- IPSTATS_MIB_INNOROUTES);
- break;
- case EACCES:
- code = ICMP_PKT_FILTERED;
- break;
+ case EINVAL:
+ default:
+ goto out;
+ case EHOSTUNREACH:
+ code = ICMP_HOST_UNREACH;
+ break;
+ case ENETUNREACH:
+ code = ICMP_NET_UNREACH;
+ IP_INC_STATS_BH(dev_net(rt->dst.dev),
+ IPSTATS_MIB_INNOROUTES);
+ break;
+ case EACCES:
+ code = ICMP_PKT_FILTERED;
+ break;
}
if (!rt->peer)
@@ -1531,8 +1578,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
{
- unsigned long expires = peer->pmtu_expires;
+ unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);
+ if (!expires)
+ return;
if (time_before(jiffies, expires)) {
u32 orig_dst_mtu = dst_mtu(dst);
if (peer->pmtu_learned < orig_dst_mtu) {
@@ -1555,10 +1604,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
rt_bind_peer(rt, rt->rt_dst, 1);
peer = rt->peer;
if (peer) {
+ unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);
+
if (mtu < ip_rt_min_pmtu)
mtu = ip_rt_min_pmtu;
- if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
- unsigned long pmtu_expires;
+ if (!pmtu_expires || mtu < peer->pmtu_learned) {
pmtu_expires = jiffies + ip_rt_mtu_expires;
if (!pmtu_expires)
@@ -1578,23 +1628,24 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
{
struct rtable *rt = (struct rtable *) dst;
__be32 orig_gw = rt->rt_gateway;
+ struct neighbour *n;
dst_confirm(&rt->dst);
- neigh_release(rt->dst.neighbour);
- rt->dst.neighbour = NULL;
+ neigh_release(dst_get_neighbour(&rt->dst));
+ dst_set_neighbour(&rt->dst, NULL);
rt->rt_gateway = peer->redirect_learned.a4;
- if (arp_bind_neighbour(&rt->dst) ||
- !(rt->dst.neighbour->nud_state & NUD_VALID)) {
- if (rt->dst.neighbour)
- neigh_event_send(rt->dst.neighbour, NULL);
+ rt_bind_neighbour(rt);
+ n = dst_get_neighbour(&rt->dst);
+ if (!n || !(n->nud_state & NUD_VALID)) {
+ if (n)
+ neigh_event_send(n, NULL);
rt->rt_gateway = orig_gw;
return -EAGAIN;
} else {
rt->rt_flags |= RTCF_REDIRECTED;
- call_netevent_notifiers(NETEVENT_NEIGH_UPDATE,
- rt->dst.neighbour);
+ call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
}
return 0;
}
@@ -1612,13 +1663,14 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
rt_bind_peer(rt, rt->rt_dst, 0);
peer = rt->peer;
- if (peer && peer->pmtu_expires)
+ if (peer) {
check_peer_pmtu(dst, peer);
- if (peer && peer->redirect_learned.a4 &&
- peer->redirect_learned.a4 != rt->rt_gateway) {
- if (check_peer_redir(dst, peer))
- return NULL;
+ if (peer->redirect_learned.a4 &&
+ peer->redirect_learned.a4 != rt->rt_gateway) {
+ if (check_peer_redir(dst, peer))
+ return NULL;
+ }
}
rt->rt_peer_genid = rt_peer_genid();
@@ -1649,14 +1701,8 @@ static void ipv4_link_failure(struct sk_buff *skb)
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
rt = skb_rtable(skb);
- if (rt &&
- rt->peer &&
- rt->peer->pmtu_expires) {
- unsigned long orig = rt->peer->pmtu_expires;
-
- if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
- dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
- }
+ if (rt && rt->peer && peer_pmtu_cleaned(rt->peer))
+ dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
}
static int ip_rt_bug(struct sk_buff *skb)
@@ -1694,7 +1740,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = iph->daddr;
fl4.saddr = iph->saddr;
- fl4.flowi4_tos = iph->tos;
+ fl4.flowi4_tos = RT_TOS(iph->tos);
fl4.flowi4_oif = rt->dst.dev->ifindex;
fl4.flowi4_iif = skb->dev->ifindex;
fl4.flowi4_mark = skb->mark;
@@ -1770,8 +1816,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
sizeof(u32) * RTAX_MAX);
dst_init_metrics(&rt->dst, peer->metrics, false);
- if (peer->pmtu_expires)
- check_peer_pmtu(&rt->dst, peer);
+ check_peer_pmtu(&rt->dst, peer);
if (peer->redirect_learned.a4 &&
peer->redirect_learned.a4 != rt->rt_gateway) {
rt->rt_gateway = peer->redirect_learned.a4;
@@ -1894,9 +1939,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
- err = 0;
- if (IS_ERR(rth))
- err = PTR_ERR(rth);
+ return IS_ERR(rth) ? PTR_ERR(rth) : 0;
e_nobufs:
return -ENOBUFS;
@@ -2702,6 +2745,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
.default_advmss = ipv4_default_advmss,
.update_pmtu = ipv4_rt_blackhole_update_pmtu,
.cow_metrics = ipv4_rt_blackhole_cow_metrics,
+ .neigh_lookup = ipv4_neigh_lookup,
};
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
@@ -2775,7 +2819,8 @@ static int rt_fill_info(struct net *net,
struct rtable *rt = skb_rtable(skb);
struct rtmsg *r;
struct nlmsghdr *nlh;
- long expires;
+ long expires = 0;
+ const struct inet_peer *peer = rt->peer;
u32 id = 0, ts = 0, tsage = 0, error;
nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
@@ -2823,15 +2868,16 @@ static int rt_fill_info(struct net *net,
NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark);
error = rt->dst.error;
- expires = (rt->peer && rt->peer->pmtu_expires) ?
- rt->peer->pmtu_expires - jiffies : 0;
- if (rt->peer) {
+ if (peer) {
inet_peer_refcheck(rt->peer);
- id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
- if (rt->peer->tcp_ts_stamp) {
- ts = rt->peer->tcp_ts;
- tsage = get_seconds() - rt->peer->tcp_ts_stamp;
+ id = atomic_read(&peer->ip_id_count) & 0xffff;
+ if (peer->tcp_ts_stamp) {
+ ts = peer->tcp_ts;
+ tsage = get_seconds() - peer->tcp_ts_stamp;
}
+ expires = ACCESS_ONCE(peer->pmtu_expires);
+ if (expires)
+ expires -= jiffies;
}
if (rt_is_input_route(rt)) {
@@ -3295,7 +3341,7 @@ int __init ip_rt_init(void)
xfrm_init();
xfrm4_init(ip_rt_max_size);
#endif
- rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
+ rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&sysctl_route_ops);