From 9bf9055eb716f85372c41b3fbc51f90bc7653740 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 17 Apr 2011 00:14:09 -0700 Subject: decnet: Fix set-but-unused variable. "next" in dn_rebuild_zone() is set but not actually used, kill it off. Signed-off-by: David S. Miller --- net/decnet/dn_table.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/decnet') diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 99d8d3a40998..d8ea583076cf 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -124,11 +124,10 @@ static inline void dn_rebuild_zone(struct dn_zone *dz, int old_divisor) { int i; - struct dn_fib_node *f, **fp, *next; + struct dn_fib_node *f, **fp; for(i = 0; i < old_divisor; i++) { for(f = old_ht[i]; f; f = f->fn_next) { - next = f->fn_next; for(fp = dn_chain_p(f->fn_key, dz); *fp && dn_key_leq((*fp)->fn_key, f->fn_key); fp = &(*fp)->fn_next) -- cgit v1.2.3 From a01c1335a308ee660518e33db03fb5f5e1dfc166 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 17 Apr 2011 20:47:07 -0700 Subject: decnet: Don't leak entries when rebuilding zone. As noticed by Ben Hutchings, when we move entries from one table to another we leak all except the first entry. Put back the "next" variable removed by commit 9bf9055eb716f85372c41b3fbc51f90bc7653740 ("decnet: Fix set-but-unused variable.") and use it properly. Reported-by: Ben Hutchings Signed-off-by: David S. Miller --- net/decnet/dn_table.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/decnet') diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index d8ea583076cf..bd0a52dd1d40 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -123,11 +123,12 @@ static inline void dn_rebuild_zone(struct dn_zone *dz, struct dn_fib_node **old_ht, int old_divisor) { + struct dn_fib_node *f, **fp, *next; int i; - struct dn_fib_node *f, **fp; for(i = 0; i < old_divisor; i++) { - for(f = old_ht[i]; f; f = f->fn_next) { + for(f = old_ht[i]; f; f = next) { + next = f->fn_next; for(fp = dn_chain_p(f->fn_key, dz); *fp && dn_key_leq((*fp)->fn_key, f->fn_key); fp = &(*fp)->fn_next) -- cgit v1.2.3 From 5c1e6aa300a7a669dc469d2dcb20172c6bd8fed9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Apr 2011 14:13:38 -0700 Subject: net: Make dst_alloc() take more explicit initializations. Now the dst->dev, dev->obsolete, and dst->flags values can be specified as well. Signed-off-by: David S. Miller --- include/net/dst.h | 3 ++- net/core/dst.c | 18 +++++++++++++----- net/decnet/dn_route.c | 13 ++----------- net/ipv4/route.c | 40 +++++++++++++++------------------------- net/ipv6/route.c | 29 +++++++++++------------------ net/xfrm/xfrm_policy.c | 2 +- 6 files changed, 44 insertions(+), 61 deletions(-) (limited to 'net/decnet') diff --git a/include/net/dst.h b/include/net/dst.h index d7bb74062df1..2588a9a88cc6 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -350,7 +350,8 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) } extern int dst_discard(struct sk_buff *skb); -extern void *dst_alloc(struct dst_ops * ops, int initial_ref); +extern void *dst_alloc(struct dst_ops * ops, struct net_device *dev, + int initial_ref, int initial_obsolete, int flags); extern void __dst_free(struct dst_entry * dst); extern struct dst_entry *dst_destroy(struct dst_entry * dst); diff --git a/net/core/dst.c b/net/core/dst.c index 91104d35de7d..9505778ec800 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -166,7 +166,8 @@ EXPORT_SYMBOL(dst_discard); const u32 dst_default_metrics[RTAX_MAX]; -void *dst_alloc(struct dst_ops *ops, int initial_ref) +void *dst_alloc(struct dst_ops *ops, struct net_device *dev, + int initial_ref, int initial_obsolete, int flags) { struct dst_entry *dst; @@ -177,12 +178,19 @@ void *dst_alloc(struct dst_ops *ops, int initial_ref) dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; - atomic_set(&dst->__refcnt, initial_ref); dst->ops = ops; - dst->lastuse = jiffies; - dst->path = dst; - dst->input = dst->output = dst_discard; + dst->dev = dev; + if (dev) + dev_hold(dev); dst_init_metrics(dst, dst_default_metrics, true); + dst->path = dst; + dst->input = dst_discard; + dst->output = dst_discard; + + dst->obsolete = initial_obsolete; + atomic_set(&dst->__refcnt, initial_ref); + dst->lastuse = jiffies; + dst->flags = flags; #if RT_CACHE_DEBUG >= 2 atomic_inc(&dst_total); #endif diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 9f09d4fc2880..f489b081c25d 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1125,13 +1125,10 @@ make_route: if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; - rt = dst_alloc(&dn_dst_ops, 0); + rt = dst_alloc(&dn_dst_ops, dev_out, 1, 0, DST_HOST); if (rt == NULL) goto e_nobufs; - atomic_set(&rt->dst.__refcnt, 1); - rt->dst.flags = DST_HOST; - rt->fld.saddr = oldflp->saddr; rt->fld.daddr = oldflp->daddr; rt->fld.flowidn_oif = oldflp->flowidn_oif; @@ -1146,8 +1143,6 @@ make_route: rt->rt_dst_map = fld.daddr; rt->rt_src_map = fld.saddr; - rt->dst.dev = dev_out; - dev_hold(dev_out); rt->dst.neighbour = neigh; neigh = NULL; @@ -1399,7 +1394,7 @@ static int dn_route_input_slow(struct sk_buff *skb) } make_route: - rt = dst_alloc(&dn_dst_ops, 0); + rt = dst_alloc(&dn_dst_ops, out_dev, 0, 0, DST_HOST); if (rt == NULL) goto e_nobufs; @@ -1419,9 +1414,7 @@ make_route: rt->fld.flowidn_iif = in_dev->ifindex; rt->fld.flowidn_mark = fld.flowidn_mark; - rt->dst.flags = DST_HOST; rt->dst.neighbour = neigh; - rt->dst.dev = out_dev; rt->dst.lastuse = jiffies; rt->dst.output = dn_rt_bug; switch(res.type) { @@ -1440,8 +1433,6 @@ make_route: rt->dst.input = dst_discard; } rt->rt_flags = flags; - if (rt->dst.dev) - dev_hold(rt->dst.dev); err = dn_rt_set_next_hop(rt, &res); if (err) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d63f780c6941..b471d89b57ee 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1833,17 +1833,13 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, rt->rt_type = type; } -static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) +static struct rtable *rt_dst_alloc(struct net_device *dev, + bool nopolicy, bool noxfrm) { - struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1); - if (rt) { - rt->dst.obsolete = -1; - - rt->dst.flags = DST_HOST | - (nopolicy ? DST_NOPOLICY : 0) | - (noxfrm ? DST_NOXFRM : 0); - } - return rt; + return dst_alloc(&ipv4_dst_ops, dev, 1, -1, + DST_HOST | + (nopolicy ? DST_NOPOLICY : 0) | + (noxfrm ? DST_NOXFRM : 0)); } /* called in rcu_read_lock() section */ @@ -1876,7 +1872,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (err < 0) goto e_err; } - rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + rth = rt_dst_alloc(init_net.loopback_dev, + IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; @@ -1893,8 +1890,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; - rth->dst.dev = init_net.loopback_dev; - dev_hold(rth->dst.dev); rth->rt_oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; @@ -2013,7 +2008,8 @@ static int __mkroute_input(struct sk_buff *skb, } } - rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), + rth = rt_dst_alloc(out_dev->dev, + IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; @@ -2029,8 +2025,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_gateway = daddr; rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; - rth->dst.dev = (out_dev)->dev; - dev_hold(rth->dst.dev); rth->rt_oif = 0; rth->rt_spec_dst= spec_dst; @@ -2188,7 +2182,8 @@ brd_input: RT_CACHE_STAT_INC(in_brd); local_input: - rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + rth = rt_dst_alloc(net->loopback_dev, + IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; @@ -2206,8 +2201,6 @@ local_input: #endif rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; - rth->dst.dev = net->loopback_dev; - dev_hold(rth->dst.dev); rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->dst.input= ip_local_deliver; @@ -2392,7 +2385,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fi = NULL; } - rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), + rth = rt_dst_alloc(dev_out, + IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(in_dev, NOXFRM)); if (!rth) return ERR_PTR(-ENOBUFS); @@ -2406,10 +2400,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_src = fl4->saddr; rth->rt_route_iif = 0; rth->rt_iif = oldflp4->flowi4_oif ? : dev_out->ifindex; - /* get references to the devices that are to be hold by the routing - cache entry */ - rth->dst.dev = dev_out; - dev_hold(dev_out); rth->rt_gateway = fl4->daddr; rth->rt_spec_dst= fl4->saddr; @@ -2711,7 +2701,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) { - struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1); + struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0); struct rtable *ort = (struct rtable *) dst_orig; if (rt) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 19a77d0e0308..e8b2bb9060ef 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -227,9 +227,10 @@ static struct rt6_info ip6_blk_hole_entry_template = { #endif /* allocate dst with ip6_dst_ops */ -static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) +static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, + struct net_device *dev) { - return (struct rt6_info *)dst_alloc(ops, 0); + return (struct rt6_info *)dst_alloc(ops, dev, 0, 0, 0); } static void ip6_dst_destroy(struct dst_entry *dst) @@ -881,10 +882,10 @@ EXPORT_SYMBOL(ip6_route_output); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) { - struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1); - struct rt6_info *ort = (struct rt6_info *) dst_orig; + struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; struct dst_entry *new = NULL; + rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0); if (rt) { new = &rt->dst; @@ -893,9 +894,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new->output = dst_discard; dst_copy_metrics(new, &ort->dst); - new->dev = ort->dst.dev; - if (new->dev) - dev_hold(new->dev); rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); @@ -1038,13 +1036,12 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(idev == NULL)) return NULL; - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev); if (unlikely(rt == NULL)) { in6_dev_put(idev); goto out; } - dev_hold(dev); if (neigh) neigh_hold(neigh); else { @@ -1053,7 +1050,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, neigh = NULL; } - rt->rt6i_dev = dev; rt->rt6i_idev = idev; rt->rt6i_nexthop = neigh; atomic_set(&rt->dst.__refcnt, 1); @@ -1212,7 +1208,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL); if (rt == NULL) { err = -ENOMEM; @@ -1731,7 +1727,8 @@ void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *sad static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) { struct net *net = dev_net(ort->rt6i_dev); - struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); + struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, + ort->dst.dev); if (rt) { rt->dst.input = ort->dst.input; @@ -1739,9 +1736,6 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; - rt->dst.dev = ort->dst.dev; - if (rt->dst.dev) - dev_hold(rt->dst.dev); rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); @@ -2011,7 +2005,8 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, int anycast) { struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); + struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, + net->loopback_dev); struct neighbour *neigh; if (rt == NULL) { @@ -2021,13 +2016,11 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, return ERR_PTR(-ENOMEM); } - dev_hold(net->loopback_dev); in6_dev_hold(idev); rt->dst.flags = DST_HOST; rt->dst.input = ip6_input; rt->dst.output = ip6_output; - rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; rt->dst.obsolete = -1; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 15792d8b6272..70552c4e2272 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1348,7 +1348,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) default: BUG(); } - xdst = dst_alloc(dst_ops, 0); + xdst = dst_alloc(dst_ops, NULL, 0, 0, 0); xfrm_policy_put_afinfo(afinfo); if (likely(xdst)) -- cgit v1.2.3 From cf91166223772ef4a2ed98b9874958bf6a2470df Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Apr 2011 14:31:47 -0700 Subject: net: Use non-zero allocations in dst_alloc(). Make dst_alloc() and it's users explicitly initialize the entire entry. The zero'ing done by kmem_cache_zalloc() was almost entirely redundant. Signed-off-by: David S. Miller --- net/core/dst.c | 20 +++++++++++-- net/decnet/dn_route.c | 2 ++ net/ipv4/route.c | 78 ++++++++++++++++++++++++++++++-------------------- net/ipv6/route.c | 8 +++++- net/xfrm/xfrm_policy.c | 1 + 5 files changed, 74 insertions(+), 35 deletions(-) (limited to 'net/decnet') diff --git a/net/core/dst.c b/net/core/dst.c index 9505778ec800..30f009327b62 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -175,22 +175,36 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, if (ops->gc(ops)) return NULL; } - dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); + dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; - dst->ops = ops; + dst->child = NULL; dst->dev = dev; if (dev) dev_hold(dev); + dst->ops = ops; dst_init_metrics(dst, dst_default_metrics, true); + dst->expires = 0UL; dst->path = dst; + dst->neighbour = NULL; + dst->hh = NULL; +#ifdef CONFIG_XFRM + dst->xfrm = NULL; +#endif dst->input = dst_discard; dst->output = dst_discard; - + dst->error = 0; dst->obsolete = initial_obsolete; + dst->header_len = 0; + dst->trailer_len = 0; +#ifdef CONFIG_IP_ROUTE_CLASSID + dst->tclassid = 0; +#endif atomic_set(&dst->__refcnt, initial_ref); + dst->__use = 0; dst->lastuse = jiffies; dst->flags = flags; + dst->next = NULL; #if RT_CACHE_DEBUG >= 2 atomic_inc(&dst_total); #endif diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index f489b081c25d..74544bc6fdec 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1129,6 +1129,7 @@ make_route: if (rt == NULL) goto e_nobufs; + memset(&rt->fld, 0, sizeof(rt->fld)); rt->fld.saddr = oldflp->saddr; rt->fld.daddr = oldflp->daddr; rt->fld.flowidn_oif = oldflp->flowidn_oif; @@ -1398,6 +1399,7 @@ make_route: if (rt == NULL) goto e_nobufs; + memset(&rt->fld, 0, sizeof(rt->fld)); rt->rt_saddr = fld.saddr; rt->rt_daddr = fld.daddr; rt->rt_gateway = fld.daddr; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b471d89b57ee..fb9211adf079 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1830,7 +1830,6 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, #endif set_class_tag(rt, itag); #endif - rt->rt_type = type; } static struct rtable *rt_dst_alloc(struct net_device *dev, @@ -1877,25 +1876,28 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (!rth) goto e_nobufs; +#ifdef CONFIG_IP_ROUTE_CLASSID + rth->dst.tclassid = itag; +#endif rth->dst.output = ip_rt_bug; rth->rt_key_dst = daddr; - rth->rt_dst = daddr; - rth->rt_tos = tos; - rth->rt_mark = skb->mark; rth->rt_key_src = saddr; + rth->rt_genid = rt_genid(dev_net(dev)); + rth->rt_flags = RTCF_MULTICAST; + rth->rt_type = RTN_MULTICAST; + rth->rt_tos = tos; + rth->rt_dst = daddr; rth->rt_src = saddr; -#ifdef CONFIG_IP_ROUTE_CLASSID - rth->dst.tclassid = itag; -#endif rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; + rth->rt_mark = skb->mark; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->rt_genid = rt_genid(dev_net(dev)); - rth->rt_flags = RTCF_MULTICAST; - rth->rt_type = RTN_MULTICAST; + rth->rt_peer_genid = 0; + rth->peer = NULL; + rth->fi = NULL; if (our) { rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; @@ -2017,25 +2019,28 @@ static int __mkroute_input(struct sk_buff *skb, } rth->rt_key_dst = daddr; - rth->rt_dst = daddr; - rth->rt_tos = tos; - rth->rt_mark = skb->mark; rth->rt_key_src = saddr; + rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); + rth->rt_flags = flags; + rth->rt_type = res->type; + rth->rt_tos = tos; + rth->rt_dst = daddr; rth->rt_src = saddr; - rth->rt_gateway = daddr; rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; + rth->rt_mark = skb->mark; + rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; + rth->rt_peer_genid = 0; + rth->peer = NULL; + rth->fi = NULL; rth->dst.input = ip_forward; rth->dst.output = ip_output; - rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); - rth->rt_flags = flags; - *result = rth; err = 0; cleanup: @@ -2187,30 +2192,37 @@ local_input: if (!rth) goto e_nobufs; + rth->dst.input= ip_local_deliver; rth->dst.output= ip_rt_bug; - rth->rt_genid = rt_genid(net); +#ifdef CONFIG_IP_ROUTE_CLASSID + rth->dst.tclassid = itag; +#endif rth->rt_key_dst = daddr; - rth->rt_dst = daddr; - rth->rt_tos = tos; - rth->rt_mark = skb->mark; rth->rt_key_src = saddr; + rth->rt_genid = rt_genid(net); + rth->rt_flags = flags|RTCF_LOCAL; + rth->rt_type = res.type; + rth->rt_tos = tos; + rth->rt_dst = daddr; rth->rt_src = saddr; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; + rth->rt_oif = 0; + rth->rt_mark = skb->mark; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->dst.input= ip_local_deliver; - rth->rt_flags = flags|RTCF_LOCAL; + rth->rt_peer_genid = 0; + rth->peer = NULL; + rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - rth->rt_type = res.type; hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); err = 0; @@ -2391,20 +2403,25 @@ static struct rtable *__mkroute_output(const struct fib_result *res, if (!rth) return ERR_PTR(-ENOBUFS); + rth->dst.output = ip_output; + rth->rt_key_dst = oldflp4->daddr; - rth->rt_tos = tos; rth->rt_key_src = oldflp4->saddr; - rth->rt_oif = oldflp4->flowi4_oif; - rth->rt_mark = oldflp4->flowi4_mark; + rth->rt_genid = rt_genid(dev_net(dev_out)); + rth->rt_flags = flags; + rth->rt_type = type; + rth->rt_tos = tos; rth->rt_dst = fl4->daddr; rth->rt_src = fl4->saddr; rth->rt_route_iif = 0; rth->rt_iif = oldflp4->flowi4_oif ? : dev_out->ifindex; + rth->rt_oif = oldflp4->flowi4_oif; + rth->rt_mark = oldflp4->flowi4_mark; rth->rt_gateway = fl4->daddr; rth->rt_spec_dst= fl4->saddr; - - rth->dst.output=ip_output; - rth->rt_genid = rt_genid(dev_net(dev_out)); + rth->rt_peer_genid = 0; + rth->peer = NULL; + rth->fi = NULL; RT_CACHE_STAT_INC(out_slow_tot); @@ -2432,7 +2449,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rt_set_nexthop(rth, oldflp4, res, fi, type, 0); - rth->rt_flags = flags; return rth; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e8b2bb9060ef..f1be5c5c85ef 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -230,7 +230,11 @@ static struct rt6_info ip6_blk_hole_entry_template = { static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, struct net_device *dev) { - return (struct rt6_info *)dst_alloc(ops, dev, 0, 0, 0); + struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, 0); + + memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); + + return rt; } static void ip6_dst_destroy(struct dst_entry *dst) @@ -887,6 +891,8 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0); if (rt) { + memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); + new = &rt->dst; new->__use = 1; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 70552c4e2272..00bcb88386c2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1349,6 +1349,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) BUG(); } xdst = dst_alloc(dst_ops, NULL, 0, 0, 0); + memset(&xdst->u.rt6.rt6i_table, 0, sizeof(*xdst) - sizeof(struct dst_entry)); xfrm_policy_put_afinfo(afinfo); if (likely(xdst)) -- cgit v1.2.3 From e67f88dd12f610da98ca838822f2c9b4e7c6100e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Apr 2011 22:56:07 +0000 Subject: net: dont hold rtnl mutex during netlink dump callbacks Four years ago, Patrick made a change to hold rtnl mutex during netlink dump callbacks. I believe it was a wrong move. This slows down concurrent dumps, making good old /proc/net/ files faster than rtnetlink in some situations. This occurred to me because one "ip link show dev ..." was _very_ slow on a workload adding/removing network devices in background. All dump callbacks are able to use RCU locking now, so this patch does roughly a revert of commits : 1c2d670f366 : [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks 6313c1e0992 : [RTNETLINK]: Remove unnecessary locking in dump callbacks This let writers fight for rtnl mutex and readers going full speed. It also takes care of phonet : phonet_route_get() is now called from rcu read section. I renamed it to phonet_route_get_rcu() Signed-off-by: Eric Dumazet Cc: Patrick McHardy Cc: Remi Denis-Courmont Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/phonet/pn_dev.h | 2 +- net/bridge/br_netlink.c | 7 ++++--- net/core/fib_rules.c | 3 ++- net/core/rtnetlink.c | 12 +++++------- net/decnet/dn_dev.c | 10 ++++++---- net/ipv6/ip6_fib.c | 4 +++- net/phonet/pn_dev.c | 6 +----- net/phonet/pn_netlink.c | 4 +++- 8 files changed, 25 insertions(+), 23 deletions(-) (limited to 'net/decnet') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 13649eb57413..8639de5750f6 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -51,7 +51,7 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr); int phonet_route_add(struct net_device *dev, u8 daddr); int phonet_route_del(struct net_device *dev, u8 daddr); void rtm_phonet_notify(int event, struct net_device *dev, u8 dst); -struct net_device *phonet_route_get(struct net *net, u8 daddr); +struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr); struct net_device *phonet_route_output(struct net *net, u8 daddr); #define PN_NO_ADDR 0xff diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 134a2ff6b98b..ffb0dc4cc0e8 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -120,8 +120,9 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) int idx; idx = 0; - for_each_netdev(net, dev) { - struct net_bridge_port *port = br_port_get_rtnl(dev); + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + struct net_bridge_port *port = br_port_get_rcu(dev); /* not a bridge port */ if (!port || idx < cb->args[0]) @@ -135,7 +136,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) skip: ++idx; } - + rcu_read_unlock(); cb->args[0] = idx; return skb->len; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 8248ebb5891d..3911586e12e4 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -590,7 +590,8 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, int idx = 0; struct fib_rule *rule; - list_for_each_entry(rule, &ops->rules_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(rule, &ops->rules_list, list) { if (idx < cb->args[1]) goto skip; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d7c4bb4b1820..296331257195 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1007,10 +1007,11 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) s_h = cb->args[0]; s_idx = cb->args[1]; + rcu_read_lock(); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, node, head, index_hlist) { if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -1023,6 +1024,7 @@ cont: } } out: + rcu_read_unlock(); cb->args[1] = idx; cb->args[0] = h; @@ -1879,7 +1881,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) int min_len; int family; int type; - int err; type = nlh->nlmsg_type; if (type > RTM_MAX) @@ -1906,11 +1907,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (dumpit == NULL) return -EOPNOTSUPP; - __rtnl_unlock(); rtnl = net->rtnl; - err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); - rtnl_lock(); - return err; + return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); } memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); @@ -1980,7 +1978,7 @@ static int __net_init rtnetlink_net_init(struct net *net) { struct sock *sk; sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, - rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); + rtnetlink_rcv, NULL, THIS_MODULE); if (!sk) return -ENOMEM; net->rtnl = sk; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 0dcaa903e00e..404fa1591027 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -752,7 +752,8 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) skip_naddr = cb->args[1]; idx = 0; - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if (idx < skip_ndevs) goto cont; else if (idx > skip_ndevs) { @@ -761,11 +762,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) skip_naddr = 0; } - if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) + if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL) goto cont; - for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa; - ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) { + for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa; + ifa = rcu_dereference(ifa->ifa_next), dn_idx++) { if (dn_idx < skip_naddr) continue; @@ -778,6 +779,7 @@ cont: idx++; } done: + rcu_read_unlock(); cb->args[0] = idx; cb->args[1] = dn_idx; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index dd88df0a5d7f..4076a0b14b20 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -394,10 +394,11 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) arg.net = net; w->args = &arg; + rcu_read_lock(); for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry(tb, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { if (e < s_e) goto next; res = fib6_dump_table(tb, skb, cb); @@ -408,6 +409,7 @@ next: } } out: + rcu_read_unlock(); cb->args[1] = e; cb->args[0] = h; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 947038ddd04c..47b3452675b6 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -426,18 +426,14 @@ int phonet_route_del(struct net_device *dev, u8 daddr) return 0; } -struct net_device *phonet_route_get(struct net *net, u8 daddr) +struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr) { struct phonet_net *pnn = phonet_pernet(net); struct phonet_routes *routes = &pnn->routes; struct net_device *dev; - ASSERT_RTNL(); /* no need to hold the device */ - daddr >>= 2; - rcu_read_lock(); dev = rcu_dereference(routes->table[daddr]); - rcu_read_unlock(); return dev; } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 58b3b1f991ed..438accb7a5a8 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -264,10 +264,11 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); u8 addr, addr_idx = 0, addr_start_idx = cb->args[0]; + rcu_read_lock(); for (addr = 0; addr < 64; addr++) { struct net_device *dev; - dev = phonet_route_get(net, addr << 2); + dev = phonet_route_get_rcu(net, addr << 2); if (!dev) continue; @@ -279,6 +280,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } out: + rcu_read_unlock(); cb->args[0] = addr_idx; cb->args[1] = 0; -- cgit v1.2.3