From dfd2ee086a63c730022cb095576a8b3a5a752109 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Feb 2024 17:30:31 +0000 Subject: ipv6: make addrconf_wq single threaded Both addrconf_verify_work() and addrconf_dad_work() acquire rtnl, there is no point trying to have one thread per cpu. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240201173031.3654257-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 733ace18806c..d63f5d063f07 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7349,7 +7349,8 @@ int __init addrconf_init(void) if (err < 0) goto out_addrlabel; - addrconf_wq = create_workqueue("ipv6_addrconf"); + /* All works using addrconf_wq need to lock rtnl. */ + addrconf_wq = create_singlethread_workqueue("ipv6_addrconf"); if (!addrconf_wq) { err = -ENOMEM; goto out_nowq; -- cgit v1.2.3 From 5eb902b8e7193cdcb33242af0a56502e6b5206e9 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 8 Feb 2024 14:06:51 -0800 Subject: net/ipv6: Remove expired routes with a separated list of routes. FIB6 GC walks trees of fib6_tables to remove expired routes. Walking a tree can be expensive if the number of routes in a table is big, even if most of them are permanent. Checking routes in a separated list of routes having expiration will avoid this potential issue. Reviewed-by: David Ahern Signed-off-by: Kui-Feng Lee Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 46 ++++++++++++++++++++++++++++++++++++++- net/ipv6/addrconf.c | 41 +++++++++++++++++++++++++++++------ net/ipv6/ip6_fib.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++----- net/ipv6/ndisc.c | 10 ++++++++- net/ipv6/route.c | 13 +++++++++-- 5 files changed, 154 insertions(+), 16 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 360b12e61850..323c94f1845b 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -173,6 +173,9 @@ struct fib6_info { refcount_t fib6_ref; unsigned long expires; + + struct hlist_node gc_link; + struct dst_metrics *fib6_metrics; #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] @@ -241,12 +244,18 @@ static inline bool fib6_requires_src(const struct fib6_info *rt) return rt->fib6_src.plen > 0; } +/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever + * been added to a table before. + */ static inline void fib6_clean_expires(struct fib6_info *f6i) { f6i->fib6_flags &= ~RTF_EXPIRES; f6i->expires = 0; } +/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever + * been added to a table before. + */ static inline void fib6_set_expires(struct fib6_info *f6i, unsigned long expires) { @@ -327,8 +336,10 @@ static inline bool fib6_info_hold_safe(struct fib6_info *f6i) static inline void fib6_info_release(struct fib6_info *f6i) { - if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) + if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) { + DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link)); call_rcu(&f6i->rcu, fib6_info_destroy_rcu); + } } enum fib6_walk_state { @@ -382,6 +393,7 @@ struct fib6_table { struct inet_peer_base tb6_peers; unsigned int flags; unsigned int fib_seq; + struct hlist_head tb6_gc_hlist; /* GC candidates */ #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; @@ -498,6 +510,38 @@ void fib6_gc_cleanup(void); int fib6_init(void); +/* Add the route to the gc list if it is not already there + * + * The callers should hold f6i->fib6_table->tb6_lock. + */ +static inline void fib6_add_gc_list(struct fib6_info *f6i) +{ + /* If fib6_node is null, the f6i is not in (or removed from) the + * table. + * + * There is a gap between finding the f6i from the table and + * calling this function without the protection of the tb6_lock. + * This check makes sure the f6i is not added to the gc list when + * it is not on the table. + */ + if (!rcu_dereference_protected(f6i->fib6_node, + lockdep_is_held(&f6i->fib6_table->tb6_lock))) + return; + + if (hlist_unhashed(&f6i->gc_link)) + hlist_add_head(&f6i->gc_link, &f6i->fib6_table->tb6_gc_hlist); +} + +/* Remove the route from the gc list if it is on the list. + * + * The callers should hold f6i->fib6_table->tb6_lock. + */ +static inline void fib6_remove_gc_list(struct fib6_info *f6i) +{ + if (!hlist_unhashed(&f6i->gc_link)) + hlist_del_init(&f6i->gc_link); +} + struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d63f5d063f07..0ea44563454f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1255,6 +1255,7 @@ static void cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt, bool del_peer) { + struct fib6_table *table; struct fib6_info *f6i; f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr, @@ -1264,8 +1265,15 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, if (del_rt) ip6_del_rt(dev_net(ifp->idev->dev), f6i, false); else { - if (!(f6i->fib6_flags & RTF_EXPIRES)) + if (!(f6i->fib6_flags & RTF_EXPIRES)) { + table = f6i->fib6_table; + spin_lock_bh(&table->tb6_lock); + fib6_set_expires(f6i, expires); + fib6_add_gc_list(f6i); + + spin_unlock_bh(&table->tb6_lock); + } fib6_info_release(f6i); } } @@ -2706,6 +2714,7 @@ EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr); void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) { struct prefix_info *pinfo; + struct fib6_table *table; __u32 valid_lft; __u32 prefered_lft; int addr_type, err; @@ -2782,11 +2791,20 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) if (valid_lft == 0) { ip6_del_rt(net, rt, false); rt = NULL; - } else if (addrconf_finite_timeout(rt_expires)) { - /* not infinity */ - fib6_set_expires(rt, jiffies + rt_expires); } else { - fib6_clean_expires(rt); + table = rt->fib6_table; + spin_lock_bh(&table->tb6_lock); + + if (addrconf_finite_timeout(rt_expires)) { + /* not infinity */ + fib6_set_expires(rt, jiffies + rt_expires); + fib6_add_gc_list(rt); + } else { + fib6_clean_expires(rt); + fib6_remove_gc_list(rt); + } + + spin_unlock_bh(&table->tb6_lock); } } else if (valid_lft) { clock_t expires = 0; @@ -4741,6 +4759,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, u32 flags, bool modify_peer) { + struct fib6_table *table; struct fib6_info *f6i; u32 prio; @@ -4761,10 +4780,18 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, ifp->rt_priority, ifp->idev->dev, expires, flags, GFP_KERNEL); } else { - if (!expires) + table = f6i->fib6_table; + spin_lock_bh(&table->tb6_lock); + + if (!expires) { fib6_clean_expires(f6i); - else + fib6_remove_gc_list(f6i); + } else { fib6_set_expires(f6i, expires); + fib6_add_gc_list(f6i); + } + + spin_unlock_bh(&table->tb6_lock); fib6_info_release(f6i); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 38a0348b1d17..805bbf26b3ef 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh) INIT_LIST_HEAD(&f6i->fib6_siblings); refcount_set(&f6i->fib6_ref, 1); + INIT_HLIST_NODE(&f6i->gc_link); + return f6i; } @@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) net->ipv6.fib6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); + INIT_HLIST_HEAD(&table->tb6_gc_hlist); } return table; @@ -1055,6 +1058,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, lockdep_is_held(&table->tb6_lock)); } } + + fib6_clean_expires(rt); + fib6_remove_gc_list(rt); } /* @@ -1115,10 +1121,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, rt->fib6_nsiblings = 0; if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; - if (!(rt->fib6_flags & RTF_EXPIRES)) + if (!(rt->fib6_flags & RTF_EXPIRES)) { fib6_clean_expires(iter); - else + fib6_remove_gc_list(iter); + } else { fib6_set_expires(iter, rt->expires); + fib6_add_gc_list(iter); + } if (rt->fib6_pmtu) fib6_metric_set(iter, RTAX_MTU, @@ -1477,6 +1486,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (rt->nh) list_add(&rt->nh_list, &rt->nh->f6i_list); __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); + + if (rt->fib6_flags & RTF_EXPIRES) + fib6_add_gc_list(rt); + fib6_start_gc(info->nl_net, rt); } @@ -2280,9 +2293,8 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -static int fib6_age(struct fib6_info *rt, void *arg) +static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) { - struct fib6_gc_args *gc_args = arg; unsigned long now = jiffies; /* @@ -2307,6 +2319,42 @@ static int fib6_age(struct fib6_info *rt, void *arg) return 0; } +static void fib6_gc_table(struct net *net, + struct fib6_table *tb6, + struct fib6_gc_args *gc_args) +{ + struct fib6_info *rt; + struct hlist_node *n; + struct nl_info info = { + .nl_net = net, + .skip_notify = false, + }; + + hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link) + if (fib6_age(rt, gc_args) == -1) + fib6_del(rt, &info); +} + +static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args) +{ + struct fib6_table *table; + struct hlist_head *head; + unsigned int h; + + rcu_read_lock(); + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { + head = &net->ipv6.fib_table_hash[h]; + hlist_for_each_entry_rcu(table, head, tb6_hlist) { + spin_lock_bh(&table->tb6_lock); + + fib6_gc_table(net, table, gc_args); + + spin_unlock_bh(&table->tb6_lock); + } + } + rcu_read_unlock(); +} + void fib6_run_gc(unsigned long expires, struct net *net, bool force) { struct fib6_gc_args gc_args; @@ -2322,7 +2370,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = 0; - fib6_clean_all(net, fib6_age, &gc_args); + fib6_gc_all(net, &gc_args); now = jiffies; net->ipv6.ip6_rt_last_gc = now; @@ -2382,6 +2430,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_main_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); + INIT_HLIST_HEAD(&net->ipv6.fib6_main_tbl->tb6_gc_hlist); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), @@ -2394,6 +2443,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_local_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); + INIT_HLIST_HEAD(&net->ipv6.fib6_local_tbl->tb6_gc_hlist); #endif fib6_tables_init(net); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a68462668158..73cb31afe935 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1237,6 +1237,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) struct ndisc_options ndopts; struct fib6_info *rt = NULL; struct inet6_dev *in6_dev; + struct fib6_table *table; u32 defrtr_usr_metric; unsigned int pref = 0; __u32 old_if_flags; @@ -1410,8 +1411,15 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE); } - if (rt) + if (rt) { + table = rt->fib6_table; + spin_lock_bh(&table->tb6_lock); + fib6_set_expires(rt, jiffies + (HZ * lifetime)); + fib6_add_gc_list(rt); + + spin_unlock_bh(&table->tb6_lock); + } if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && ra_msg->icmph.icmp6_hop_limit) { if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dd6ff5b20918..707d65bc9c0e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -931,6 +931,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct net *net = dev_net(dev); struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; + struct fib6_table *table; unsigned int pref; unsigned long lifetime; struct fib6_info *rt; @@ -989,10 +990,18 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { - if (!addrconf_finite_timeout(lifetime)) + table = rt->fib6_table; + spin_lock_bh(&table->tb6_lock); + + if (!addrconf_finite_timeout(lifetime)) { fib6_clean_expires(rt); - else + fib6_remove_gc_list(rt); + } else { fib6_set_expires(rt, jiffies + HZ * lifetime); + fib6_add_gc_list(rt); + } + + spin_unlock_bh(&table->tb6_lock); fib6_info_release(rt); } -- cgit v1.2.3 From 768e06a8bcab045de9eac87dd071a769119cd0fd Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 8 Feb 2024 14:06:52 -0800 Subject: net/ipv6: set expires in modify_prefix_route() if RTF_EXPIRES is set. Make the decision to set or clean the expires of a route based on the RTF_EXPIRES flag, rather than the value of the "expires" argument. This patch doesn't make difference logically, but make inet6_addr_modify() and modify_prefix_route() consistent. The function inet6_addr_modify() is the only caller of modify_prefix_route(), and it passes the RTF_EXPIRES flag and an expiration value. The RTF_EXPIRES flag is turned on or off based on the value of valid_lft. The RTF_EXPIRES flag is turned on if valid_lft is a finite value (not infinite, not 0xffffffff). Even if valid_lft is 0, the RTF_EXPIRES flag remains on. The expiration value being passed is equal to the valid_lft value if the flag is on. However, if the valid_lft value is infinite, the expiration value becomes 0 and the RTF_EXPIRES flag is turned off. Despite this, modify_prefix_route() decides to set the expiration value if the received expiration value is not zero. This mixing of infinite and zero cases creates an inconsistency. Reviewed-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: Kui-Feng Lee Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0ea44563454f..ca1b719323c0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4783,7 +4783,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, table = f6i->fib6_table; spin_lock_bh(&table->tb6_lock); - if (!expires) { + if (!(flags & RTF_EXPIRES)) { fib6_clean_expires(f6i); fib6_remove_gc_list(f6i); } else { -- cgit v1.2.3 From 004d138364fd10dd5ff8ceb54cfdc2d792a7b338 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:39 +0000 Subject: net-sysfs: convert dev->operstate reads to lockless ones operstate_show() can omit dev_base_lock acquisition only to read dev->operstate. Annotate accesses to dev->operstate. Writers still acquire dev_base_lock for mutual exclusion. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 3 ++- net/core/link_watch.c | 4 ++-- net/core/net-sysfs.c | 4 +--- net/core/rtnetlink.c | 4 ++-- net/hsr/hsr_device.c | 10 +++++----- net/ipv6/addrconf.c | 2 +- 6 files changed, 13 insertions(+), 14 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 5ad4abfcb7ba..2cf4fc756263 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -455,7 +455,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, u32 filter_mask, const struct net_device *dev, bool getlink) { - u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + u8 operstate = netif_running(dev) ? READ_ONCE(dev->operstate) : + IF_OPER_DOWN; struct nlattr *af = NULL; struct net_bridge *br; struct ifinfomsg *hdr; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 429571c258da..1b93e054c9a3 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -67,7 +67,7 @@ static void rfc2863_policy(struct net_device *dev) { unsigned char operstate = default_operstate(dev); - if (operstate == dev->operstate) + if (operstate == READ_ONCE(dev->operstate)) return; write_lock(&dev_base_lock); @@ -87,7 +87,7 @@ static void rfc2863_policy(struct net_device *dev) break; } - dev->operstate = operstate; + WRITE_ONCE(dev->operstate, operstate); write_unlock(&dev_base_lock); } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 23ef2df549c3..c5d164b8c6bf 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -325,11 +325,9 @@ static ssize_t operstate_show(struct device *dev, const struct net_device *netdev = to_net_dev(dev); unsigned char operstate; - read_lock(&dev_base_lock); - operstate = netdev->operstate; + operstate = READ_ONCE(netdev->operstate); if (!netif_running(netdev)) operstate = IF_OPER_DOWN; - read_unlock(&dev_base_lock); if (operstate >= ARRAY_SIZE(operstates)) return -EINVAL; /* should not happen */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fd47a4422a51..43d92de8601c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -866,9 +866,9 @@ static void set_operstate(struct net_device *dev, unsigned char transition) break; } - if (dev->operstate != operstate) { + if (READ_ONCE(dev->operstate) != operstate) { write_lock(&dev_base_lock); - dev->operstate = operstate; + WRITE_ONCE(dev->operstate, operstate); write_unlock(&dev_base_lock); netdev_state_change(dev); } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 9d71b66183da..be0e43f46556 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -31,8 +31,8 @@ static bool is_slave_up(struct net_device *dev) static void __hsr_set_operstate(struct net_device *dev, int transition) { write_lock(&dev_base_lock); - if (dev->operstate != transition) { - dev->operstate = transition; + if (READ_ONCE(dev->operstate) != transition) { + WRITE_ONCE(dev->operstate, transition); write_unlock(&dev_base_lock); netdev_state_change(dev); } else { @@ -78,14 +78,14 @@ static void hsr_check_announce(struct net_device *hsr_dev, hsr = netdev_priv(hsr_dev); - if (hsr_dev->operstate == IF_OPER_UP && old_operstate != IF_OPER_UP) { + if (READ_ONCE(hsr_dev->operstate) == IF_OPER_UP && old_operstate != IF_OPER_UP) { /* Went up */ hsr->announce_count = 0; mod_timer(&hsr->announce_timer, jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); } - if (hsr_dev->operstate != IF_OPER_UP && old_operstate == IF_OPER_UP) + if (READ_ONCE(hsr_dev->operstate) != IF_OPER_UP && old_operstate == IF_OPER_UP) /* Went down */ del_timer(&hsr->announce_timer); } @@ -100,7 +100,7 @@ void hsr_check_carrier_and_operstate(struct hsr_priv *hsr) /* netif_stacked_transfer_operstate() cannot be used here since * it doesn't set IF_OPER_LOWERLAYERDOWN (?) */ - old_operstate = master->dev->operstate; + old_operstate = READ_ONCE(master->dev->operstate); has_carrier = hsr_check_carrier(master); hsr_set_operstate(master, has_carrier); hsr_check_announce(master->dev, old_operstate); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ca1b719323c0..2c1ed642e3f4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6025,7 +6025,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, (dev->ifindex != dev_get_iflink(dev) && nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) || nla_put_u8(skb, IFLA_OPERSTATE, - netif_running(dev) ? dev->operstate : IF_OPER_DOWN)) + netif_running(dev) ? READ_ONCE(dev->operstate) : IF_OPER_DOWN)) goto nla_put_failure; protoinfo = nla_nest_start_noflag(skb, IFLA_PROTINFO); if (!protoinfo) -- cgit v1.2.3 From 2aa8f155b09519814e449dc19adacf01fd1367ee Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Tue, 13 Feb 2024 23:26:30 -0700 Subject: net: ipv6/addrconf: ensure that regen_advance is at least 2 seconds RFC 8981 defines REGEN_ADVANCE as follows: REGEN_ADVANCE = 2 + (TEMP_IDGEN_RETRIES * DupAddrDetectTransmits * RetransTimer / 1000) Thus, allowing it to be less than 2 seconds is technically a protocol violation. Link: https://datatracker.ietf.org/doc/html/rfc8981#name-defined-protocol-parameters Signed-off-by: Alex Henrie Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 4 ++-- net/ipv6/addrconf.c | 15 +++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 7afff42612e9..458305931345 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2503,7 +2503,7 @@ use_tempaddr - INTEGER temp_valid_lft - INTEGER valid lifetime (in seconds) for temporary addresses. If less than the - minimum required lifetime (typically 5 seconds), temporary addresses + minimum required lifetime (typically 5-7 seconds), temporary addresses will not be created. Default: 172800 (2 days) @@ -2511,7 +2511,7 @@ temp_valid_lft - INTEGER temp_prefered_lft - INTEGER Preferred lifetime (in seconds) for temporary addresses. If temp_prefered_lft is less than the minimum required lifetime (typically - 5 seconds), temporary addresses will not be created. If + 5-7 seconds), temporary addresses will not be created. If temp_prefered_lft is greater than temp_valid_lft, the preferred lifetime is temp_valid_lft. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2c1ed642e3f4..65e886d7d80c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1339,6 +1339,13 @@ out: in6_ifa_put(ifp); } +static unsigned long ipv6_get_regen_advance(struct inet6_dev *idev) +{ + return 2 + idev->cnf.regen_max_retry * + idev->cnf.dad_transmits * + max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; +} + static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) { struct inet6_dev *idev = ifp->idev; @@ -1380,9 +1387,7 @@ retry: age = (now - ifp->tstamp) / HZ; - regen_advance = idev->cnf.regen_max_retry * - idev->cnf.dad_transmits * - max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; + regen_advance = ipv6_get_regen_advance(idev); /* recalculate max_desync_factor each time and update * idev->desync_factor if it's larger @@ -4595,9 +4600,7 @@ restart: !ifp->regen_count && ifp->ifpub) { /* This is a non-regenerated temporary addr. */ - unsigned long regen_advance = ifp->idev->cnf.regen_max_retry * - ifp->idev->cnf.dad_transmits * - max(NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; + unsigned long regen_advance = ipv6_get_regen_advance(ifp->idev); if (age + regen_advance >= ifp->prefered_lft) { struct inet6_ifaddr *ifpub = ifp->ifpub; -- cgit v1.2.3 From a5fcea2d2f790aa90b6e996d411ae2cf8db55186 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Tue, 13 Feb 2024 23:26:31 -0700 Subject: net: ipv6/addrconf: introduce a regen_min_advance sysctl In RFC 8981, REGEN_ADVANCE cannot be less than 2 seconds, and the RFC does not permit the creation of temporary addresses with lifetimes shorter than that: > When processing a Router Advertisement with a > Prefix Information option carrying a prefix for the purposes of > address autoconfiguration (i.e., the A bit is set), the host MUST > perform the following steps: > 5. A temporary address is created only if this calculated preferred > lifetime is greater than REGEN_ADVANCE time units. However, some users want to change their IPv6 address as frequently as possible regardless of the RFC's arbitrary minimum lifetime. For the benefit of those users, add a regen_min_advance sysctl parameter that can be set to below or above 2 seconds. Link: https://datatracker.ietf.org/doc/html/rfc8981 Signed-off-by: Alex Henrie Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 10 ++++++++++ include/linux/ipv6.h | 1 + include/net/addrconf.h | 5 +++-- net/ipv6/addrconf.c | 11 ++++++++++- 4 files changed, 24 insertions(+), 3 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 458305931345..407d917d1a36 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2535,6 +2535,16 @@ max_desync_factor - INTEGER Default: 600 +regen_min_advance - INTEGER + How far in advance (in seconds), at minimum, to create a new temporary + address before the current one is deprecated. This value is added to + the amount of time that may be required for duplicate address detection + to determine when to create a new address. Linux permits setting this + value to less than the default of 2 seconds, but a value less than 2 + does not conform to RFC 8981. + + Default: 2 + regen_max_retry - INTEGER Number of attempts before give up attempting to generate valid temporary addresses. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5e605e384aac..ef3aa060a289 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -27,6 +27,7 @@ struct ipv6_devconf { __s32 use_tempaddr; __s32 temp_valid_lft; __s32 temp_prefered_lft; + __s32 regen_min_advance; __s32 regen_max_retry; __s32 max_desync_factor; __s32 max_addresses; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 61ebe723ee4d..30d6f1e84e46 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -8,8 +8,9 @@ #define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ -#define TEMP_VALID_LIFETIME (7*86400) -#define TEMP_PREFERRED_LIFETIME (86400) +#define TEMP_VALID_LIFETIME (7*86400) /* 1 week */ +#define TEMP_PREFERRED_LIFETIME (86400) /* 24 hours */ +#define REGEN_MIN_ADVANCE (2) /* 2 seconds */ #define REGEN_MAX_RETRY (3) #define MAX_DESYNC_FACTOR (600) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 65e886d7d80c..283823fba96a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -195,6 +195,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, + .regen_min_advance = REGEN_MIN_ADVANCE, .regen_max_retry = REGEN_MAX_RETRY, .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, @@ -257,6 +258,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, + .regen_min_advance = REGEN_MIN_ADVANCE, .regen_max_retry = REGEN_MAX_RETRY, .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, @@ -1341,7 +1343,7 @@ out: static unsigned long ipv6_get_regen_advance(struct inet6_dev *idev) { - return 2 + idev->cnf.regen_max_retry * + return idev->cnf.regen_min_advance + idev->cnf.regen_max_retry * idev->cnf.dad_transmits * max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; } @@ -6819,6 +6821,13 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "regen_min_advance", + .data = &ipv6_devconf.regen_min_advance, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "regen_max_retry", .data = &ipv6_devconf.regen_max_retry, -- cgit v1.2.3 From f4bcbf360ac8dc424dc4d2b384b528e69b6f34d9 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Tue, 13 Feb 2024 23:26:32 -0700 Subject: net: ipv6/addrconf: clamp preferred_lft to the minimum required If the preferred lifetime was less than the minimum required lifetime, ipv6_create_tempaddr would error out without creating any new address. On my machine and network, this error happened immediately with the preferred lifetime set to 5 seconds or less, after a few minutes with the preferred lifetime set to 6 seconds, and not at all with the preferred lifetime set to 7 seconds. During my investigation, I found a Stack Exchange post from another person who seems to have had the same problem: They stopped getting new addresses if they lowered the preferred lifetime below 3 seconds, and they didn't really know why. The preferred lifetime is a preference, not a hard requirement. The kernel does not strictly forbid new connections on a deprecated address, nor does it guarantee that the address will be disposed of the instant its total valid lifetime expires. So rather than disable IPv6 privacy extensions altogether if the minimum required lifetime swells above the preferred lifetime, it is more in keeping with the user's intent to increase the temporary address's lifetime to the minimum necessary for the current network conditions. With these fixes, setting the preferred lifetime to 5 or 6 seconds "just works" because the extra fraction of a second is practically unnoticeable. It's even possible to reduce the time before deprecation to 1 or 2 seconds by setting /proc/sys/net/ipv6/conf/*/regen_min_advance and /proc/sys/net/ipv6/conf/*/dad_transmits to 0. I realize that that is a pretty niche use case, but I know at least one person who would gladly sacrifice performance and convenience to be sure that they are getting the maximum possible level of privacy. Link: https://serverfault.com/a/1031168/310447 Signed-off-by: Alex Henrie Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 2 +- net/ipv6/addrconf.c | 43 +++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 10 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 407d917d1a36..bd50df6a5a42 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2511,7 +2511,7 @@ temp_valid_lft - INTEGER temp_prefered_lft - INTEGER Preferred lifetime (in seconds) for temporary addresses. If temp_prefered_lft is less than the minimum required lifetime (typically - 5-7 seconds), temporary addresses will not be created. If + 5-7 seconds), the preferred lifetime is the minimum required. If temp_prefered_lft is greater than temp_valid_lft, the preferred lifetime is temp_valid_lft. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 283823fba96a..d3f4b7b9cf1f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1354,6 +1354,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) unsigned long tmp_tstamp, age; unsigned long regen_advance; unsigned long now = jiffies; + u32 if_public_preferred_lft; s32 cnf_temp_preferred_lft; struct inet6_ifaddr *ift; struct ifa6_config cfg; @@ -1409,11 +1410,13 @@ retry: } } + if_public_preferred_lft = ifp->prefered_lft; + memset(&cfg, 0, sizeof(cfg)); cfg.valid_lft = min_t(__u32, ifp->valid_lft, idev->cnf.temp_valid_lft + age); cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor; - cfg.preferred_lft = min_t(__u32, ifp->prefered_lft, cfg.preferred_lft); + cfg.preferred_lft = min_t(__u32, if_public_preferred_lft, cfg.preferred_lft); cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft); cfg.plen = ifp->prefix_len; @@ -1422,19 +1425,41 @@ retry: write_unlock_bh(&idev->lock); - /* A temporary address is created only if this calculated Preferred - * Lifetime is greater than REGEN_ADVANCE time units. In particular, - * an implementation must not create a temporary address with a zero - * Preferred Lifetime. + /* From RFC 4941: + * + * A temporary address is created only if this calculated Preferred + * Lifetime is greater than REGEN_ADVANCE time units. In + * particular, an implementation must not create a temporary address + * with a zero Preferred Lifetime. + * + * ... + * + * When creating a temporary address, the lifetime values MUST be + * derived from the corresponding prefix as follows: + * + * ... + * + * * Its Preferred Lifetime is the lower of the Preferred Lifetime + * of the public address or TEMP_PREFERRED_LIFETIME - + * DESYNC_FACTOR. + * + * To comply with the RFC's requirements, clamp the preferred lifetime + * to a minimum of regen_advance, unless that would exceed valid_lft or + * ifp->prefered_lft. + * * Use age calculation as in addrconf_verify to avoid unnecessary * temporary addresses being generated. */ age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; if (cfg.preferred_lft <= regen_advance + age) { - in6_ifa_put(ifp); - in6_dev_put(idev); - ret = -1; - goto out; + cfg.preferred_lft = regen_advance + age + 1; + if (cfg.preferred_lft > cfg.valid_lft || + cfg.preferred_lft > if_public_preferred_lft) { + in6_ifa_put(ifp); + in6_dev_put(idev); + ret = -1; + goto out; + } } cfg.ifa_flags = IFA_F_TEMPORARY; -- cgit v1.2.3 From 4ad268136421dc9813280c55940eca00796420e5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:09 +0000 Subject: ipv6: prepare inet6_fill_ifla6_attrs() for RCU We want to no longer hold RTNL while calling inet6_fill_ifla6_attrs() in the future. Add needed READ_ONCE()/WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 163 ++++++++++++++++++++++++++++------------------------ net/ipv6/ndisc.c | 2 +- 2 files changed, 90 insertions(+), 75 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c669ea266ab7..a56dad307fe3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3492,7 +3492,8 @@ static void addrconf_dev_config(struct net_device *dev) /* this device type has no EUI support */ if (dev->type == ARPHRD_NONE && idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) - idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM; + WRITE_ONCE(idev->cnf.addr_gen_mode, + IN6_ADDR_GEN_MODE_RANDOM); addrconf_addr_gen(idev, false); } @@ -3764,7 +3765,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, rt6_mtu_change(dev, dev->mtu); idev->cnf.mtu6 = dev->mtu; } - idev->tstamp = jiffies; + WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); /* @@ -4006,7 +4007,7 @@ restart: ipv6_mc_down(idev); } - idev->tstamp = jiffies; + WRITE_ONCE(idev->tstamp, jiffies); idev->ra_mtu = 0; /* Last: Shot the device (if unregistered) */ @@ -5634,87 +5635,97 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); } -static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, - __s32 *array, int bytes) +static void ipv6_store_devconf(const struct ipv6_devconf *cnf, + __s32 *array, int bytes) { BUG_ON(bytes < (DEVCONF_MAX * 4)); memset(array, 0, bytes); - array[DEVCONF_FORWARDING] = cnf->forwarding; - array[DEVCONF_HOPLIMIT] = cnf->hop_limit; - array[DEVCONF_MTU6] = cnf->mtu6; - array[DEVCONF_ACCEPT_RA] = cnf->accept_ra; - array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects; - array[DEVCONF_AUTOCONF] = cnf->autoconf; - array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits; - array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; + array[DEVCONF_FORWARDING] = READ_ONCE(cnf->forwarding); + array[DEVCONF_HOPLIMIT] = READ_ONCE(cnf->hop_limit); + array[DEVCONF_MTU6] = READ_ONCE(cnf->mtu6); + array[DEVCONF_ACCEPT_RA] = READ_ONCE(cnf->accept_ra); + array[DEVCONF_ACCEPT_REDIRECTS] = READ_ONCE(cnf->accept_redirects); + array[DEVCONF_AUTOCONF] = READ_ONCE(cnf->autoconf); + array[DEVCONF_DAD_TRANSMITS] = READ_ONCE(cnf->dad_transmits); + array[DEVCONF_RTR_SOLICITS] = READ_ONCE(cnf->rtr_solicits); array[DEVCONF_RTR_SOLICIT_INTERVAL] = - jiffies_to_msecs(cnf->rtr_solicit_interval); + jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_interval)); array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] = - jiffies_to_msecs(cnf->rtr_solicit_max_interval); + jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_max_interval)); array[DEVCONF_RTR_SOLICIT_DELAY] = - jiffies_to_msecs(cnf->rtr_solicit_delay); - array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; + jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_delay)); + array[DEVCONF_FORCE_MLD_VERSION] = READ_ONCE(cnf->force_mld_version); array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] = - jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval); + jiffies_to_msecs(READ_ONCE(cnf->mldv1_unsolicited_report_interval)); array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] = - jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval); - array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; - array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft; - array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft; - array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry; - array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; - array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; - array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; - array[DEVCONF_RA_DEFRTR_METRIC] = cnf->ra_defrtr_metric; - array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit; - array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; + jiffies_to_msecs(READ_ONCE(cnf->mldv2_unsolicited_report_interval)); + array[DEVCONF_USE_TEMPADDR] = READ_ONCE(cnf->use_tempaddr); + array[DEVCONF_TEMP_VALID_LFT] = READ_ONCE(cnf->temp_valid_lft); + array[DEVCONF_TEMP_PREFERED_LFT] = READ_ONCE(cnf->temp_prefered_lft); + array[DEVCONF_REGEN_MAX_RETRY] = READ_ONCE(cnf->regen_max_retry); + array[DEVCONF_MAX_DESYNC_FACTOR] = READ_ONCE(cnf->max_desync_factor); + array[DEVCONF_MAX_ADDRESSES] = READ_ONCE(cnf->max_addresses); + array[DEVCONF_ACCEPT_RA_DEFRTR] = READ_ONCE(cnf->accept_ra_defrtr); + array[DEVCONF_RA_DEFRTR_METRIC] = READ_ONCE(cnf->ra_defrtr_metric); + array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = + READ_ONCE(cnf->accept_ra_min_hop_limit); + array[DEVCONF_ACCEPT_RA_PINFO] = READ_ONCE(cnf->accept_ra_pinfo); #ifdef CONFIG_IPV6_ROUTER_PREF - array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; + array[DEVCONF_ACCEPT_RA_RTR_PREF] = READ_ONCE(cnf->accept_ra_rtr_pref); array[DEVCONF_RTR_PROBE_INTERVAL] = - jiffies_to_msecs(cnf->rtr_probe_interval); + jiffies_to_msecs(READ_ONCE(cnf->rtr_probe_interval)); #ifdef CONFIG_IPV6_ROUTE_INFO - array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen; - array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; + array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = + READ_ONCE(cnf->accept_ra_rt_info_min_plen); + array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = + READ_ONCE(cnf->accept_ra_rt_info_max_plen); #endif #endif - array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; - array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; + array[DEVCONF_PROXY_NDP] = READ_ONCE(cnf->proxy_ndp); + array[DEVCONF_ACCEPT_SOURCE_ROUTE] = + READ_ONCE(cnf->accept_source_route); #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad; - array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; + array[DEVCONF_OPTIMISTIC_DAD] = READ_ONCE(cnf->optimistic_dad); + array[DEVCONF_USE_OPTIMISTIC] = READ_ONCE(cnf->use_optimistic); #endif #ifdef CONFIG_IPV6_MROUTE array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding); #endif - array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; - array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; - array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; - array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; - array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; - array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; - array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; - array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown; + array[DEVCONF_DISABLE_IPV6] = READ_ONCE(cnf->disable_ipv6); + array[DEVCONF_ACCEPT_DAD] = READ_ONCE(cnf->accept_dad); + array[DEVCONF_FORCE_TLLAO] = READ_ONCE(cnf->force_tllao); + array[DEVCONF_NDISC_NOTIFY] = READ_ONCE(cnf->ndisc_notify); + array[DEVCONF_SUPPRESS_FRAG_NDISC] = + READ_ONCE(cnf->suppress_frag_ndisc); + array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = + READ_ONCE(cnf->accept_ra_from_local); + array[DEVCONF_ACCEPT_RA_MTU] = READ_ONCE(cnf->accept_ra_mtu); + array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = + READ_ONCE(cnf->ignore_routes_with_linkdown); /* we omit DEVCONF_STABLE_SECRET for now */ - array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; - array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; - array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; - array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; - array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled; + array[DEVCONF_USE_OIF_ADDRS_ONLY] = READ_ONCE(cnf->use_oif_addrs_only); + array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = + READ_ONCE(cnf->drop_unicast_in_l2_multicast); + array[DEVCONF_DROP_UNSOLICITED_NA] = READ_ONCE(cnf->drop_unsolicited_na); + array[DEVCONF_KEEP_ADDR_ON_DOWN] = READ_ONCE(cnf->keep_addr_on_down); + array[DEVCONF_SEG6_ENABLED] = READ_ONCE(cnf->seg6_enabled); #ifdef CONFIG_IPV6_SEG6_HMAC - array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac; + array[DEVCONF_SEG6_REQUIRE_HMAC] = READ_ONCE(cnf->seg6_require_hmac); #endif - array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; - array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; - array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; - array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass; - array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled; - array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled; - array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; - array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; - array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier; - array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na; - array[DEVCONF_ACCEPT_RA_MIN_LFT] = cnf->accept_ra_min_lft; + array[DEVCONF_ENHANCED_DAD] = READ_ONCE(cnf->enhanced_dad); + array[DEVCONF_ADDR_GEN_MODE] = READ_ONCE(cnf->addr_gen_mode); + array[DEVCONF_DISABLE_POLICY] = READ_ONCE(cnf->disable_policy); + array[DEVCONF_NDISC_TCLASS] = READ_ONCE(cnf->ndisc_tclass); + array[DEVCONF_RPL_SEG_ENABLED] = READ_ONCE(cnf->rpl_seg_enabled); + array[DEVCONF_IOAM6_ENABLED] = READ_ONCE(cnf->ioam6_enabled); + array[DEVCONF_IOAM6_ID] = READ_ONCE(cnf->ioam6_id); + array[DEVCONF_IOAM6_ID_WIDE] = READ_ONCE(cnf->ioam6_id_wide); + array[DEVCONF_NDISC_EVICT_NOCARRIER] = + READ_ONCE(cnf->ndisc_evict_nocarrier); + array[DEVCONF_ACCEPT_UNTRACKED_NA] = + READ_ONCE(cnf->accept_untracked_na); + array[DEVCONF_ACCEPT_RA_MIN_LFT] = READ_ONCE(cnf->accept_ra_min_lft); } static inline size_t inet6_ifla6_size(void) @@ -5794,13 +5805,14 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, u32 ext_filter_mask) { - struct nlattr *nla; struct ifla_cacheinfo ci; + struct nlattr *nla; + u32 ra_mtu; - if (nla_put_u32(skb, IFLA_INET6_FLAGS, idev->if_flags)) + if (nla_put_u32(skb, IFLA_INET6_FLAGS, READ_ONCE(idev->if_flags))) goto nla_put_failure; ci.max_reasm_len = IPV6_MAXPLEN; - ci.tstamp = cstamp_delta(idev->tstamp); + ci.tstamp = cstamp_delta(READ_ONCE(idev->tstamp)); ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time); ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME)); if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci)) @@ -5832,11 +5844,12 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla)); read_unlock_bh(&idev->lock); - if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode)) + if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, + READ_ONCE(idev->cnf.addr_gen_mode))) goto nla_put_failure; - if (idev->ra_mtu && - nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu)) + ra_mtu = READ_ONCE(idev->ra_mtu); + if (ra_mtu && nla_put_u32(skb, IFLA_INET6_RA_MTU, ra_mtu)) goto nla_put_failure; return 0; @@ -6037,7 +6050,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla, if (tb[IFLA_INET6_ADDR_GEN_MODE]) { u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); - idev->cnf.addr_gen_mode = mode; + WRITE_ONCE(idev->cnf.addr_gen_mode, mode); } return 0; @@ -6516,7 +6529,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, } if (idev->cnf.addr_gen_mode != new_val) { - idev->cnf.addr_gen_mode = new_val; + WRITE_ONCE(idev->cnf.addr_gen_mode, new_val); addrconf_init_auto_addrs(idev->dev); } } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { @@ -6527,7 +6540,8 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, idev = __in6_dev_get(dev); if (idev && idev->cnf.addr_gen_mode != new_val) { - idev->cnf.addr_gen_mode = new_val; + WRITE_ONCE(idev->cnf.addr_gen_mode, + new_val); addrconf_init_auto_addrs(idev->dev); } } @@ -6592,14 +6606,15 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, struct inet6_dev *idev = __in6_dev_get(dev); if (idev) { - idev->cnf.addr_gen_mode = - IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + WRITE_ONCE(idev->cnf.addr_gen_mode, + IN6_ADDR_GEN_MODE_STABLE_PRIVACY); } } } else { struct inet6_dev *idev = ctl->extra1; - idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + WRITE_ONCE(idev->cnf.addr_gen_mode, + IN6_ADDR_GEN_MODE_STABLE_PRIVACY); } out: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 73cb31afe935..8523f0595b01 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1975,7 +1975,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer, if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)) idev->nd_parms->reachable_time = neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)); - idev->tstamp = jiffies; + WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); in6_dev_put(idev); } -- cgit v1.2.3 From 8afc7a78d55de726b2747d7775c54def79509ec5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:10 +0000 Subject: ipv6: prepare inet6_fill_ifinfo() for RCU protection We want to use RCU protection instead of RTNL for inet6_fill_ifinfo(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++-- net/core/dev.c | 4 ++-- net/ipv6/addrconf.c | 11 +++++++---- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f07c8374f29c..09023e44db4e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4354,8 +4354,10 @@ static inline bool netif_testing(const struct net_device *dev) */ static inline bool netif_oper_up(const struct net_device *dev) { - return (dev->operstate == IF_OPER_UP || - dev->operstate == IF_OPER_UNKNOWN /* backward compat */); + unsigned int operstate = READ_ONCE(dev->operstate); + + return operstate == IF_OPER_UP || + operstate == IF_OPER_UNKNOWN /* backward compat */; } /** diff --git a/net/core/dev.c b/net/core/dev.c index 0628d8ff1ed9..275fd5259a4a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8632,12 +8632,12 @@ unsigned int dev_get_flags(const struct net_device *dev) { unsigned int flags; - flags = (dev->flags & ~(IFF_PROMISC | + flags = (READ_ONCE(dev->flags) & ~(IFF_PROMISC | IFF_ALLMULTI | IFF_RUNNING | IFF_LOWER_UP | IFF_DORMANT)) | - (dev->gflags & (IFF_PROMISC | + (READ_ONCE(dev->gflags) & (IFF_PROMISC | IFF_ALLMULTI)); if (netif_running(dev)) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a56dad307fe3..daa81556d118 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6062,6 +6062,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, struct net_device *dev = idev->dev; struct ifinfomsg *hdr; struct nlmsghdr *nlh; + int ifindex, iflink; void *protoinfo; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); @@ -6072,16 +6073,18 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, hdr->ifi_family = AF_INET6; hdr->__ifi_pad = 0; hdr->ifi_type = dev->type; - hdr->ifi_index = dev->ifindex; + ifindex = READ_ONCE(dev->ifindex); + hdr->ifi_index = ifindex; hdr->ifi_flags = dev_get_flags(dev); hdr->ifi_change = 0; + iflink = dev_get_iflink(dev); if (nla_put_string(skb, IFLA_IFNAME, dev->name) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || - nla_put_u32(skb, IFLA_MTU, dev->mtu) || - (dev->ifindex != dev_get_iflink(dev) && - nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) || + nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) || + (ifindex != iflink && + nla_put_u32(skb, IFLA_LINK, iflink)) || nla_put_u8(skb, IFLA_OPERSTATE, netif_running(dev) ? READ_ONCE(dev->operstate) : IF_OPER_DOWN)) goto nla_put_failure; -- cgit v1.2.3 From ac14ad9755d4f8f286fd2cd710aa5dcf9a3c777a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:11 +0000 Subject: ipv6: use xarray iterator to implement inet6_dump_ifinfo() Prepare inet6_dump_ifinfo() to run with RCU protection instead of RTNL and use for_each_netdev_dump() interface. Also properly return 0 at the end of a dump, avoiding an extra recvmsg() system call and RTNL acquisition. Note that RTNL-less dumps need core changes, coming later in the series. Signed-off-by: Eric Dumazet Reviewed-by: Ido Schimmel Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 46 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index daa81556d118..6556136d2b8d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6132,50 +6132,42 @@ static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh, static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - int h, s_h; - int idx = 0, s_idx; + struct { + unsigned long ifindex; + } *ctx = (void *)cb->ctx; struct net_device *dev; struct inet6_dev *idev; - struct hlist_head *head; + int err; /* only requests using strict checking can pass data to * influence the dump */ if (cb->strict_check) { - int err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack); + err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack); if (err < 0) return err; } - s_h = cb->args[0]; - s_idx = cb->args[1]; - + err = 0; rcu_read_lock(); - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - idev = __in6_dev_get(dev); - if (!idev) - goto cont; - if (inet6_fill_ifinfo(skb, idev, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWLINK, NLM_F_MULTI) < 0) - goto out; -cont: - idx++; + for_each_netdev_dump(net, dev, ctx->ifindex) { + idev = __in6_dev_get(dev); + if (!idev) + continue; + err = inet6_fill_ifinfo(skb, idev, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWLINK, NLM_F_MULTI); + if (err < 0) { + if (likely(skb->len)) + err = skb->len; + break; } } -out: rcu_read_unlock(); - cb->args[1] = idx; - cb->args[0] = h; - return skb->len; + return err; } void inet6_ifinfo_notify(int event, struct inet6_dev *idev) -- cgit v1.2.3 From 69fdb7e411b6d01eb08447e672302b69c9e176ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:16 +0000 Subject: ipv6: switch inet6_dump_ifinfo() to RCU protection No longer hold RTNL while calling inet6_dump_ifinfo() Signed-off-by: Eric Dumazet Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6556136d2b8d..a280614b3765 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7462,7 +7462,7 @@ int __init addrconf_init(void) rtnl_af_register(&inet6_ops); err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK, - NULL, inet6_dump_ifinfo, 0); + NULL, inet6_dump_ifinfo, RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; -- cgit v1.2.3 From c3718936ec47ae811a7ce9a618b6cb1cda835bab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 23 Feb 2024 20:10:54 +0000 Subject: ipv6: anycast: complete RCU handling of struct ifacaddr6 struct ifacaddr6 are already freed after RCU grace period. Add __rcu qualifier to aca_next pointer, and idev->ac_list Add relevant rcu_assign_pointer() and dereference accessors. ipv6_chk_acast_dev() no longer needs to acquire idev->lock. /proc/net/anycast6 is now purely RCU protected, it no longer acquires idev->lock. Similarly in6_dump_addrs() can use RCU protection to iterate through anycast addresses. It was relying on a mixture of RCU and RTNL but next patches will get rid of RTNL there. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240223201054.220534-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/if_inet6.h | 4 ++-- net/ipv6/addrconf.c | 4 ++-- net/ipv6/anycast.c | 61 +++++++++++++++++++------------------------------- 3 files changed, 27 insertions(+), 42 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f07642264c1e..238ad3349456 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -144,7 +144,7 @@ struct ipv6_ac_socklist { struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; - struct ifacaddr6 *aca_next; + struct ifacaddr6 __rcu *aca_next; struct hlist_node aca_addr_lst; int aca_users; refcount_t aca_refcnt; @@ -196,7 +196,7 @@ struct inet6_dev { spinlock_t mc_report_lock; /* mld query report lock */ struct mutex mc_lock; /* mld global lock */ - struct ifacaddr6 *ac_list; + struct ifacaddr6 __rcu *ac_list; rwlock_t lock; refcount_t refcnt; __u32 if_flags; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a280614b3765..e27069ad938c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5314,8 +5314,8 @@ next: case ANYCAST_ADDR: fillargs->event = RTM_GETANYCAST; /* anycast address */ - for (ifaca = idev->ac_list; ifaca; - ifaca = ifaca->aca_next, ip_idx++) { + for (ifaca = rcu_dereference(idev->ac_list); ifaca; + ifaca = rcu_dereference(ifaca->aca_next), ip_idx++) { if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifacaddr(skb, ifaca, fillargs); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index bb17f484ee2c..0f2506e35359 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -296,7 +296,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) goto out; } - for (aca = idev->ac_list; aca; aca = aca->aca_next) { + for (aca = rtnl_dereference(idev->ac_list); aca; + aca = rtnl_dereference(aca->aca_next)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) { aca->aca_users++; err = 0; @@ -317,13 +318,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) goto out; } - aca->aca_next = idev->ac_list; - idev->ac_list = aca; - /* Hold this for addrconf_join_solict() below before we unlock, * it is already exposed via idev->ac_list. */ aca_get(aca); + aca->aca_next = idev->ac_list; + rcu_assign_pointer(idev->ac_list, aca); + write_unlock_bh(&idev->lock); ipv6_add_acaddr_hash(net, aca); @@ -350,7 +351,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) write_lock_bh(&idev->lock); prev_aca = NULL; - for (aca = idev->ac_list; aca; aca = aca->aca_next) { + for (aca = rtnl_dereference(idev->ac_list); aca; + aca = rtnl_dereference(aca->aca_next)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) break; prev_aca = aca; @@ -364,9 +366,9 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) return 0; } if (prev_aca) - prev_aca->aca_next = aca->aca_next; + rcu_assign_pointer(prev_aca->aca_next, aca->aca_next); else - idev->ac_list = aca->aca_next; + rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); @@ -392,8 +394,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) struct ifacaddr6 *aca; write_lock_bh(&idev->lock); - while ((aca = idev->ac_list) != NULL) { - idev->ac_list = aca->aca_next; + while ((aca = rtnl_dereference(idev->ac_list)) != NULL) { + rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); ipv6_del_acaddr_hash(aca); @@ -420,11 +422,10 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad idev = __in6_dev_get(dev); if (idev) { - read_lock_bh(&idev->lock); - for (aca = idev->ac_list; aca; aca = aca->aca_next) + for (aca = rcu_dereference(idev->ac_list); aca; + aca = rcu_dereference(aca->aca_next)) if (ipv6_addr_equal(&aca->aca_addr, addr)) break; - read_unlock_bh(&idev->lock); return aca != NULL; } return false; @@ -477,30 +478,25 @@ bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, struct ac6_iter_state { struct seq_net_private p; struct net_device *dev; - struct inet6_dev *idev; }; #define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private) static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) { - struct ifacaddr6 *im = NULL; struct ac6_iter_state *state = ac6_seq_private(seq); struct net *net = seq_file_net(seq); + struct ifacaddr6 *im = NULL; - state->idev = NULL; for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; + idev = __in6_dev_get(state->dev); if (!idev) continue; - read_lock_bh(&idev->lock); - im = idev->ac_list; - if (im) { - state->idev = idev; + im = rcu_dereference(idev->ac_list); + if (im) break; - } - read_unlock_bh(&idev->lock); } return im; } @@ -508,22 +504,17 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im) { struct ac6_iter_state *state = ac6_seq_private(seq); + struct inet6_dev *idev; - im = im->aca_next; + im = rcu_dereference(im->aca_next); while (!im) { - if (likely(state->idev != NULL)) - read_unlock_bh(&state->idev->lock); - state->dev = next_net_device_rcu(state->dev); - if (!state->dev) { - state->idev = NULL; + if (!state->dev) break; - } - state->idev = __in6_dev_get(state->dev); - if (!state->idev) + idev = __in6_dev_get(state->dev); + if (!idev) continue; - read_lock_bh(&state->idev->lock); - im = state->idev->ac_list; + im = rcu_dereference(idev->ac_list); } return im; } @@ -555,12 +546,6 @@ static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ac6_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { - struct ac6_iter_state *state = ac6_seq_private(seq); - - if (likely(state->idev != NULL)) { - read_unlock_bh(&state->idev->lock); - state->idev = NULL; - } rcu_read_unlock(); } -- cgit v1.2.3 From 67ea41d19d2aad2da2bcaeb7992c4fff9dc28a8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Feb 2024 22:22:59 +0000 Subject: inet6: expand rcu_read_lock() scope in inet6_dump_addr() I missed that inet6_dump_addr() is calling in6_dump_addrs() from two points. First one under RTNL protection, and second one under rcu_read_lock(). Since we want to remove RTNL use from inet6_dump_addr() very soon, no longer assume in6_dump_addrs() is protected by RTNL (even if this is still the case). Use rcu_read_lock() earlier to fix this lockdep splat: WARNING: suspicious RCU usage 6.8.0-rc5-syzkaller-01618-gf8cbf6bde4c8 #0 Not tainted net/ipv6/addrconf.c:5317 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by syz-executor.2/8834: #0: ffff88802f554678 (nlk_cb_mutex-ROUTE){+.+.}-{3:3}, at: __netlink_dump_start+0x119/0x780 net/netlink/af_netlink.c:2338 #1: ffffffff8f377a88 (rtnl_mutex){+.+.}-{3:3}, at: netlink_dump+0x676/0xda0 net/netlink/af_netlink.c:2265 #2: ffff88807e5f0580 (&ndev->lock){++--}-{2:2}, at: in6_dump_addrs+0xb8/0x1de0 net/ipv6/addrconf.c:5279 stack backtrace: CPU: 1 PID: 8834 Comm: syz-executor.2 Not tainted 6.8.0-rc5-syzkaller-01618-gf8cbf6bde4c8 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106 lockdep_rcu_suspicious+0x220/0x340 kernel/locking/lockdep.c:6712 in6_dump_addrs+0x1b47/0x1de0 net/ipv6/addrconf.c:5317 inet6_dump_addr+0x1597/0x1690 net/ipv6/addrconf.c:5428 netlink_dump+0x6a6/0xda0 net/netlink/af_netlink.c:2266 __netlink_dump_start+0x59d/0x780 net/netlink/af_netlink.c:2374 netlink_dump_start include/linux/netlink.h:340 [inline] rtnetlink_rcv_msg+0xcf7/0x10d0 net/core/rtnetlink.c:6555 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2547 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x8e0/0xcb0 net/netlink/af_netlink.c:1902 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 Fixes: c3718936ec47 ("ipv6: anycast: complete RCU handling of struct ifacaddr6") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240227222259.4081489-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e27069ad938c..953a95898e4a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5300,9 +5300,9 @@ next: fillargs->event = RTM_GETMULTICAST; /* multicast address */ - for (ifmca = rtnl_dereference(idev->mc_list); + for (ifmca = rcu_dereference(idev->mc_list); ifmca; - ifmca = rtnl_dereference(ifmca->next), ip_idx++) { + ifmca = rcu_dereference(ifmca->next), ip_idx++) { if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, fillargs); @@ -5410,6 +5410,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, s_idx = idx = cb->args[1]; s_ip_idx = cb->args[2]; + rcu_read_lock(); if (cb->strict_check) { err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, skb->sk, cb); @@ -5434,7 +5435,6 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } } - rcu_read_lock(); cb->seq = inet6_base_seq(tgt_net); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; @@ -5456,10 +5456,10 @@ cont: } } done: - rcu_read_unlock(); cb->args[0] = h; cb->args[1] = idx; put_tgt_net: + rcu_read_unlock(); if (fillargs.netnsid >= 0) put_net(tgt_net); -- cgit v1.2.3 From d289ab65b89c1d4d88417cb6c03e923f21f95fae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:26 +0000 Subject: ipv6: annotate data-races around cnf.disable_ipv6 disable_ipv6 is read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. v2: do not preload net before rtnl_trylock() in addrconf_disable_ipv6() (Jiri) Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 9 +++++---- net/ipv6/ip6_input.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9e949403c136..c965400fb934 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4214,7 +4214,7 @@ static void addrconf_dad_work(struct work_struct *w) if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && ipv6_addr_equal(&ifp->addr, &addr)) { /* DAD failed for link-local based on MAC */ - idev->cnf.disable_ipv6 = 1; + WRITE_ONCE(idev->cnf.disable_ipv6, 1); pr_info("%s: IPv6 being disabled!\n", ifp->idev->dev->name); @@ -6389,7 +6389,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf) idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); - idev->cnf.disable_ipv6 = newf; + + WRITE_ONCE(idev->cnf.disable_ipv6, newf); if (changed) dev_disable_change(idev); } @@ -6406,7 +6407,7 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) net = (struct net *)table->extra2; old = *p; - *p = newf; + WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->disable_ipv6) { rtnl_unlock(); @@ -6414,7 +6415,7 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) } if (p == &net->ipv6.devconf_all->disable_ipv6) { - net->ipv6.devconf_dflt->disable_ipv6 = newf; + WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf); addrconf_disable_change(net, newf); } else if ((!newf) ^ (!old)) dev_disable_change((struct inet6_dev *)table->extra1); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index b8378814532c..1ba97933c74f 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -168,9 +168,9 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, SKB_DR_SET(reason, NOT_SPECIFIED); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || - !idev || unlikely(idev->cnf.disable_ipv6)) { + !idev || unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); - if (idev && unlikely(idev->cnf.disable_ipv6)) + if (idev && unlikely(READ_ONCE(idev->cnf.disable_ipv6))) SKB_DR_SET(reason, IPV6DISABLED); goto drop; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 31b86fe661aa..0559bd000585 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -234,7 +234,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; - if (unlikely(idev->cnf.disable_ipv6)) { + if (unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; -- cgit v1.2.3 From 553ced03b227c99f0bf68d86f7b45ed7125563f2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:27 +0000 Subject: ipv6: addrconf_disable_ipv6() optimization Writing over /proc/sys/net/ipv6/conf/default/disable_ipv6 does not need to hold RTNL. v3: remove a wrong change (Jakub Kicinski feedback) Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c965400fb934..6975685f55ea 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6399,21 +6399,20 @@ static void addrconf_disable_change(struct net *net, __s32 newf) static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) { - struct net *net; + struct net *net = (struct net *)table->extra2; int old; + if (p == &net->ipv6.devconf_dflt->disable_ipv6) { + WRITE_ONCE(*p, newf); + return 0; + } + if (!rtnl_trylock()) return restart_syscall(); - net = (struct net *)table->extra2; old = *p; WRITE_ONCE(*p, newf); - if (p == &net->ipv6.devconf_dflt->disable_ipv6) { - rtnl_unlock(); - return 0; - } - if (p == &net->ipv6.devconf_all->disable_ipv6) { WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf); addrconf_disable_change(net, newf); -- cgit v1.2.3 From e7135f484994494a38071e3653a83d21d305f50c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:28 +0000 Subject: ipv6: annotate data-races around cnf.mtu6 idev->cnf.mtu6 might be read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- net/ipv6/addrconf.c | 4 ++-- net/ipv6/ndisc.c | 4 ++-- net/ipv6/route.c | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 52a51c69aa9d..a30c6aa9e5cf 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -332,7 +332,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst rcu_read_lock(); idev = __in6_dev_get(dst->dev); if (idev) - mtu = idev->cnf.mtu6; + mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); out: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6975685f55ea..1b534edc7ca2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3671,7 +3671,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev) { rt6_mtu_change(dev, dev->mtu); - idev->cnf.mtu6 = dev->mtu; + WRITE_ONCE(idev->cnf.mtu6, dev->mtu); break; } @@ -3763,7 +3763,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) { rt6_mtu_change(dev, dev->mtu); - idev->cnf.mtu6 = dev->mtu; + WRITE_ONCE(idev->cnf.mtu6, dev->mtu); } WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 8523f0595b01..e96d79cd34d2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1578,8 +1578,8 @@ skip_routeinfo: if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); - } else if (in6_dev->cnf.mtu6 != mtu) { - in6_dev->cnf.mtu6 = mtu; + } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) { + WRITE_ONCE(in6_dev->cnf.mtu6, mtu); fib6_metric_set(rt, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 707d65bc9c0e..66c685b0b619 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1596,7 +1596,7 @@ static unsigned int fib6_mtu(const struct fib6_result *res) rcu_read_lock(); idev = __in6_dev_get(dev); - mtu = idev->cnf.mtu6; + mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); } @@ -3249,8 +3249,8 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res, mtu = IPV6_MIN_MTU; idev = __in6_dev_get(dev); - if (idev && idev->cnf.mtu6 > mtu) - mtu = idev->cnf.mtu6; + if (idev) + mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6)); } mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); -- cgit v1.2.3 From 32f754176e889cdfe989ef08ece19859427755df Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:30 +0000 Subject: ipv6: annotate data-races around cnf.forwarding idev->cnf.forwarding and net->ipv6.devconf_all->forwarding might be read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 2 +- include/net/ipv6.h | 8 +++++--- net/core/filter.c | 2 +- net/ipv6/addrconf.c | 10 ++++++---- net/ipv6/ip6_output.c | 2 +- net/ipv6/ndisc.c | 11 ++++++----- net/ipv6/route.c | 4 ++-- 7 files changed, 22 insertions(+), 17 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index cd4083e0b3b9..e13e4920ee9b 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -339,7 +339,7 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci) in6_dev = in6_dev_get(netdev); if (!in6_dev) goto out; - is_router = !!in6_dev->cnf.forwarding; + is_router = !!READ_ONCE(in6_dev->cnf.forwarding); in6_dev_put(in6_dev); /* ipv6_stub != NULL if in6_dev_get returned an inet6_dev */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index cf25ea21d770..88a8e554f7a1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -534,13 +534,15 @@ static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb) return 0; } -static inline bool ipv6_accept_ra(struct inet6_dev *idev) +static inline bool ipv6_accept_ra(const struct inet6_dev *idev) { + s32 accept_ra = READ_ONCE(idev->cnf.accept_ra); + /* If forwarding is enabled, RA are not accepted unless the special * hybrid mode (accept_ra=2) is enabled. */ - return idev->cnf.forwarding ? idev->cnf.accept_ra == 2 : - idev->cnf.accept_ra; + return READ_ONCE(idev->cnf.forwarding) ? accept_ra == 2 : + accept_ra; } #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */ diff --git a/net/core/filter.c b/net/core/filter.c index 358870408a51..58e8e1a70aa7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5988,7 +5988,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, return -ENODEV; idev = __in6_dev_get_safely(dev); - if (unlikely(!idev || !idev->cnf.forwarding)) + if (unlikely(!idev || !READ_ONCE(idev->cnf.forwarding))) return BPF_FIB_LKUP_RET_FWD_DISABLED; if (flags & BPF_FIB_LOOKUP_OUTPUT) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b534edc7ca2..ec99e393f6a9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -551,7 +551,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, goto out; if ((all || type == NETCONFA_FORWARDING) && - nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) + nla_put_s32(skb, NETCONFA_FORWARDING, + READ_ONCE(devconf->forwarding)) < 0) goto nla_put_failure; #ifdef CONFIG_IPV6_MROUTE if ((all || type == NETCONFA_MC_FORWARDING) && @@ -869,7 +870,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf) idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); - idev->cnf.forwarding = newf; + + WRITE_ONCE(idev->cnf.forwarding, newf); if (changed) dev_forward_change(idev); } @@ -886,7 +888,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) net = (struct net *)table->extra2; old = *p; - *p = newf; + WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->forwarding) { if ((!newf) ^ (!old)) @@ -901,7 +903,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_all->forwarding) { int old_dflt = net->ipv6.devconf_dflt->forwarding; - net->ipv6.devconf_dflt->forwarding = newf; + WRITE_ONCE(net->ipv6.devconf_dflt->forwarding, newf); if ((!newf) ^ (!old_dflt)) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0559bd000585..444be8c84cc5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -501,7 +501,7 @@ int ip6_forward(struct sk_buff *skb) u32 mtu; idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); - if (net->ipv6.devconf_all->forwarding == 0) + if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) goto error; if (skb->pkt_type != PACKET_HOST) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 9c9c31268432..1fb5e37bc78b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -903,7 +903,7 @@ have_ifp: } if (ipv6_chk_acast_addr(net, dev, &msg->target) || - (idev->cnf.forwarding && + (READ_ONCE(idev->cnf.forwarding) && (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) && (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && @@ -929,7 +929,7 @@ have_ifp: } if (is_router < 0) - is_router = idev->cnf.forwarding; + is_router = READ_ONCE(idev->cnf.forwarding); if (dad) { ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target, @@ -1080,7 +1080,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) * Note that we don't do a (daddr == all-routers-mcast) check. */ new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE; - if (!neigh && lladdr && idev && idev->cnf.forwarding) { + if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) { if (accept_untracked_na(dev, saddr)) { neigh = neigh_create(&nd_tbl, &msg->target, dev); new_state = NUD_STALE; @@ -1100,7 +1100,8 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) * has already sent a NA to us. */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && - net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp && + READ_ONCE(net->ipv6.devconf_all->forwarding) && + net->ipv6.devconf_all->proxy_ndp && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { /* XXX: idev->cnf.proxy_ndp */ goto out; @@ -1148,7 +1149,7 @@ static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb) } /* Don't accept RS if we're not in router mode */ - if (!idev->cnf.forwarding) + if (!READ_ONCE(idev->cnf.forwarding)) goto out; /* diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 66c685b0b619..6a2b53de4818 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2220,7 +2220,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, strict |= flags & RT6_LOOKUP_F_IFACE; strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; - if (net->ipv6.devconf_all->forwarding == 0) + if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) strict |= RT6_LOOKUP_F_REACHABLE; rcu_read_lock(); @@ -4149,7 +4149,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) return; - if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) + if (READ_ONCE(in6_dev->cnf.forwarding) || !in6_dev->cnf.accept_redirects) return; /* RFC2461 8.1: -- cgit v1.2.3 From fca34cc075996767fbbdb6252be9ddd21c34c920 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:32 +0000 Subject: ipv6: annotate data-races around idev->cnf.ignore_routes_with_linkdown idev->cnf.ignore_routes_with_linkdown can be used without any locks, add appropriate annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/addrconf.h | 2 +- net/ipv6/addrconf.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 30d6f1e84e46..9d06eb945509 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -417,7 +417,7 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) if (unlikely(!idev)) return true; - return !!idev->cnf.ignore_routes_with_linkdown; + return !!READ_ONCE(idev->cnf.ignore_routes_with_linkdown); } void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ec99e393f6a9..82b8d90afb6a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -566,7 +566,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, - devconf->ignore_routes_with_linkdown) < 0) + READ_ONCE(devconf->ignore_routes_with_linkdown)) < 0) goto nla_put_failure; out: @@ -935,7 +935,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf) if (idev) { int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf); - idev->cnf.ignore_routes_with_linkdown = newf; + WRITE_ONCE(idev->cnf.ignore_routes_with_linkdown, newf); if (changed) inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, @@ -956,7 +956,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) net = (struct net *)table->extra2; old = *p; - *p = newf; + WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) { if ((!newf) ^ (!old)) @@ -970,7 +970,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) } if (p == &net->ipv6.devconf_all->ignore_routes_with_linkdown) { - net->ipv6.devconf_dflt->ignore_routes_with_linkdown = newf; + WRITE_ONCE(net->ipv6.devconf_dflt->ignore_routes_with_linkdown, newf); addrconf_linkdown_change(net, newf); if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, -- cgit v1.2.3 From a8fbd4d90720b6c930661ed593d54aba77cec3c2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:34 +0000 Subject: ipv6: annotate data-races around devconf->proxy_ndp devconf->proxy_ndp can be read and written locklessly, add appropriate annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 ++- net/ipv6/ip6_output.c | 2 +- net/ipv6/ndisc.c | 5 +++-- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 82b8d90afb6a..409aa5ddb856 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -561,7 +561,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, goto nla_put_failure; #endif if ((all || type == NETCONFA_PROXY_NEIGH) && - nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) + nla_put_s32(skb, NETCONFA_PROXY_NEIGH, + READ_ONCE(devconf->proxy_ndp)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 444be8c84cc5..f08af3f4e54f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -552,7 +552,7 @@ int ip6_forward(struct sk_buff *skb) } /* XXX: idev->cnf.proxy_ndp? */ - if (net->ipv6.devconf_all->proxy_ndp && + if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f6430db24940..4114918f12c8 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -904,7 +904,8 @@ have_ifp: if (ipv6_chk_acast_addr(net, dev, &msg->target) || (READ_ONCE(idev->cnf.forwarding) && - (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) && + (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) || + READ_ONCE(idev->cnf.proxy_ndp)) && (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && @@ -1101,7 +1102,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && READ_ONCE(net->ipv6.devconf_all->forwarding) && - net->ipv6.devconf_all->proxy_ndp && + READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { /* XXX: idev->cnf.proxy_ndp */ goto out; -- cgit v1.2.3 From 624d5aec487cf8c2955d9c5880685714f7fe8e6f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:35 +0000 Subject: ipv6: annotate data-races around devconf->disable_policy idev->cnf.disable_policy and net->ipv6.devconf_all->disable_policy can be read locklessly. Add appropriate annotations on reads and writes. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/route.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 409aa5ddb856..44340ff82da5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6699,7 +6699,7 @@ int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) if (!rtnl_trylock()) return restart_syscall(); - *valp = val; + WRITE_ONCE(*valp, val); net = (struct net *)ctl->extra2; if (valp == &net->ipv6.devconf_dflt->disable_policy) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f08af3f4e54f..b9dd3a66e423 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -513,8 +513,8 @@ int ip6_forward(struct sk_buff *skb) if (skb_warn_if_lro(skb)) goto drop; - if (!net->ipv6.devconf_all->disable_policy && - (!idev || !idev->cnf.disable_policy) && + if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) && + (!idev || !READ_ONCE(idev->cnf.disable_policy)) && !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1b897c57c55f..a92fcac902ae 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4584,8 +4584,8 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, f6i->dst_nocount = true; if (!anycast && - (net->ipv6.devconf_all->disable_policy || - idev->cnf.disable_policy)) + (READ_ONCE(net->ipv6.devconf_all->disable_policy) || + READ_ONCE(idev->cnf.disable_policy))) f6i->dst_nopolicy = true; } -- cgit v1.2.3 From 45b90ec9a214ff7fbde8190126301a07387c46e5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:36 +0000 Subject: ipv6: addrconf_disable_policy() optimization Writing over /proc/sys/net/ipv6/conf/default/disable_policy does not need to hold RTNL. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 44340ff82da5..0e7135a206b0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6693,20 +6693,19 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) static int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) { + struct net *net = (struct net *)ctl->extra2; struct inet6_dev *idev; - struct net *net; + + if (valp == &net->ipv6.devconf_dflt->disable_policy) { + WRITE_ONCE(*valp, val); + return 0; + } if (!rtnl_trylock()) return restart_syscall(); WRITE_ONCE(*valp, val); - net = (struct net *)ctl->extra2; - if (valp == &net->ipv6.devconf_dflt->disable_policy) { - rtnl_unlock(); - return 0; - } - if (valp == &net->ipv6.devconf_all->disable_policy) { struct net_device *dev; -- cgit v1.2.3 From 2aba913f99debce6cd1c7f606d69d094d1202559 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:37 +0000 Subject: ipv6/addrconf: annotate data-races around devconf fields (I) Annotate lockless reads and writes on following devconf fields: - regen_min_advance - regen_max_retry - dad_transmits - use_tempaddr - max_addresses - max_desync_factor - temp_valid_lft - rtr_solicits - rtr_solicit_max_interval - rtr_solicit_interval - rtr_solicit_delay - enhanced_dad - accept_redirects Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 67 +++++++++++++++++++++++++++++------------------------ net/ipv6/route.c | 3 ++- 2 files changed, 39 insertions(+), 31 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e7135a206b0..bcfe725f8fbd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1359,11 +1359,12 @@ out: in6_ifa_put(ifp); } -static unsigned long ipv6_get_regen_advance(struct inet6_dev *idev) +static unsigned long ipv6_get_regen_advance(const struct inet6_dev *idev) { - return idev->cnf.regen_min_advance + idev->cnf.regen_max_retry * - idev->cnf.dad_transmits * - max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; + return READ_ONCE(idev->cnf.regen_min_advance) + + READ_ONCE(idev->cnf.regen_max_retry) * + READ_ONCE(idev->cnf.dad_transmits) * + max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; } static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) @@ -1384,7 +1385,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) retry: in6_dev_hold(idev); - if (idev->cnf.use_tempaddr <= 0) { + if (READ_ONCE(idev->cnf.use_tempaddr) <= 0) { write_unlock_bh(&idev->lock); pr_info("%s: use_tempaddr is disabled\n", __func__); in6_dev_put(idev); @@ -1392,8 +1393,8 @@ retry: goto out; } spin_lock_bh(&ifp->lock); - if (ifp->regen_count++ >= idev->cnf.regen_max_retry) { - idev->cnf.use_tempaddr = -1; /*XXX*/ + if (ifp->regen_count++ >= READ_ONCE(idev->cnf.regen_max_retry)) { + WRITE_ONCE(idev->cnf.use_tempaddr, -1); /*XXX*/ spin_unlock_bh(&ifp->lock); write_unlock_bh(&idev->lock); pr_warn("%s: regeneration time exceeded - disabled temporary address support\n", @@ -1415,7 +1416,7 @@ retry: */ cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft); max_desync_factor = min_t(long, - idev->cnf.max_desync_factor, + READ_ONCE(idev->cnf.max_desync_factor), cnf_temp_preferred_lft - regen_advance); if (unlikely(idev->desync_factor > max_desync_factor)) { @@ -1432,7 +1433,7 @@ retry: memset(&cfg, 0, sizeof(cfg)); cfg.valid_lft = min_t(__u32, ifp->valid_lft, - idev->cnf.temp_valid_lft + age); + READ_ONCE(idev->cnf.temp_valid_lft) + age); cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor; cfg.preferred_lft = min_t(__u32, if_public_preferred_lft, cfg.preferred_lft); cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft); @@ -1685,7 +1686,7 @@ static int ipv6_get_saddr_eval(struct net *net, */ int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ? !!(dst->prefs & IPV6_PREFER_SRC_TMP) : - score->ifa->idev->cnf.use_tempaddr >= 2; + READ_ONCE(score->ifa->idev->cnf.use_tempaddr) >= 2; ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp; break; } @@ -2168,6 +2169,7 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net *net = dev_net(idev->dev); + int max_addresses; if (addrconf_dad_end(ifp)) { in6_ifa_put(ifp); @@ -2205,9 +2207,9 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) spin_unlock_bh(&ifp->lock); - if (idev->cnf.max_addresses && - ipv6_count_addresses(idev) >= - idev->cnf.max_addresses) + max_addresses = READ_ONCE(idev->cnf.max_addresses); + if (max_addresses && + ipv6_count_addresses(idev) >= max_addresses) goto lock_errdad; net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n", @@ -2604,11 +2606,11 @@ static void manage_tempaddrs(struct inet6_dev *idev, * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively. */ age = (now - ift->cstamp) / HZ; - max_valid = idev->cnf.temp_valid_lft - age; + max_valid = READ_ONCE(idev->cnf.temp_valid_lft) - age; if (max_valid < 0) max_valid = 0; - max_prefered = idev->cnf.temp_prefered_lft - + max_prefered = READ_ONCE(idev->cnf.temp_prefered_lft) - idev->desync_factor - age; if (max_prefered < 0) max_prefered = 0; @@ -2641,7 +2643,7 @@ static void manage_tempaddrs(struct inet6_dev *idev, if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft)) create = true; - if (create && idev->cnf.use_tempaddr > 0) { + if (create && READ_ONCE(idev->cnf.use_tempaddr) > 0) { /* When a new public address is created as described * in [ADDRCONF], also create a new temporary address. */ @@ -2669,7 +2671,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, int create = 0, update_lft = 0; if (!ifp && valid_lft) { - int max_addresses = in6_dev->cnf.max_addresses; + int max_addresses = READ_ONCE(in6_dev->cnf.max_addresses); struct ifa6_config cfg = { .pfx = addr, .plen = pinfo->prefix_len, @@ -4028,6 +4030,7 @@ static void addrconf_rs_timer(struct timer_list *t) struct inet6_dev *idev = from_timer(idev, t, rs_timer); struct net_device *dev = idev->dev; struct in6_addr lladdr; + int rtr_solicits; write_lock(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) @@ -4040,7 +4043,9 @@ static void addrconf_rs_timer(struct timer_list *t) if (idev->if_flags & IF_RA_RCVD) goto out; - if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) { + rtr_solicits = READ_ONCE(idev->cnf.rtr_solicits); + + if (idev->rs_probes++ < rtr_solicits || rtr_solicits < 0) { write_unlock(&idev->lock); if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) ndisc_send_rs(dev, &lladdr, @@ -4050,11 +4055,12 @@ static void addrconf_rs_timer(struct timer_list *t) write_lock(&idev->lock); idev->rs_interval = rfc3315_s14_backoff_update( - idev->rs_interval, idev->cnf.rtr_solicit_max_interval); + idev->rs_interval, + READ_ONCE(idev->cnf.rtr_solicit_max_interval)); /* The wait after the last probe can be shorter */ addrconf_mod_rs_timer(idev, (idev->rs_probes == - idev->cnf.rtr_solicits) ? - idev->cnf.rtr_solicit_delay : + READ_ONCE(idev->cnf.rtr_solicits)) ? + READ_ONCE(idev->cnf.rtr_solicit_delay) : idev->rs_interval); } else { /* @@ -4075,24 +4081,25 @@ put: */ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) { - unsigned long rand_num; struct inet6_dev *idev = ifp->idev; + unsigned long rand_num; u64 nonce; if (ifp->flags & IFA_F_OPTIMISTIC) rand_num = 0; else - rand_num = get_random_u32_below(idev->cnf.rtr_solicit_delay ? : 1); + rand_num = get_random_u32_below( + READ_ONCE(idev->cnf.rtr_solicit_delay) ? : 1); nonce = 0; - if (idev->cnf.enhanced_dad || - dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad) { + if (READ_ONCE(idev->cnf.enhanced_dad) || + READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad)) { do get_random_bytes(&nonce, 6); while (nonce == 0); } ifp->dad_nonce = nonce; - ifp->dad_probes = idev->cnf.dad_transmits; + ifp->dad_probes = READ_ONCE(idev->cnf.dad_transmits); addrconf_mod_dad_work(ifp, rand_num); } @@ -4331,7 +4338,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp); send_rs = send_mld && ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits != 0 && + READ_ONCE(ifp->idev->cnf.rtr_solicits) != 0 && (dev->flags & IFF_LOOPBACK) == 0 && (dev->type != ARPHRD_TUNNEL) && !netif_is_team_port(dev); @@ -4366,7 +4373,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); ifp->idev->rs_interval = rfc3315_s14_backoff_init( - ifp->idev->cnf.rtr_solicit_interval); + READ_ONCE(ifp->idev->cnf.rtr_solicit_interval)); ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval); @@ -5915,7 +5922,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token, return -EINVAL; } - if (idev->cnf.rtr_solicits == 0) { + if (READ_ONCE(idev->cnf.rtr_solicits) == 0) { NL_SET_ERR_MSG(extack, "Router solicitation is disabled on device"); return -EINVAL; @@ -5948,7 +5955,7 @@ update_lft: if (update_rs) { idev->if_flags |= IF_RS_SENT; idev->rs_interval = rfc3315_s14_backoff_init( - idev->cnf.rtr_solicit_interval); + READ_ONCE(idev->cnf.rtr_solicit_interval)); idev->rs_probes = 1; addrconf_mod_rs_timer(idev, idev->rs_interval); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a92fcac902ae..2cecb1c5a58f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4150,7 +4150,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) return; - if (READ_ONCE(in6_dev->cnf.forwarding) || !in6_dev->cnf.accept_redirects) + if (READ_ONCE(in6_dev->cnf.forwarding) || + !READ_ONCE(in6_dev->cnf.accept_redirects)) return; /* RFC2461 8.1: -- cgit v1.2.3 From 2f0ff05a44302c91af54a5f9efe1b65b7681540e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:38 +0000 Subject: ipv6/addrconf: annotate data-races around devconf fields (II) Final (?) round of this series. Annotate lockless reads on following devconf fields, because they be changed concurrently from /proc/net/ipv6/conf. - accept_dad - optimistic_dad - use_optimistic - use_oif_addrs_only - ra_honor_pio_life - keep_addr_on_down - ndisc_notify - ndisc_evict_nocarrier - suppress_frag_ndisc - addr_gen_mode - seg6_enabled - ioam6_enabled - ioam6_id - ioam6_id_wide - drop_unicast_in_l2_multicast - mldv[12]_unsolicited_report_interval - force_mld_version - force_tllao - accept_untracked_na - drop_unsolicited_na - accept_source_route Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 49 ++++++++++++++++++++++++++----------------------- net/ipv6/exthdrs.c | 16 +++++++++------- net/ipv6/ioam6.c | 8 ++++---- net/ipv6/ip6_input.c | 2 +- net/ipv6/mcast.c | 14 +++++++------- net/ipv6/ndisc.c | 18 +++++++++--------- net/ipv6/seg6_hmac.c | 8 +++++--- 7 files changed, 61 insertions(+), 54 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bcfe725f8fbd..436d9a6e6a82 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1557,15 +1557,17 @@ static inline int ipv6_saddr_preferred(int type) return 0; } -static bool ipv6_use_optimistic_addr(struct net *net, - struct inet6_dev *idev) +static bool ipv6_use_optimistic_addr(const struct net *net, + const struct inet6_dev *idev) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (!idev) return false; - if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) + if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) && + !READ_ONCE(idev->cnf.optimistic_dad)) return false; - if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic) + if (!READ_ONCE(net->ipv6.devconf_all->use_optimistic) && + !READ_ONCE(idev->cnf.use_optimistic)) return false; return true; @@ -1574,13 +1576,14 @@ static bool ipv6_use_optimistic_addr(struct net *net, #endif } -static bool ipv6_allow_optimistic_dad(struct net *net, - struct inet6_dev *idev) +static bool ipv6_allow_optimistic_dad(const struct net *net, + const struct inet6_dev *idev) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (!idev) return false; - if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) + if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) && + !READ_ONCE(idev->cnf.optimistic_dad)) return false; return true; @@ -1862,7 +1865,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, idev = __in6_dev_get(dst_dev); if ((dst_type & IPV6_ADDR_MULTICAST) || dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL || - (idev && idev->cnf.use_oif_addrs_only)) { + (idev && READ_ONCE(idev->cnf.use_oif_addrs_only))) { use_oif_addr = true; } } @@ -2683,8 +2686,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, }; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if ((net->ipv6.devconf_all->optimistic_dad || - in6_dev->cnf.optimistic_dad) && + if ((READ_ONCE(net->ipv6.devconf_all->optimistic_dad) || + READ_ONCE(in6_dev->cnf.optimistic_dad)) && !net->ipv6.devconf_all->forwarding && sllao) cfg.ifa_flags |= IFA_F_OPTIMISTIC; #endif @@ -2733,7 +2736,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, */ update_lft = !create && stored_lft; - if (update_lft && !in6_dev->cnf.ra_honor_pio_life) { + if (update_lft && !READ_ONCE(in6_dev->cnf.ra_honor_pio_life)) { const u32 minimum_lft = min_t(u32, stored_lft, MIN_VALID_LIFETIME); valid_lft = max(valid_lft, minimum_lft); @@ -3317,8 +3320,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev, struct inet6_ifaddr *ifp; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad || - idev->cnf.optimistic_dad) && + if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad) || + READ_ONCE(idev->cnf.optimistic_dad)) && !dev_net(idev->dev)->ipv6.devconf_all->forwarding) cfg.ifa_flags |= IFA_F_OPTIMISTIC; #endif @@ -3890,10 +3893,10 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) */ if (!unregister && !idev->cnf.disable_ipv6) { /* aggregate the system setting and interface setting */ - int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; + int _keep_addr = READ_ONCE(net->ipv6.devconf_all->keep_addr_on_down); if (!_keep_addr) - _keep_addr = idev->cnf.keep_addr_on_down; + _keep_addr = READ_ONCE(idev->cnf.keep_addr_on_down); keep_addr = (_keep_addr > 0); } @@ -4119,8 +4122,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) net = dev_net(dev); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - (net->ipv6.devconf_all->accept_dad < 1 && - idev->cnf.accept_dad < 1) || + (READ_ONCE(net->ipv6.devconf_all->accept_dad) < 1 && + READ_ONCE(idev->cnf.accept_dad) < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { bool send_na = false; @@ -4212,8 +4215,8 @@ static void addrconf_dad_work(struct work_struct *w) action = DAD_ABORT; ifp->state = INET6_IFADDR_STATE_POSTDAD; - if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 || - idev->cnf.accept_dad > 1) && + if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->accept_dad) > 1 || + READ_ONCE(idev->cnf.accept_dad) > 1) && !idev->cnf.disable_ipv6 && !(ifp->flags & IFA_F_STABLE_PRIVACY)) { struct in6_addr addr; @@ -4352,8 +4355,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, /* send unsolicited NA if enabled */ if (send_na && - (ifp->idev->cnf.ndisc_notify || - dev_net(dev)->ipv6.devconf_all->ndisc_notify)) { + (READ_ONCE(ifp->idev->cnf.ndisc_notify) || + READ_ONCE(dev_net(dev)->ipv6.devconf_all->ndisc_notify))) { ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr, /*router=*/ !!ifp->idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, @@ -6541,7 +6544,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { struct net_device *dev; - net->ipv6.devconf_dflt->addr_gen_mode = new_val; + WRITE_ONCE(net->ipv6.devconf_dflt->addr_gen_mode, new_val); for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev && @@ -6553,7 +6556,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, } } - *((u32 *)ctl->data) = new_val; + WRITE_ONCE(*((u32 *)ctl->data), new_val); } out: diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 727792907d6c..26f1e4a5ade0 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -379,9 +379,8 @@ static int ipv6_srh_rcv(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); - accept_seg6 = net->ipv6.devconf_all->seg6_enabled; - if (accept_seg6 > idev->cnf.seg6_enabled) - accept_seg6 = idev->cnf.seg6_enabled; + accept_seg6 = min(READ_ONCE(net->ipv6.devconf_all->seg6_enabled), + READ_ONCE(idev->cnf.seg6_enabled)); if (!accept_seg6) { kfree_skb(skb); @@ -655,10 +654,13 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) struct ipv6_rt_hdr *hdr; struct rt0_hdr *rthdr; struct net *net = dev_net(skb->dev); - int accept_source_route = net->ipv6.devconf_all->accept_source_route; + int accept_source_route; - if (idev && accept_source_route > idev->cnf.accept_source_route) - accept_source_route = idev->cnf.accept_source_route; + accept_source_route = READ_ONCE(net->ipv6.devconf_all->accept_source_route); + + if (idev) + accept_source_route = min(accept_source_route, + READ_ONCE(idev->cnf.accept_source_route)); if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + @@ -919,7 +921,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) goto drop; /* Ignore if IOAM is not enabled on ingress */ - if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled) + if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled)) goto ignore; /* Truncated Option header */ diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index 5fa923f06632..08c929513065 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -727,7 +727,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (!skb->dev) raw16 = IOAM6_U16_UNAVAILABLE; else - raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id; + raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); @@ -735,7 +735,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) raw16 = IOAM6_U16_UNAVAILABLE; else - raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id; + raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); @@ -822,7 +822,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (!skb->dev) raw32 = IOAM6_U32_UNAVAILABLE; else - raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide; + raw32 = READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); @@ -830,7 +830,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) raw32 = IOAM6_U32_UNAVAILABLE; else - raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide; + raw32 = READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 1ba97933c74f..133610a49da6 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -236,7 +236,7 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, if (!ipv6_addr_is_multicast(&hdr->daddr) && (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) && - idev->cnf.drop_unicast_in_l2_multicast) { + READ_ONCE(idev->cnf.drop_unicast_in_l2_multicast)) { SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST); goto err; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 76ee1615ff2a..7ba01d8cfbae 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -159,9 +159,9 @@ static int unsolicited_report_interval(struct inet6_dev *idev) int iv; if (mld_in_v1_mode(idev)) - iv = idev->cnf.mldv1_unsolicited_report_interval; + iv = READ_ONCE(idev->cnf.mldv1_unsolicited_report_interval); else - iv = idev->cnf.mldv2_unsolicited_report_interval; + iv = READ_ONCE(idev->cnf.mldv2_unsolicited_report_interval); return iv > 0 ? iv : 1; } @@ -1202,15 +1202,15 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, static int mld_force_mld_version(const struct inet6_dev *idev) { + const struct net *net = dev_net(idev->dev); + int all_force; + + all_force = READ_ONCE(net->ipv6.devconf_all->force_mld_version); /* Normally, both are 0 here. If enforcement to a particular is * being used, individual device enforcement will have a lower * precedence over 'all' device (.../conf/all/force_mld_version). */ - - if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0) - return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version; - else - return idev->cnf.force_mld_version; + return all_force ?: READ_ONCE(idev->cnf.force_mld_version); } static bool mld_in_v2_mode_only(const struct inet6_dev *idev) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 4114918f12c8..ae134634c323 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -451,7 +451,7 @@ static void ip6_nd_hdr(struct sk_buff *skb, rcu_read_lock(); idev = __in6_dev_get(skb->dev); - tclass = idev ? idev->cnf.ndisc_tclass : 0; + tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0; rcu_read_unlock(); skb_push(skb, sizeof(*hdr)); @@ -535,7 +535,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) override = false; - inc_opt |= ifp->idev->cnf.force_tllao; + inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao); in6_ifa_put(ifp); } else { if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, @@ -974,7 +974,7 @@ static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr) { struct inet6_dev *idev = __in6_dev_get(dev); - switch (idev->cnf.accept_untracked_na) { + switch (READ_ONCE(idev->cnf.accept_untracked_na)) { case 0: /* Don't accept untracked na (absent in neighbor cache) */ return 0; case 1: /* Create new entries from na if currently untracked */ @@ -1025,7 +1025,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) * drop_unsolicited_na takes precedence over accept_untracked_na */ if (!msg->icmph.icmp6_solicited && idev && - idev->cnf.drop_unsolicited_na) + READ_ONCE(idev->cnf.drop_unsolicited_na)) return reason; if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) @@ -1818,7 +1818,7 @@ static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb) if (!idev) return true; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED && - idev->cnf.suppress_frag_ndisc) { + READ_ONCE(idev->cnf.suppress_frag_ndisc)) { net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n"); return true; } @@ -1895,8 +1895,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, idev = in6_dev_get(dev); if (!idev) break; - if (idev->cnf.ndisc_notify || - net->ipv6.devconf_all->ndisc_notify) + if (READ_ONCE(idev->cnf.ndisc_notify) || + READ_ONCE(net->ipv6.devconf_all->ndisc_notify)) ndisc_send_unsol_na(dev); in6_dev_put(idev); break; @@ -1905,8 +1905,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, if (!idev) evict_nocarrier = true; else { - evict_nocarrier = idev->cnf.ndisc_evict_nocarrier && - net->ipv6.devconf_all->ndisc_evict_nocarrier; + evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) && + READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier); in6_dev_put(idev); } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index d43c50a7310d..861e0366f549 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -241,6 +241,7 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb) struct sr6_tlv_hmac *tlv; struct ipv6_sr_hdr *srh; struct inet6_dev *idev; + int require_hmac; idev = __in6_dev_get(skb->dev); @@ -248,16 +249,17 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb) tlv = seg6_get_tlv_hmac(srh); + require_hmac = READ_ONCE(idev->cnf.seg6_require_hmac); /* mandatory check but no tlv */ - if (idev->cnf.seg6_require_hmac > 0 && !tlv) + if (require_hmac > 0 && !tlv) return false; /* no check */ - if (idev->cnf.seg6_require_hmac < 0) + if (require_hmac < 0) return true; /* check only if present */ - if (idev->cnf.seg6_require_hmac == 0 && !tlv) + if (require_hmac == 0 && !tlv) return true; /* now, seg6_require_hmac >= 0 && tlv */ -- cgit v1.2.3 From 2a02f8379bde89472cddb49dc8e55a17e0b68eed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:39 +0000 Subject: ipv6: use xa_array iterator to implement inet6_netconf_dump_devconf() 1) inet6_netconf_dump_devconf() can run under RCU protection instead of RTNL. 2) properly return 0 at the end of a dump, avoiding an an extra recvmsg() system call. 3) Do not use inet6_base_seq() anymore, for_each_netdev_dump() has nice properties. Restarting a GETDEVCONF dump if a device has been added/removed or if net->ipv6.dev_addr_genid has changed is moot. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 102 +++++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 58 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 436d9a6e6a82..2f84e6ecf19f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -727,17 +727,18 @@ static u32 inet6_base_seq(const struct net *net) return res; } - static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); - int h, s_h; - int idx, s_idx; + struct { + unsigned long ifindex; + unsigned int all_default; + } *ctx = (void *)cb->ctx; struct net_device *dev; struct inet6_dev *idev; - struct hlist_head *head; + int err = 0; if (cb->strict_check) { struct netlink_ext_ack *extack = cb->extack; @@ -754,64 +755,48 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, } } - s_h = cb->args[0]; - s_idx = idx = cb->args[1]; - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - rcu_read_lock(); - cb->seq = inet6_base_seq(net); - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - idev = __in6_dev_get(dev); - if (!idev) - goto cont; - - if (inet6_netconf_fill_devconf(skb, dev->ifindex, - &idev->cnf, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, - NLM_F_MULTI, - NETCONFA_ALL) < 0) { - rcu_read_unlock(); - goto done; - } - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } - rcu_read_unlock(); + rcu_read_lock(); + for_each_netdev_dump(net, dev, ctx->ifindex) { + idev = __in6_dev_get(dev); + if (!idev) + continue; + err = inet6_netconf_fill_devconf(skb, dev->ifindex, + &idev->cnf, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, + NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) + goto done; } - if (h == NETDEV_HASHENTRIES) { - if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, - net->ipv6.devconf_all, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + if (ctx->all_default == 0) { + err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; - } - if (h == NETDEV_HASHENTRIES + 1) { - if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, - net->ipv6.devconf_dflt, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + ctx->all_default++; + } + if (ctx->all_default == 1) { + err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; + ctx->all_default++; } done: - cb->args[0] = h; - cb->args[1] = idx; - - return skb->len; + if (err < 0 && likely(skb->len)) + err = skb->len; + rcu_read_unlock(); + return err; } #ifdef CONFIG_SYSCTL @@ -7503,7 +7488,8 @@ int __init addrconf_init(void) err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, inet6_netconf_dump_devconf, - RTNL_FLAG_DOIT_UNLOCKED); + RTNL_FLAG_DOIT_UNLOCKED | + RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = ipv6_addr_label_rtnl_register(); -- cgit v1.2.3 From 02e24903e5a46b7a7fca44bcfe0cd6fa5b240c34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 10:24:26 +0000 Subject: netlink: let core handle error cases in dump operations After commit b5a899154aa9 ("netlink: handle EMSGSIZE errors in the core"), we can remove some code that was not 100 % correct anyway. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306102426.245689-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 5 +---- net/ipv4/devinet.c | 4 ---- net/ipv4/fib_frontend.c | 7 +------ net/ipv6/addrconf.c | 7 +------ 4 files changed, 3 insertions(+), 20 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e0353487c57e..a3d7847ce69d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2267,11 +2267,8 @@ walk_entries: nlh->nlmsg_seq, 0, flags, ext_filter_mask, 0, NULL, 0, netnsid, GFP_KERNEL); - if (err < 0) { - if (likely(skb->len)) - err = skb->len; + if (err < 0) break; - } } cb->seq = tgt_net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 4daa8124f247..7a437f0d4190 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1900,8 +1900,6 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) goto done; } done: - if (err < 0 && likely(skb->len)) - err = skb->len; if (fillargs.netnsid >= 0) put_net(tgt_net); rcu_read_unlock(); @@ -2312,8 +2310,6 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, ctx->all_default++; } done: - if (err < 0 && likely(skb->len)) - err = skb->len; rcu_read_unlock(); return err; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index bf3a2214fe29..48741352a88a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1026,8 +1026,6 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) goto unlock; } err = fib_table_dump(tb, skb, cb, &filter); - if (err < 0 && skb->len) - err = skb->len; goto unlock; } @@ -1045,11 +1043,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) memset(&cb->args[2], 0, sizeof(cb->args) - 2 * sizeof(cb->args[0])); err = fib_table_dump(tb, skb, cb, &filter); - if (err < 0) { - if (likely(skb->len)) - err = skb->len; + if (err < 0) goto out; - } dumped = 1; next: e++; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2f84e6ecf19f..f786b65d12e4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -793,8 +793,6 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, ctx->all_default++; } done: - if (err < 0 && likely(skb->len)) - err = skb->len; rcu_read_unlock(); return err; } @@ -6158,11 +6156,8 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); - if (err < 0) { - if (likely(skb->len)) - err = skb->len; + if (err < 0) break; - } } rcu_read_unlock(); -- cgit v1.2.3 From f0a7da702093d5ff2c54ea18d6b47658be1b4522 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 15:51:41 +0000 Subject: ipv6: make inet6_fill_ifaddr() lockless Make inet6_fill_ifaddr() lockless, and add approriate annotations on ifa->tstamp, ifa->valid_lft, ifa->preferred_lft, ifa->ifa_proto and ifa->rt_priority. Also constify 2nd argument of inet6_fill_ifaddr(), inet6_fill_ifmcaddr() and inet6_fill_ifacaddr(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 66 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 29 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f786b65d12e4..5864a1fc6850 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2728,7 +2728,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, if (update_lft) { ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; - ifp->tstamp = now; + WRITE_ONCE(ifp->tstamp, now); flags = ifp->flags; ifp->flags &= ~IFA_F_DEPRECATED; spin_unlock_bh(&ifp->lock); @@ -4896,13 +4896,13 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE); ifp->flags |= cfg->ifa_flags; - ifp->tstamp = jiffies; - ifp->valid_lft = cfg->valid_lft; - ifp->prefered_lft = cfg->preferred_lft; - ifp->ifa_proto = cfg->ifa_proto; + WRITE_ONCE(ifp->tstamp, jiffies); + WRITE_ONCE(ifp->valid_lft, cfg->valid_lft); + WRITE_ONCE(ifp->prefered_lft, cfg->preferred_lft); + WRITE_ONCE(ifp->ifa_proto, cfg->ifa_proto); if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority) - ifp->rt_priority = cfg->rt_priority; + WRITE_ONCE(ifp->rt_priority, cfg->rt_priority); if (new_peer) ifp->peer_addr = *cfg->peer_pfx; @@ -5123,17 +5123,21 @@ struct inet6_fill_args { enum addr_type_t type; }; -static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, +static int inet6_fill_ifaddr(struct sk_buff *skb, + const struct inet6_ifaddr *ifa, struct inet6_fill_args *args) { - struct nlmsghdr *nlh; + struct nlmsghdr *nlh; u32 preferred, valid; + u32 flags, priority; + u8 proto; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(struct ifaddrmsg), args->flags); if (!nlh) return -EMSGSIZE; + flags = READ_ONCE(ifa->flags); put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), ifa->idev->dev->ifindex); @@ -5141,13 +5145,14 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) goto error; - spin_lock_bh(&ifa->lock); - if (!((ifa->flags&IFA_F_PERMANENT) && - (ifa->prefered_lft == INFINITY_LIFE_TIME))) { - preferred = ifa->prefered_lft; - valid = ifa->valid_lft; + preferred = READ_ONCE(ifa->prefered_lft); + valid = READ_ONCE(ifa->valid_lft); + + if (!((flags & IFA_F_PERMANENT) && + (preferred == INFINITY_LIFE_TIME))) { if (preferred != INFINITY_LIFE_TIME) { - long tval = (jiffies - ifa->tstamp)/HZ; + long tval = (jiffies - READ_ONCE(ifa->tstamp)) / HZ; + if (preferred > tval) preferred -= tval; else @@ -5163,28 +5168,29 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, preferred = INFINITY_LIFE_TIME; valid = INFINITY_LIFE_TIME; } - spin_unlock_bh(&ifa->lock); if (!ipv6_addr_any(&ifa->peer_addr)) { if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 || nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->peer_addr) < 0) goto error; - } else + } else { if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0) goto error; + } - if (ifa->rt_priority && - nla_put_u32(skb, IFA_RT_PRIORITY, ifa->rt_priority)) + priority = READ_ONCE(ifa->rt_priority); + if (priority && nla_put_u32(skb, IFA_RT_PRIORITY, priority)) goto error; - if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) + if (put_cacheinfo(skb, ifa->cstamp, READ_ONCE(ifa->tstamp), + preferred, valid) < 0) goto error; - if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0) + if (nla_put_u32(skb, IFA_FLAGS, flags) < 0) goto error; - if (ifa->ifa_proto && - nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) + proto = READ_ONCE(ifa->ifa_proto); + if (proto && nla_put_u8(skb, IFA_PROTO, proto)) goto error; nlmsg_end(skb, nlh); @@ -5195,12 +5201,13 @@ error: return -EMSGSIZE; } -static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, +static int inet6_fill_ifmcaddr(struct sk_buff *skb, + const struct ifmcaddr6 *ifmca, struct inet6_fill_args *args) { - struct nlmsghdr *nlh; - u8 scope = RT_SCOPE_UNIVERSE; int ifindex = ifmca->idev->dev->ifindex; + u8 scope = RT_SCOPE_UNIVERSE; + struct nlmsghdr *nlh; if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; @@ -5218,7 +5225,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 || - put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, + put_cacheinfo(skb, ifmca->mca_cstamp, READ_ONCE(ifmca->mca_tstamp), INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -5228,13 +5235,14 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, return 0; } -static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, +static int inet6_fill_ifacaddr(struct sk_buff *skb, + const struct ifacaddr6 *ifaca, struct inet6_fill_args *args) { struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt); int ifindex = dev ? dev->ifindex : 1; - struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; + struct nlmsghdr *nlh; if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; @@ -5252,7 +5260,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 || - put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, + put_cacheinfo(skb, ifaca->aca_cstamp, READ_ONCE(ifaca->aca_tstamp), INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; -- cgit v1.2.3 From 46f5182dd792c55940ca520576d1544744732b81 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 15:51:42 +0000 Subject: ipv6: make in6_dump_addrs() lockless in6_dump_addrs() is called with RCU protection. There is no need holding idev->lock to iterate through unicast addresses. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5864a1fc6850..c662972d9206 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5271,23 +5271,22 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, } /* called with rcu_read_lock() */ -static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, +static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb, struct netlink_callback *cb, int s_ip_idx, struct inet6_fill_args *fillargs) { - struct ifmcaddr6 *ifmca; - struct ifacaddr6 *ifaca; + const struct ifmcaddr6 *ifmca; + const struct ifacaddr6 *ifaca; int ip_idx = 0; int err = 1; - read_lock_bh(&idev->lock); switch (fillargs->type) { case UNICAST_ADDR: { - struct inet6_ifaddr *ifa; + const struct inet6_ifaddr *ifa; fillargs->event = RTM_NEWADDR; /* unicast address incl. temp addr */ - list_for_each_entry(ifa, &idev->addr_list, if_list) { + list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) { if (ip_idx < s_ip_idx) goto next; err = inet6_fill_ifaddr(skb, ifa, fillargs); @@ -5300,7 +5299,6 @@ next: break; } case MULTICAST_ADDR: - read_unlock_bh(&idev->lock); fillargs->event = RTM_GETMULTICAST; /* multicast address */ @@ -5313,7 +5311,6 @@ next: if (err < 0) break; } - read_lock_bh(&idev->lock); break; case ANYCAST_ADDR: fillargs->event = RTM_GETANYCAST; @@ -5330,7 +5327,6 @@ next: default: break; } - read_unlock_bh(&idev->lock); cb->args[2] = ip_idx; return err; } -- cgit v1.2.3 From 9cc4cc329d30dbb353fbb23ef8cd633e8f5b8ccf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 15:51:43 +0000 Subject: ipv6: use xa_array iterator to implement inet6_dump_addr() inet6_dump_addr() can use the new xa_array iterator for better scalability. Make it ready for RCU-only protection. RTNL use is removed in the following patch. Also properly return 0 at the end of a dump to avoid and extra recvmsg() to get NLMSG_DONE. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 79 ++++++++++++++++++++--------------------------------- 1 file changed, 30 insertions(+), 49 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c662972d9206..4359e4910249 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -717,7 +717,7 @@ errout: static u32 inet6_base_seq(const struct net *net) { u32 res = atomic_read(&net->ipv6.dev_addr_genid) + - net->dev_base_seq; + READ_ONCE(net->dev_base_seq); /* Must not return 0 (see nl_dump_check_consistent()). * Chose a value far away from 0. @@ -5272,13 +5272,13 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, /* called with rcu_read_lock() */ static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb, - struct netlink_callback *cb, int s_ip_idx, + struct netlink_callback *cb, int *s_ip_idx, struct inet6_fill_args *fillargs) { const struct ifmcaddr6 *ifmca; const struct ifacaddr6 *ifaca; int ip_idx = 0; - int err = 1; + int err = 0; switch (fillargs->type) { case UNICAST_ADDR: { @@ -5287,7 +5287,7 @@ static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb, /* unicast address incl. temp addr */ list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) { - if (ip_idx < s_ip_idx) + if (ip_idx < *s_ip_idx) goto next; err = inet6_fill_ifaddr(skb, ifa, fillargs); if (err < 0) @@ -5305,7 +5305,7 @@ next: for (ifmca = rcu_dereference(idev->mc_list); ifmca; ifmca = rcu_dereference(ifmca->next), ip_idx++) { - if (ip_idx < s_ip_idx) + if (ip_idx < *s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, fillargs); if (err < 0) @@ -5317,7 +5317,7 @@ next: /* anycast address */ for (ifaca = rcu_dereference(idev->ac_list); ifaca; ifaca = rcu_dereference(ifaca->aca_next), ip_idx++) { - if (ip_idx < s_ip_idx) + if (ip_idx < *s_ip_idx) continue; err = inet6_fill_ifacaddr(skb, ifaca, fillargs); if (err < 0) @@ -5327,7 +5327,7 @@ next: default: break; } - cb->args[2] = ip_idx; + *s_ip_idx = err ? ip_idx : 0; return err; } @@ -5390,6 +5390,7 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, enum addr_type_t type) { + struct net *tgt_net = sock_net(skb->sk); const struct nlmsghdr *nlh = cb->nlh; struct inet6_fill_args fillargs = { .portid = NETLINK_CB(cb->skb).portid, @@ -5398,72 +5399,52 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, .netnsid = -1, .type = type, }; - struct net *tgt_net = sock_net(skb->sk); - int idx, s_idx, s_ip_idx; - int h, s_h; + struct { + unsigned long ifindex; + int ip_idx; + } *ctx = (void *)cb->ctx; struct net_device *dev; struct inet6_dev *idev; - struct hlist_head *head; int err = 0; - s_h = cb->args[0]; - s_idx = idx = cb->args[1]; - s_ip_idx = cb->args[2]; - rcu_read_lock(); if (cb->strict_check) { err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, skb->sk, cb); if (err < 0) - goto put_tgt_net; + goto done; err = 0; if (fillargs.ifindex) { - dev = __dev_get_by_index(tgt_net, fillargs.ifindex); - if (!dev) { - err = -ENODEV; - goto put_tgt_net; - } + err = -ENODEV; + dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex); + if (!dev) + goto done; idev = __in6_dev_get(dev); - if (idev) { - err = in6_dump_addrs(idev, skb, cb, s_ip_idx, + if (idev) + err = in6_dump_addrs(idev, skb, cb, + &ctx->ip_idx, &fillargs); - if (err > 0) - err = 0; - } - goto put_tgt_net; + goto done; } } cb->seq = inet6_base_seq(tgt_net); - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &tgt_net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - if (h > s_h || idx > s_idx) - s_ip_idx = 0; - idev = __in6_dev_get(dev); - if (!idev) - goto cont; - - if (in6_dump_addrs(idev, skb, cb, s_ip_idx, - &fillargs) < 0) - goto done; -cont: - idx++; - } + for_each_netdev_dump(tgt_net, dev, ctx->ifindex) { + idev = __in6_dev_get(dev); + if (!idev) + continue; + err = in6_dump_addrs(idev, skb, cb, &ctx->ip_idx, + &fillargs); + if (err < 0) + goto done; } done: - cb->args[0] = h; - cb->args[1] = idx; -put_tgt_net: rcu_read_unlock(); if (fillargs.netnsid >= 0) put_net(tgt_net); - return skb->len ? : err; + return err; } static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) -- cgit v1.2.3 From 155549a6683b1ee37cecec1bd4a439083b706656 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 15:51:44 +0000 Subject: ipv6: remove RTNL protection from inet6_dump_addr() We can now remove RTNL acquisition while running inet6_dump_addr(), inet6_dump_ifmcaddr() and inet6_dump_ifacaddr(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net/ipv6/addrconf.c') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4359e4910249..247bd4d8ee45 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7454,15 +7454,18 @@ int __init addrconf_init(void) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr, - RTNL_FLAG_DOIT_UNLOCKED); + RTNL_FLAG_DOIT_UNLOCKED | + RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST, - NULL, inet6_dump_ifmcaddr, 0); + NULL, inet6_dump_ifmcaddr, + RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST, - NULL, inet6_dump_ifacaddr, 0); + NULL, inet6_dump_ifacaddr, + RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF, -- cgit v1.2.3