From 02c81ab95d8718d75886d16227a10cc7774493ea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 22 Dec 2014 18:56:35 +0100 Subject: netlink: rename netlink_unbind() to netlink_undo_bind() The new name is more expressive - this isn't a generic unbind function but rather only a little undo helper for use only in netlink_bind(). Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/netlink/af_netlink.c') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 074cf3e91c6f..b4cf8ee0e1b8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1410,8 +1410,8 @@ static int netlink_realloc_groups(struct sock *sk) return err; } -static void netlink_unbind(int group, long unsigned int groups, - struct netlink_sock *nlk) +static void netlink_undo_bind(int group, long unsigned int groups, + struct netlink_sock *nlk) { int undo; @@ -1461,7 +1461,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, err = nlk->netlink_bind(group); if (!err) continue; - netlink_unbind(group, groups, nlk); + netlink_undo_bind(group, groups, nlk); return err; } } @@ -1471,7 +1471,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_unbind(nlk->ngroups, groups, nlk); + netlink_undo_bind(nlk->ngroups, groups, nlk); return err; } } -- cgit v1.2.3 From b10dcb3b94010e3ac3951f68789400b1665effb1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 22 Dec 2014 18:56:37 +0100 Subject: netlink: update listeners directly when removing socket The code is now confusing to read - first in one function down (netlink_remove) any group subscriptions are implicitly removed by calling __sk_del_bind_node(), but the subscriber database is only updated far later by calling netlink_update_listeners(). Move the latter call to just after removal from the list so it is easier to follow the code. This also enables moving the locking inside the kernel-socket conditional, which improves the normal socket destruction path. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/netlink/af_netlink.c') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b4cf8ee0e1b8..6a9fb7c489a8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1091,8 +1091,10 @@ static void netlink_remove(struct sock *sk) mutex_unlock(&nl_sk_hash_lock); netlink_table_grab(); - if (nlk_sk(sk)->subscriptions) + if (nlk_sk(sk)->subscriptions) { __sk_del_bind_node(sk); + netlink_update_listeners(sk); + } netlink_table_ungrab(); } @@ -1226,8 +1228,8 @@ static int netlink_release(struct socket *sock) module_put(nlk->module); - netlink_table_grab(); if (netlink_is_kernel(sk)) { + netlink_table_grab(); BUG_ON(nl_table[sk->sk_protocol].registered == 0); if (--nl_table[sk->sk_protocol].registered == 0) { struct listeners *old; @@ -1241,10 +1243,8 @@ static int netlink_release(struct socket *sock) nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } - } else if (nlk->subscriptions) { - netlink_update_listeners(sk); + netlink_table_ungrab(); } - netlink_table_ungrab(); kfree(nlk->groups); nlk->groups = NULL; -- cgit v1.2.3 From 7d68536bed72b09de03b07479dd707c5831b3b94 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 22 Dec 2014 18:56:38 +0100 Subject: netlink: call unbind when releasing socket Currently, netlink_unbind() is only called when the socket explicitly unbinds, which limits its usefulness (luckily there are no users of it yet anyway.) Call netlink_unbind() also when a socket is released, so it becomes possible to track listeners with this callback and without also implementing a netlink notifier (and checking netlink_has_listeners() in there.) Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/netlink/af_netlink.c') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6a9fb7c489a8..f29b63fad932 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1246,6 +1246,13 @@ static int netlink_release(struct socket *sock) netlink_table_ungrab(); } + if (nlk->netlink_unbind) { + int i; + + for (i = 0; i < nlk->ngroups; i++) + if (test_bit(i, nlk->groups)) + nlk->netlink_unbind(i + 1); + } kfree(nlk->groups); nlk->groups = NULL; -- cgit v1.2.3 From 023e2cfa36c31b0ad28c159a1bb0d61ff57334c8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 23 Dec 2014 21:00:06 +0100 Subject: netlink/genetlink: pass network namespace to bind/unbind Netlink families can exist in multiple namespaces, and for the most part multicast subscriptions are per network namespace. Thus it only makes sense to have bind/unbind notifications per network namespace. To achieve this, pass the network namespace of a given client socket to the bind/unbind functions. Also do this in generic netlink, and there also make sure that any bind for multicast groups that only exist in init_net is rejected. This isn't really a problem if it is accepted since a client in a different namespace will never receive any notifications from such a group, but it can confuse the family if not rejected (it's also possible to silently (without telling the family) accept it, but it would also have to be ignored on unbind so families that take any kind of action on bind/unbind won't do unnecessary work for invalid clients like that. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/netlink.h | 4 ++-- include/net/genetlink.h | 4 ++-- kernel/audit.c | 2 +- net/netfilter/nfnetlink.c | 2 +- net/netlink/af_netlink.c | 21 +++++++++++---------- net/netlink/af_netlink.h | 8 ++++---- net/netlink/genetlink.c | 12 +++++++----- 7 files changed, 28 insertions(+), 25 deletions(-) (limited to 'net/netlink/af_netlink.c') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9e572daa15d5..02fc86d2348e 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -46,8 +46,8 @@ struct netlink_kernel_cfg { unsigned int flags; void (*input)(struct sk_buff *skb); struct mutex *cb_mutex; - int (*bind)(int group); - void (*unbind)(int group); + int (*bind)(struct net *net, int group); + void (*unbind)(struct net *net, int group); bool (*compare)(struct net *net, struct sock *sk); }; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 3ed31e5a445b..84125088c309 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -56,8 +56,8 @@ struct genl_family { void (*post_doit)(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info); - int (*mcast_bind)(int group); - void (*mcast_unbind)(int group); + int (*mcast_bind)(struct net *net, int group); + void (*mcast_unbind)(struct net *net, int group); struct nlattr ** attrbuf; /* private */ const struct genl_ops * ops; /* private */ const struct genl_multicast_group *mcgrps; /* private */ diff --git a/kernel/audit.c b/kernel/audit.c index f8f203e8018c..aba9d9fadf0c 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1100,7 +1100,7 @@ static void audit_receive(struct sk_buff *skb) } /* Run custom bind function on netlink socket group connect or bind requests. */ -static int audit_bind(int group) +static int audit_bind(struct net *net, int group) { if (!capable(CAP_AUDIT_READ)) return -EPERM; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 13c2e17bbe27..cde4a6702fa3 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -463,7 +463,7 @@ static void nfnetlink_rcv(struct sk_buff *skb) } #ifdef CONFIG_MODULES -static int nfnetlink_bind(int group) +static int nfnetlink_bind(struct net *net, int group) { const struct nfnetlink_subsystem *ss; int type; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f29b63fad932..84ea76ca3f1f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1141,8 +1141,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, struct module *module = NULL; struct mutex *cb_mutex; struct netlink_sock *nlk; - int (*bind)(int group); - void (*unbind)(int group); + int (*bind)(struct net *net, int group); + void (*unbind)(struct net *net, int group); int err = 0; sock->state = SS_UNCONNECTED; @@ -1251,7 +1251,7 @@ static int netlink_release(struct socket *sock) for (i = 0; i < nlk->ngroups; i++) if (test_bit(i, nlk->groups)) - nlk->netlink_unbind(i + 1); + nlk->netlink_unbind(sock_net(sk), i + 1); } kfree(nlk->groups); nlk->groups = NULL; @@ -1418,8 +1418,9 @@ static int netlink_realloc_groups(struct sock *sk) } static void netlink_undo_bind(int group, long unsigned int groups, - struct netlink_sock *nlk) + struct sock *sk) { + struct netlink_sock *nlk = nlk_sk(sk); int undo; if (!nlk->netlink_unbind) @@ -1427,7 +1428,7 @@ static void netlink_undo_bind(int group, long unsigned int groups, for (undo = 0; undo < group; undo++) if (test_bit(undo, &groups)) - nlk->netlink_unbind(undo); + nlk->netlink_unbind(sock_net(sk), undo); } static int netlink_bind(struct socket *sock, struct sockaddr *addr, @@ -1465,10 +1466,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, for (group = 0; group < nlk->ngroups; group++) { if (!test_bit(group, &groups)) continue; - err = nlk->netlink_bind(group); + err = nlk->netlink_bind(net, group); if (!err) continue; - netlink_undo_bind(group, groups, nlk); + netlink_undo_bind(group, groups, sk); return err; } } @@ -1478,7 +1479,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_undo_bind(nlk->ngroups, groups, nlk); + netlink_undo_bind(nlk->ngroups, groups, sk); return err; } } @@ -2129,7 +2130,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (!val || val - 1 >= nlk->ngroups) return -EINVAL; if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { - err = nlk->netlink_bind(val); + err = nlk->netlink_bind(sock_net(sk), val); if (err) return err; } @@ -2138,7 +2139,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, optname == NETLINK_ADD_MEMBERSHIP); netlink_table_ungrab(); if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) - nlk->netlink_unbind(val); + nlk->netlink_unbind(sock_net(sk), val); err = 0; break; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index b20a1731759b..f123a88496f8 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -39,8 +39,8 @@ struct netlink_sock { struct mutex *cb_mutex; struct mutex cb_def_mutex; void (*netlink_rcv)(struct sk_buff *skb); - int (*netlink_bind)(int group); - void (*netlink_unbind)(int group); + int (*netlink_bind)(struct net *net, int group); + void (*netlink_unbind)(struct net *net, int group); struct module *module; #ifdef CONFIG_NETLINK_MMAP struct mutex pg_vec_lock; @@ -65,8 +65,8 @@ struct netlink_table { unsigned int groups; struct mutex *cb_mutex; struct module *module; - int (*bind)(int group); - void (*unbind)(int group); + int (*bind)(struct net *net, int group); + void (*unbind)(struct net *net, int group); bool (*compare)(struct net *net, struct sock *sock); int registered; }; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 05bf40bbd189..91566ed36c43 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -983,7 +983,7 @@ static struct genl_multicast_group genl_ctrl_groups[] = { { .name = "notify", }, }; -static int genl_bind(int group) +static int genl_bind(struct net *net, int group) { int i, err; bool found = false; @@ -997,8 +997,10 @@ static int genl_bind(int group) group < f->mcgrp_offset + f->n_mcgrps) { int fam_grp = group - f->mcgrp_offset; - if (f->mcast_bind) - err = f->mcast_bind(fam_grp); + if (!f->netnsok && net != &init_net) + err = -ENOENT; + else if (f->mcast_bind) + err = f->mcast_bind(net, fam_grp); else err = 0; found = true; @@ -1014,7 +1016,7 @@ static int genl_bind(int group) return err; } -static void genl_unbind(int group) +static void genl_unbind(struct net *net, int group) { int i; bool found = false; @@ -1029,7 +1031,7 @@ static void genl_unbind(int group) int fam_grp = group - f->mcgrp_offset; if (f->mcast_unbind) - f->mcast_unbind(fam_grp); + f->mcast_unbind(net, fam_grp); found = true; break; } -- cgit v1.2.3 From ee1c244219fd652964710a6cc3e4f922e86aa492 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jan 2015 11:37:14 +0100 Subject: genetlink: synchronize socket closing and family removal In addition to the problem Jeff Layton reported, I looked at the code and reproduced the same warning by subscribing and removing the genl family with a socket still open. This is a fairly tricky race which originates in the fact that generic netlink allows the family to go away while sockets are still open - unlike regular netlink which has a module refcount for every open socket so in general this cannot be triggered. Trying to resolve this issue by the obvious locking isn't possible as it will result in deadlocks between unregistration and group unbind notification (which incidentally lockdep doesn't find due to the home grown locking in the netlink table.) To really resolve this, introduce a "closing socket" reference counter (for generic netlink only, as it's the only affected family) in the core netlink code and use that in generic netlink to wait for all the sockets that are being closed at the same time as a generic netlink family is removed. This fixes the race that when a socket is closed, it will should call the unbind, but if the family is removed at the same time the unbind will not find it, leading to the warning. The real problem though is that in this case the unbind could actually find a new family that is registered to have a multicast group with the same ID, and call its mcast_unbind() leading to confusing. Also remove the warning since it would still trigger, but is now no longer a problem. This also moves the code in af_netlink.c to before unreferencing the module to avoid having the same problem in the normal non-genl case. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/genetlink.h | 4 ++++ include/net/genetlink.h | 5 ++++- net/netlink/af_netlink.c | 24 +++++++++++++++++------- net/netlink/af_netlink.h | 1 + net/netlink/genetlink.c | 16 +++++++++------- 5 files changed, 35 insertions(+), 15 deletions(-) (limited to 'net/netlink/af_netlink.c') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 55b685719d52..09460d6d6682 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -11,6 +11,10 @@ extern void genl_unlock(void); extern int lockdep_genl_is_held(void); #endif +/* for synchronisation between af_netlink and genetlink */ +extern atomic_t genl_sk_destructing_cnt; +extern wait_queue_head_t genl_sk_destructing_waitq; + /** * rcu_dereference_genl - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 2ea2c55bdc87..6c92415311ca 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -35,7 +35,10 @@ struct genl_info; * undo operations done by pre_doit, for example release locks * @mcast_bind: a socket bound to the given multicast group (which * is given as the offset into the groups array) - * @mcast_unbind: a socket was unbound from the given multicast group + * @mcast_unbind: a socket was unbound from the given multicast group. + * Note that unbind() will not be called symmetrically if the + * generic netlink family is removed while there are still open + * sockets. * @attrbuf: buffer to store parsed attributes * @family_list: family list * @mcgrps: multicast groups used by this family (private) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 84ea76ca3f1f..02fdde28dada 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -1095,6 +1096,8 @@ static void netlink_remove(struct sock *sk) __sk_del_bind_node(sk); netlink_update_listeners(sk); } + if (sk->sk_protocol == NETLINK_GENERIC) + atomic_inc(&genl_sk_destructing_cnt); netlink_table_ungrab(); } @@ -1211,6 +1214,20 @@ static int netlink_release(struct socket *sock) * will be purged. */ + /* must not acquire netlink_table_lock in any way again before unbind + * and notifying genetlink is done as otherwise it might deadlock + */ + if (nlk->netlink_unbind) { + int i; + + for (i = 0; i < nlk->ngroups; i++) + if (test_bit(i, nlk->groups)) + nlk->netlink_unbind(sock_net(sk), i + 1); + } + if (sk->sk_protocol == NETLINK_GENERIC && + atomic_dec_return(&genl_sk_destructing_cnt) == 0) + wake_up(&genl_sk_destructing_waitq); + sock->sk = NULL; wake_up_interruptible_all(&nlk->wait); @@ -1246,13 +1263,6 @@ static int netlink_release(struct socket *sock) netlink_table_ungrab(); } - if (nlk->netlink_unbind) { - int i; - - for (i = 0; i < nlk->ngroups; i++) - if (test_bit(i, nlk->groups)) - nlk->netlink_unbind(sock_net(sk), i + 1); - } kfree(nlk->groups); nlk->groups = NULL; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index f123a88496f8..f1c31b39aa3e 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -2,6 +2,7 @@ #define _AF_NETLINK_H #include +#include #include #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index c18d3f5624b2..ee57459fc258 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -23,6 +23,9 @@ static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ static DECLARE_RWSEM(cb_lock); +atomic_t genl_sk_destructing_cnt = ATOMIC_INIT(0); +DECLARE_WAIT_QUEUE_HEAD(genl_sk_destructing_waitq); + void genl_lock(void) { mutex_lock(&genl_mutex); @@ -435,15 +438,18 @@ int genl_unregister_family(struct genl_family *family) genl_lock_all(); - genl_unregister_mc_groups(family); - list_for_each_entry(rc, genl_family_chain(family->id), family_list) { if (family->id != rc->id || strcmp(rc->name, family->name)) continue; + genl_unregister_mc_groups(family); + list_del(&rc->family_list); family->n_ops = 0; - genl_unlock_all(); + up_write(&cb_lock); + wait_event(genl_sk_destructing_waitq, + atomic_read(&genl_sk_destructing_cnt) == 0); + genl_unlock(); kfree(family->attrbuf); genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); @@ -1014,7 +1020,6 @@ static int genl_bind(struct net *net, int group) static void genl_unbind(struct net *net, int group) { int i; - bool found = false; down_read(&cb_lock); for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { @@ -1027,14 +1032,11 @@ static void genl_unbind(struct net *net, int group) if (f->mcast_unbind) f->mcast_unbind(net, fam_grp); - found = true; break; } } } up_read(&cb_lock); - - WARN_ON(!found); } static int __net_init genl_pernet_init(struct net *net) -- cgit v1.2.3