summaryrefslogtreecommitdiffstats
path: root/net/netlink
diff options
context:
space:
mode:
Diffstat (limited to 'net/netlink')
-rw-r--r--net/netlink/af_netlink.c352
-rw-r--r--net/netlink/af_netlink.h11
-rw-r--r--net/netlink/diag.c15
-rw-r--r--net/netlink/genetlink.c70
4 files changed, 252 insertions, 196 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index ef5f77b44ec7..2702673f0f23 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -61,6 +61,7 @@
#include <linux/rhashtable.h>
#include <asm/cacheflush.h>
#include <linux/hash.h>
+#include <linux/genetlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -97,12 +98,12 @@ static int netlink_dump(struct sock *sk);
static void netlink_skb_destructor(struct sk_buff *skb);
/* nl_table locking explained:
- * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock
- * combined with an RCU read-side lock. Insertion and removal are protected
- * with nl_sk_hash_lock while using RCU list modification primitives and may
- * run in parallel to nl_table_lock protected lookups. Destruction of the
- * Netlink socket may only occur *after* nl_table_lock has been acquired
- * either during or after the socket has been removed from the list.
+ * Lookup and traversal are protected with an RCU read-side lock. Insertion
+ * and removal are protected with per bucket lock while using RCU list
+ * modification primitives and may run in parallel to RCU protected lookups.
+ * Destruction of the Netlink socket may only occur *after* nl_table_lock has
+ * been acquired * either during or after the socket has been removed from
+ * the list and after an RCU grace period.
*/
DEFINE_RWLOCK(nl_table_lock);
EXPORT_SYMBOL_GPL(nl_table_lock);
@@ -110,19 +111,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
-/* Protects netlink socket hash table mutations */
-DEFINE_MUTEX(nl_sk_hash_lock);
-EXPORT_SYMBOL_GPL(nl_sk_hash_lock);
-
-#ifdef CONFIG_PROVE_LOCKING
-static int lockdep_nl_sk_hash_is_held(void *parent)
-{
- if (debug_locks)
- return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock);
- return 1;
-}
-#endif
-
static ATOMIC_NOTIFIER_HEAD(netlink_chain);
static DEFINE_SPINLOCK(netlink_tap_lock);
@@ -525,14 +513,14 @@ out:
return err;
}
-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
+static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
{
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
struct page *p_start, *p_end;
/* First page is flushed through netlink_{get,set}_status */
p_start = pgvec_to_page(hdr + PAGE_SIZE);
- p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
+ p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
while (p_start <= p_end) {
flush_dcache_page(p_start);
p_start++;
@@ -550,9 +538,9 @@ static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
static void netlink_set_status(struct nl_mmap_hdr *hdr,
enum nl_mmap_status status)
{
+ smp_mb();
hdr->nm_status = status;
flush_dcache_page(pgvec_to_page(hdr));
- smp_wmb();
}
static struct nl_mmap_hdr *
@@ -707,31 +695,23 @@ static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
u32 dst_portid, u32 dst_group,
- struct sock_iocb *siocb)
+ struct scm_cookie *scm)
{
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_ring *ring;
struct nl_mmap_hdr *hdr;
struct sk_buff *skb;
unsigned int maxlen;
- bool excl = true;
int err = 0, len = 0;
- /* Netlink messages are validated by the receiver before processing.
- * In order to avoid userspace changing the contents of the message
- * after validation, the socket and the ring may only be used by a
- * single process, otherwise we fall back to copying.
- */
- if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 ||
- atomic_read(&nlk->mapped) > 1)
- excl = false;
-
mutex_lock(&nlk->pg_vec_lock);
ring = &nlk->tx_ring;
maxlen = ring->frame_size - NL_MMAP_HDRLEN;
do {
+ unsigned int nm_len;
+
hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
if (hdr == NULL) {
if (!(msg->msg_flags & MSG_DONTWAIT) &&
@@ -739,41 +719,29 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
schedule();
continue;
}
- if (hdr->nm_len > maxlen) {
+
+ nm_len = ACCESS_ONCE(hdr->nm_len);
+ if (nm_len > maxlen) {
err = -EINVAL;
goto out;
}
- netlink_frame_flush_dcache(hdr);
+ netlink_frame_flush_dcache(hdr, nm_len);
- if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
- skb = alloc_skb_head(GFP_KERNEL);
- if (skb == NULL) {
- err = -ENOBUFS;
- goto out;
- }
- sock_hold(sk);
- netlink_ring_setup_skb(skb, sk, ring, hdr);
- NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
- __skb_put(skb, hdr->nm_len);
- netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
- atomic_inc(&ring->pending);
- } else {
- skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
- if (skb == NULL) {
- err = -ENOBUFS;
- goto out;
- }
- __skb_put(skb, hdr->nm_len);
- memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+ skb = alloc_skb(nm_len, GFP_KERNEL);
+ if (skb == NULL) {
+ err = -ENOBUFS;
+ goto out;
}
+ __skb_put(skb, nm_len);
+ memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
+ netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
netlink_increment_head(ring);
NETLINK_CB(skb).portid = nlk->portid;
NETLINK_CB(skb).dst_group = dst_group;
- NETLINK_CB(skb).creds = siocb->scm->creds;
+ NETLINK_CB(skb).creds = scm->creds;
err = security_netlink_send(sk, skb);
if (err) {
@@ -813,7 +781,7 @@ static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
hdr->nm_pid = NETLINK_CB(skb).creds.pid;
hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
- netlink_frame_flush_dcache(hdr);
+ netlink_frame_flush_dcache(hdr, hdr->nm_len);
netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
@@ -852,7 +820,7 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
#define netlink_tx_is_mmaped(sk) false
#define netlink_mmap sock_no_mmap
#define netlink_poll datagram_poll
-#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0
+#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0
#endif /* CONFIG_NETLINK_MMAP */
static void netlink_skb_destructor(struct sk_buff *skb)
@@ -1022,26 +990,33 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
.net = net,
.portid = portid,
};
- u32 hash;
-
- hash = rhashtable_hashfn(&table->hash, &portid, sizeof(portid));
- return rhashtable_lookup_compare(&table->hash, hash,
+ return rhashtable_lookup_compare(&table->hash, &portid,
&netlink_compare, &arg);
}
+static bool __netlink_insert(struct netlink_table *table, struct sock *sk)
+{
+ struct netlink_compare_arg arg = {
+ .net = sock_net(sk),
+ .portid = nlk_sk(sk)->portid,
+ };
+
+ return rhashtable_lookup_compare_insert(&table->hash,
+ &nlk_sk(sk)->node,
+ &netlink_compare, &arg);
+}
+
static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
struct netlink_table *table = &nl_table[protocol];
struct sock *sk;
- read_lock(&nl_table_lock);
rcu_read_lock();
sk = __netlink_lookup(table, portid, net);
if (sk)
sock_hold(sk);
rcu_read_unlock();
- read_unlock(&nl_table_lock);
return sk;
}
@@ -1072,29 +1047,33 @@ netlink_update_listeners(struct sock *sk)
* makes sure updates are visible before bind or setsockopt return. */
}
-static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
+static int netlink_insert(struct sock *sk, u32 portid)
{
struct netlink_table *table = &nl_table[sk->sk_protocol];
- int err = -EADDRINUSE;
+ int err;
- mutex_lock(&nl_sk_hash_lock);
- if (__netlink_lookup(table, portid, net))
- goto err;
+ lock_sock(sk);
err = -EBUSY;
if (nlk_sk(sk)->portid)
goto err;
err = -ENOMEM;
- if (BITS_PER_LONG > 32 && unlikely(table->hash.nelems >= UINT_MAX))
+ if (BITS_PER_LONG > 32 &&
+ unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
goto err;
nlk_sk(sk)->portid = portid;
sock_hold(sk);
- rhashtable_insert(&table->hash, &nlk_sk(sk)->node);
+
err = 0;
+ if (!__netlink_insert(table, sk)) {
+ err = -EADDRINUSE;
+ sock_put(sk);
+ }
+
err:
- mutex_unlock(&nl_sk_hash_lock);
+ release_sock(sk);
return err;
}
@@ -1102,17 +1081,19 @@ static void netlink_remove(struct sock *sk)
{
struct netlink_table *table;
- mutex_lock(&nl_sk_hash_lock);
table = &nl_table[sk->sk_protocol];
if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) {
WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
__sock_put(sk);
}
- mutex_unlock(&nl_sk_hash_lock);
netlink_table_grab();
- if (nlk_sk(sk)->subscriptions)
+ if (nlk_sk(sk)->subscriptions) {
__sk_del_bind_node(sk);
+ netlink_update_listeners(sk);
+ }
+ if (sk->sk_protocol == NETLINK_GENERIC)
+ atomic_inc(&genl_sk_destructing_cnt);
netlink_table_ungrab();
}
@@ -1159,8 +1140,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
struct module *module = NULL;
struct mutex *cb_mutex;
struct netlink_sock *nlk;
- int (*bind)(int group);
- void (*unbind)(int group);
+ int (*bind)(struct net *net, int group);
+ void (*unbind)(struct net *net, int group);
int err = 0;
sock->state = SS_UNCONNECTED;
@@ -1212,6 +1193,13 @@ out_module:
goto out;
}
+static void deferred_put_nlk_sk(struct rcu_head *head)
+{
+ struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
+
+ sock_put(&nlk->sk);
+}
+
static int netlink_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -1229,6 +1217,20 @@ static int netlink_release(struct socket *sock)
* will be purged.
*/
+ /* must not acquire netlink_table_lock in any way again before unbind
+ * and notifying genetlink is done as otherwise it might deadlock
+ */
+ if (nlk->netlink_unbind) {
+ int i;
+
+ for (i = 0; i < nlk->ngroups; i++)
+ if (test_bit(i, nlk->groups))
+ nlk->netlink_unbind(sock_net(sk), i + 1);
+ }
+ if (sk->sk_protocol == NETLINK_GENERIC &&
+ atomic_dec_return(&genl_sk_destructing_cnt) == 0)
+ wake_up(&genl_sk_destructing_waitq);
+
sock->sk = NULL;
wake_up_interruptible_all(&nlk->wait);
@@ -1246,8 +1248,8 @@ static int netlink_release(struct socket *sock)
module_put(nlk->module);
- netlink_table_grab();
if (netlink_is_kernel(sk)) {
+ netlink_table_grab();
BUG_ON(nl_table[sk->sk_protocol].registered == 0);
if (--nl_table[sk->sk_protocol].registered == 0) {
struct listeners *old;
@@ -1261,10 +1263,8 @@ static int netlink_release(struct socket *sock)
nl_table[sk->sk_protocol].flags = 0;
nl_table[sk->sk_protocol].registered = 0;
}
- } else if (nlk->subscriptions) {
- netlink_update_listeners(sk);
+ netlink_table_ungrab();
}
- netlink_table_ungrab();
kfree(nlk->groups);
nlk->groups = NULL;
@@ -1272,7 +1272,7 @@ static int netlink_release(struct socket *sock)
local_bh_disable();
sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
local_bh_enable();
- sock_put(sk);
+ call_rcu(&nlk->rcu, deferred_put_nlk_sk);
return 0;
}
@@ -1287,7 +1287,6 @@ static int netlink_autobind(struct socket *sock)
retry:
cond_resched();
- netlink_table_grab();
rcu_read_lock();
if (__netlink_lookup(table, portid, net)) {
/* Bind collision, search negative portid values. */
@@ -1295,13 +1294,11 @@ retry:
if (rover > -4097)
rover = -4097;
rcu_read_unlock();
- netlink_table_ungrab();
goto retry;
}
rcu_read_unlock();
- netlink_table_ungrab();
- err = netlink_insert(sk, net, portid);
+ err = netlink_insert(sk, portid);
if (err == -EADDRINUSE)
goto retry;
@@ -1430,9 +1427,10 @@ static int netlink_realloc_groups(struct sock *sk)
return err;
}
-static void netlink_unbind(int group, long unsigned int groups,
- struct netlink_sock *nlk)
+static void netlink_undo_bind(int group, long unsigned int groups,
+ struct sock *sk)
{
+ struct netlink_sock *nlk = nlk_sk(sk);
int undo;
if (!nlk->netlink_unbind)
@@ -1440,7 +1438,7 @@ static void netlink_unbind(int group, long unsigned int groups,
for (undo = 0; undo < group; undo++)
if (test_bit(undo, &groups))
- nlk->netlink_unbind(undo);
+ nlk->netlink_unbind(sock_net(sk), undo + 1);
}
static int netlink_bind(struct socket *sock, struct sockaddr *addr,
@@ -1478,20 +1476,20 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
for (group = 0; group < nlk->ngroups; group++) {
if (!test_bit(group, &groups))
continue;
- err = nlk->netlink_bind(group);
+ err = nlk->netlink_bind(net, group + 1);
if (!err)
continue;
- netlink_unbind(group, groups, nlk);
+ netlink_undo_bind(group, groups, sk);
return err;
}
}
if (!nlk->portid) {
err = nladdr->nl_pid ?
- netlink_insert(sk, net, nladdr->nl_pid) :
+ netlink_insert(sk, nladdr->nl_pid) :
netlink_autobind(sock);
if (err) {
- netlink_unbind(nlk->ngroups, groups, nlk);
+ netlink_undo_bind(nlk->ngroups, groups, sk);
return err;
}
}
@@ -2142,7 +2140,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
if (!val || val - 1 >= nlk->ngroups)
return -EINVAL;
if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) {
- err = nlk->netlink_bind(val);
+ err = nlk->netlink_bind(sock_net(sk), val);
if (err)
return err;
}
@@ -2151,7 +2149,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
optname == NETLINK_ADD_MEMBERSHIP);
netlink_table_ungrab();
if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
- nlk->netlink_unbind(val);
+ nlk->netlink_unbind(sock_net(sk), val);
err = 0;
break;
@@ -2261,7 +2259,6 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct msghdr *msg, size_t len)
{
- struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
@@ -2275,10 +2272,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (msg->msg_flags&MSG_OOB)
return -EOPNOTSUPP;
- if (NULL == siocb->scm)
- siocb->scm = &scm;
-
- err = scm_send(sock, msg, siocb->scm, true);
+ err = scm_send(sock, msg, &scm, true);
if (err < 0)
return err;
@@ -2304,10 +2298,15 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
goto out;
}
+ /* It's a really convoluted way for userland to ask for mmaped
+ * sendmsg(), but that's what we've got...
+ */
if (netlink_tx_is_mmaped(sk) &&
+ msg->msg_iter.type == ITER_IOVEC &&
+ msg->msg_iter.nr_segs == 1 &&
msg->msg_iter.iov->iov_base == NULL) {
err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
- siocb);
+ &scm);
goto out;
}
@@ -2321,7 +2320,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
NETLINK_CB(skb).portid = nlk->portid;
NETLINK_CB(skb).dst_group = dst_group;
- NETLINK_CB(skb).creds = siocb->scm->creds;
+ NETLINK_CB(skb).creds = scm.creds;
NETLINK_CB(skb).flags = netlink_skb_flags;
err = -EFAULT;
@@ -2343,7 +2342,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
out:
- scm_destroy(siocb->scm);
+ scm_destroy(&scm);
return err;
}
@@ -2351,7 +2350,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
struct msghdr *msg, size_t len,
int flags)
{
- struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
struct scm_cookie scm;
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
@@ -2414,11 +2412,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
if (nlk->flags & NETLINK_RECV_PKTINFO)
netlink_cmsg_recv_pktinfo(msg, skb);
- if (NULL == siocb->scm) {
- memset(&scm, 0, sizeof(scm));
- siocb->scm = &scm;
- }
- siocb->scm->creds = *NETLINK_CREDS(skb);
+ memset(&scm, 0, sizeof(scm));
+ scm.creds = *NETLINK_CREDS(skb);
if (flags & MSG_TRUNC)
copied = data_skb->len;
@@ -2433,7 +2428,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
}
}
- scm_recv(sock, msg, siocb->scm, flags);
+ scm_recv(sock, msg, &scm, flags);
out:
netlink_rcv_wake(sk);
return err ? : copied;
@@ -2494,7 +2489,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (cfg && cfg->input)
nlk_sk(sk)->netlink_rcv = cfg->input;
- if (netlink_insert(sk, net, 0))
+ if (netlink_insert(sk, 0))
goto out_sock_release;
nlk = nlk_sk(sk);
@@ -2896,97 +2891,97 @@ EXPORT_SYMBOL(nlmsg_notify);
#ifdef CONFIG_PROC_FS
struct nl_seq_iter {
struct seq_net_private p;
+ struct rhashtable_iter hti;
int link;
- int hash_idx;
};
-static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
+static int netlink_walk_start(struct nl_seq_iter *iter)
{
- struct nl_seq_iter *iter = seq->private;
- int i, j;
- struct netlink_sock *nlk;
- struct sock *s;
- loff_t off = 0;
-
- for (i = 0; i < MAX_LINKS; i++) {
- struct rhashtable *ht = &nl_table[i].hash;
- const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-
- for (j = 0; j < tbl->size; j++) {
- rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) {
- s = (struct sock *)nlk;
+ int err;
- if (sock_net(s) != seq_file_net(seq))
- continue;
- if (off == pos) {
- iter->link = i;
- iter->hash_idx = j;
- return s;
- }
- ++off;
- }
- }
+ err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti);
+ if (err) {
+ iter->link = MAX_LINKS;
+ return err;
}
- return NULL;
+
+ err = rhashtable_walk_start(&iter->hti);
+ return err == -EAGAIN ? 0 : err;
}
-static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(nl_table_lock) __acquires(RCU)
+static void netlink_walk_stop(struct nl_seq_iter *iter)
{
- read_lock(&nl_table_lock);
- rcu_read_lock();
- return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+ rhashtable_walk_stop(&iter->hti);
+ rhashtable_walk_exit(&iter->hti);
}
-static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+static void *__netlink_seq_next(struct seq_file *seq)
{
- struct rhashtable *ht;
+ struct nl_seq_iter *iter = seq->private;
struct netlink_sock *nlk;
- struct nl_seq_iter *iter;
- struct net *net;
- int i, j;
- ++*pos;
+ do {
+ for (;;) {
+ int err;
- if (v == SEQ_START_TOKEN)
- return netlink_seq_socket_idx(seq, 0);
+ nlk = rhashtable_walk_next(&iter->hti);
- net = seq_file_net(seq);
- iter = seq->private;
- nlk = v;
+ if (IS_ERR(nlk)) {
+ if (PTR_ERR(nlk) == -EAGAIN)
+ continue;
- i = iter->link;
- ht = &nl_table[i].hash;
- rht_for_each_entry(nlk, nlk->node.next, ht, node)
- if (net_eq(sock_net((struct sock *)nlk), net))
- return nlk;
+ return nlk;
+ }
- j = iter->hash_idx + 1;
+ if (nlk)
+ break;
- do {
- const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
-
- for (; j < tbl->size; j++) {
- rht_for_each_entry(nlk, tbl->buckets[j], ht, node) {
- if (net_eq(sock_net((struct sock *)nlk), net)) {
- iter->link = i;
- iter->hash_idx = j;
- return nlk;
- }
- }
+ netlink_walk_stop(iter);
+ if (++iter->link >= MAX_LINKS)
+ return NULL;
+
+ err = netlink_walk_start(iter);
+ if (err)
+ return ERR_PTR(err);
}
+ } while (sock_net(&nlk->sk) != seq_file_net(seq));
- j = 0;
- } while (++i < MAX_LINKS);
+ return nlk;
+}
- return NULL;
+static void *netlink_seq_start(struct seq_file *seq, loff_t *posp)
+{
+ struct nl_seq_iter *iter = seq->private;
+ void *obj = SEQ_START_TOKEN;
+ loff_t pos;
+ int err;
+
+ iter->link = 0;
+
+ err = netlink_walk_start(iter);
+ if (err)
+ return ERR_PTR(err);
+
+ for (pos = *posp; pos && obj && !IS_ERR(obj); pos--)
+ obj = __netlink_seq_next(seq);
+
+ return obj;
+}
+
+static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ ++*pos;
+ return __netlink_seq_next(seq);
}
static void netlink_seq_stop(struct seq_file *seq, void *v)
- __releases(RCU) __releases(nl_table_lock)
{
- rcu_read_unlock();
- read_unlock(&nl_table_lock);
+ struct nl_seq_iter *iter = seq->private;
+
+ if (iter->link >= MAX_LINKS)
+ return;
+
+ netlink_walk_stop(iter);
}
@@ -3133,9 +3128,6 @@ static int __init netlink_proto_init(void)
.max_shift = 16, /* 64K */
.grow_decision = rht_grow_above_75,
.shrink_decision = rht_shrink_below_30,
-#ifdef CONFIG_PROVE_LOCKING
- .mutex_is_held = lockdep_nl_sk_hash_is_held,
-#endif
};
if (err != 0)
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index b20a1731759b..89008405d6b4 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -2,6 +2,7 @@
#define _AF_NETLINK_H
#include <linux/rhashtable.h>
+#include <linux/atomic.h>
#include <net/sock.h>
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
@@ -39,8 +40,8 @@ struct netlink_sock {
struct mutex *cb_mutex;
struct mutex cb_def_mutex;
void (*netlink_rcv)(struct sk_buff *skb);
- int (*netlink_bind)(int group);
- void (*netlink_unbind)(int group);
+ int (*netlink_bind)(struct net *net, int group);
+ void (*netlink_unbind)(struct net *net, int group);
struct module *module;
#ifdef CONFIG_NETLINK_MMAP
struct mutex pg_vec_lock;
@@ -50,6 +51,7 @@ struct netlink_sock {
#endif /* CONFIG_NETLINK_MMAP */
struct rhash_head node;
+ struct rcu_head rcu;
};
static inline struct netlink_sock *nlk_sk(struct sock *sk)
@@ -65,14 +67,13 @@ struct netlink_table {
unsigned int groups;
struct mutex *cb_mutex;
struct module *module;
- int (*bind)(int group);
- void (*unbind)(int group);
+ int (*bind)(struct net *net, int group);
+ void (*unbind)(struct net *net, int group);
bool (*compare)(struct net *net, struct sock *sock);
int registered;
};
extern struct netlink_table *nl_table;
extern rwlock_t nl_table_lock;
-extern struct mutex nl_sk_hash_lock;
#endif
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index de8c74a3c061..3ee63a3cff30 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -91,7 +91,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
sk_diag_put_rings_cfg(sk, skb))
goto out_nlmsg_trim;
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
out_nlmsg_trim:
nlmsg_cancel(skb, nlh);
@@ -103,7 +104,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
{
struct netlink_table *tbl = &nl_table[protocol];
struct rhashtable *ht = &tbl->hash;
- const struct bucket_table *htbl = rht_dereference(ht->tbl, ht);
+ const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht);
struct net *net = sock_net(skb->sk);
struct netlink_diag_req *req;
struct netlink_sock *nlsk;
@@ -113,7 +114,9 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
req = nlmsg_data(cb->nlh);
for (i = 0; i < htbl->size; i++) {
- rht_for_each_entry(nlsk, htbl->buckets[i], ht, node) {
+ struct rhash_head *pos;
+
+ rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) {
sk = (struct sock *)nlsk;
if (!net_eq(sock_net(sk), net))
@@ -170,7 +173,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
req = nlmsg_data(cb->nlh);
- mutex_lock(&nl_sk_hash_lock);
+ rcu_read_lock();
read_lock(&nl_table_lock);
if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
@@ -184,7 +187,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
} else {
if (req->sdiag_protocol >= MAX_LINKS) {
read_unlock(&nl_table_lock);
- mutex_unlock(&nl_sk_hash_lock);
+ rcu_read_unlock();
return -ENOENT;
}
@@ -192,7 +195,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
read_unlock(&nl_table_lock);
- mutex_unlock(&nl_sk_hash_lock);
+ rcu_read_unlock();
return skb->len;
}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 76393f2f4b22..2ed5f964772e 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -23,6 +23,9 @@
static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */
static DECLARE_RWSEM(cb_lock);
+atomic_t genl_sk_destructing_cnt = ATOMIC_INIT(0);
+DECLARE_WAIT_QUEUE_HEAD(genl_sk_destructing_waitq);
+
void genl_lock(void)
{
mutex_lock(&genl_mutex);
@@ -435,15 +438,18 @@ int genl_unregister_family(struct genl_family *family)
genl_lock_all();
- genl_unregister_mc_groups(family);
-
list_for_each_entry(rc, genl_family_chain(family->id), family_list) {
if (family->id != rc->id || strcmp(rc->name, family->name))
continue;
+ genl_unregister_mc_groups(family);
+
list_del(&rc->family_list);
family->n_ops = 0;
- genl_unlock_all();
+ up_write(&cb_lock);
+ wait_event(genl_sk_destructing_waitq,
+ atomic_read(&genl_sk_destructing_cnt) == 0);
+ genl_unlock();
kfree(family->attrbuf);
genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0);
@@ -756,7 +762,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq,
nla_nest_end(skb, nla_grps);
}
- return genlmsg_end(skb, hdr);
+ genlmsg_end(skb, hdr);
+ return 0;
nla_put_failure:
genlmsg_cancel(skb, hdr);
@@ -796,7 +803,8 @@ static int ctrl_fill_mcgrp_info(struct genl_family *family,
nla_nest_end(skb, nest);
nla_nest_end(skb, nla_grps);
- return genlmsg_end(skb, hdr);
+ genlmsg_end(skb, hdr);
+ return 0;
nla_put_failure:
genlmsg_cancel(skb, hdr);
@@ -983,11 +991,63 @@ static struct genl_multicast_group genl_ctrl_groups[] = {
{ .name = "notify", },
};
+static int genl_bind(struct net *net, int group)
+{
+ int i, err = -ENOENT;
+
+ down_read(&cb_lock);
+ for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
+ struct genl_family *f;
+
+ list_for_each_entry(f, genl_family_chain(i), family_list) {
+ if (group >= f->mcgrp_offset &&
+ group < f->mcgrp_offset + f->n_mcgrps) {
+ int fam_grp = group - f->mcgrp_offset;
+
+ if (!f->netnsok && net != &init_net)
+ err = -ENOENT;
+ else if (f->mcast_bind)
+ err = f->mcast_bind(net, fam_grp);
+ else
+ err = 0;
+ break;
+ }
+ }
+ }
+ up_read(&cb_lock);
+
+ return err;
+}
+
+static void genl_unbind(struct net *net, int group)
+{
+ int i;
+
+ down_read(&cb_lock);
+ for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
+ struct genl_family *f;
+
+ list_for_each_entry(f, genl_family_chain(i), family_list) {
+ if (group >= f->mcgrp_offset &&
+ group < f->mcgrp_offset + f->n_mcgrps) {
+ int fam_grp = group - f->mcgrp_offset;
+
+ if (f->mcast_unbind)
+ f->mcast_unbind(net, fam_grp);
+ break;
+ }
+ }
+ }
+ up_read(&cb_lock);
+}
+
static int __net_init genl_pernet_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
+ .bind = genl_bind,
+ .unbind = genl_unbind,
};
/* we'll bump the group number right afterwards */