diff options
Diffstat (limited to 'net/netlink')
-rw-r--r-- | net/netlink/af_netlink.c | 352 | ||||
-rw-r--r-- | net/netlink/af_netlink.h | 11 | ||||
-rw-r--r-- | net/netlink/diag.c | 15 | ||||
-rw-r--r-- | net/netlink/genetlink.c | 70 |
4 files changed, 252 insertions, 196 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index ef5f77b44ec7..2702673f0f23 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -61,6 +61,7 @@ #include <linux/rhashtable.h> #include <asm/cacheflush.h> #include <linux/hash.h> +#include <linux/genetlink.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -97,12 +98,12 @@ static int netlink_dump(struct sock *sk); static void netlink_skb_destructor(struct sk_buff *skb); /* nl_table locking explained: - * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock - * combined with an RCU read-side lock. Insertion and removal are protected - * with nl_sk_hash_lock while using RCU list modification primitives and may - * run in parallel to nl_table_lock protected lookups. Destruction of the - * Netlink socket may only occur *after* nl_table_lock has been acquired - * either during or after the socket has been removed from the list. + * Lookup and traversal are protected with an RCU read-side lock. Insertion + * and removal are protected with per bucket lock while using RCU list + * modification primitives and may run in parallel to RCU protected lookups. + * Destruction of the Netlink socket may only occur *after* nl_table_lock has + * been acquired * either during or after the socket has been removed from + * the list and after an RCU grace period. */ DEFINE_RWLOCK(nl_table_lock); EXPORT_SYMBOL_GPL(nl_table_lock); @@ -110,19 +111,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); -/* Protects netlink socket hash table mutations */ -DEFINE_MUTEX(nl_sk_hash_lock); -EXPORT_SYMBOL_GPL(nl_sk_hash_lock); - -#ifdef CONFIG_PROVE_LOCKING -static int lockdep_nl_sk_hash_is_held(void *parent) -{ - if (debug_locks) - return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock); - return 1; -} -#endif - static ATOMIC_NOTIFIER_HEAD(netlink_chain); static DEFINE_SPINLOCK(netlink_tap_lock); @@ -525,14 +513,14 @@ out: return err; } -static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr) +static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) { #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 struct page *p_start, *p_end; /* First page is flushed through netlink_{get,set}_status */ p_start = pgvec_to_page(hdr + PAGE_SIZE); - p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1); + p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); while (p_start <= p_end) { flush_dcache_page(p_start); p_start++; @@ -550,9 +538,9 @@ static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) static void netlink_set_status(struct nl_mmap_hdr *hdr, enum nl_mmap_status status) { + smp_mb(); hdr->nm_status = status; flush_dcache_page(pgvec_to_page(hdr)); - smp_wmb(); } static struct nl_mmap_hdr * @@ -707,31 +695,23 @@ static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, u32 dst_portid, u32 dst_group, - struct sock_iocb *siocb) + struct scm_cookie *scm) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ring *ring; struct nl_mmap_hdr *hdr; struct sk_buff *skb; unsigned int maxlen; - bool excl = true; int err = 0, len = 0; - /* Netlink messages are validated by the receiver before processing. - * In order to avoid userspace changing the contents of the message - * after validation, the socket and the ring may only be used by a - * single process, otherwise we fall back to copying. - */ - if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 || - atomic_read(&nlk->mapped) > 1) - excl = false; - mutex_lock(&nlk->pg_vec_lock); ring = &nlk->tx_ring; maxlen = ring->frame_size - NL_MMAP_HDRLEN; do { + unsigned int nm_len; + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); if (hdr == NULL) { if (!(msg->msg_flags & MSG_DONTWAIT) && @@ -739,41 +719,29 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, schedule(); continue; } - if (hdr->nm_len > maxlen) { + + nm_len = ACCESS_ONCE(hdr->nm_len); + if (nm_len > maxlen) { err = -EINVAL; goto out; } - netlink_frame_flush_dcache(hdr); + netlink_frame_flush_dcache(hdr, nm_len); - if (likely(dst_portid == 0 && dst_group == 0 && excl)) { - skb = alloc_skb_head(GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - sock_hold(sk); - netlink_ring_setup_skb(skb, sk, ring, hdr); - NETLINK_CB(skb).flags |= NETLINK_SKB_TX; - __skb_put(skb, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); - atomic_inc(&ring->pending); - } else { - skb = alloc_skb(hdr->nm_len, GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - __skb_put(skb, hdr->nm_len); - memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); + skb = alloc_skb(nm_len, GFP_KERNEL); + if (skb == NULL) { + err = -ENOBUFS; + goto out; } + __skb_put(skb, nm_len); + memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); + netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); netlink_increment_head(ring); NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).creds = scm->creds; err = security_netlink_send(sk, skb); if (err) { @@ -813,7 +781,7 @@ static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) hdr->nm_pid = NETLINK_CB(skb).creds.pid; hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_frame_flush_dcache(hdr); + netlink_frame_flush_dcache(hdr, hdr->nm_len); netlink_set_status(hdr, NL_MMAP_STATUS_VALID); NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; @@ -852,7 +820,7 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) #define netlink_tx_is_mmaped(sk) false #define netlink_mmap sock_no_mmap #define netlink_poll datagram_poll -#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 +#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0 #endif /* CONFIG_NETLINK_MMAP */ static void netlink_skb_destructor(struct sk_buff *skb) @@ -1022,26 +990,33 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, .net = net, .portid = portid, }; - u32 hash; - - hash = rhashtable_hashfn(&table->hash, &portid, sizeof(portid)); - return rhashtable_lookup_compare(&table->hash, hash, + return rhashtable_lookup_compare(&table->hash, &portid, &netlink_compare, &arg); } +static bool __netlink_insert(struct netlink_table *table, struct sock *sk) +{ + struct netlink_compare_arg arg = { + .net = sock_net(sk), + .portid = nlk_sk(sk)->portid, + }; + + return rhashtable_lookup_compare_insert(&table->hash, + &nlk_sk(sk)->node, + &netlink_compare, &arg); +} + static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { struct netlink_table *table = &nl_table[protocol]; struct sock *sk; - read_lock(&nl_table_lock); rcu_read_lock(); sk = __netlink_lookup(table, portid, net); if (sk) sock_hold(sk); rcu_read_unlock(); - read_unlock(&nl_table_lock); return sk; } @@ -1072,29 +1047,33 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, struct net *net, u32 portid) +static int netlink_insert(struct sock *sk, u32 portid) { struct netlink_table *table = &nl_table[sk->sk_protocol]; - int err = -EADDRINUSE; + int err; - mutex_lock(&nl_sk_hash_lock); - if (__netlink_lookup(table, portid, net)) - goto err; + lock_sock(sk); err = -EBUSY; if (nlk_sk(sk)->portid) goto err; err = -ENOMEM; - if (BITS_PER_LONG > 32 && unlikely(table->hash.nelems >= UINT_MAX)) + if (BITS_PER_LONG > 32 && + unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) goto err; nlk_sk(sk)->portid = portid; sock_hold(sk); - rhashtable_insert(&table->hash, &nlk_sk(sk)->node); + err = 0; + if (!__netlink_insert(table, sk)) { + err = -EADDRINUSE; + sock_put(sk); + } + err: - mutex_unlock(&nl_sk_hash_lock); + release_sock(sk); return err; } @@ -1102,17 +1081,19 @@ static void netlink_remove(struct sock *sk) { struct netlink_table *table; - mutex_lock(&nl_sk_hash_lock); table = &nl_table[sk->sk_protocol]; if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } - mutex_unlock(&nl_sk_hash_lock); netlink_table_grab(); - if (nlk_sk(sk)->subscriptions) + if (nlk_sk(sk)->subscriptions) { __sk_del_bind_node(sk); + netlink_update_listeners(sk); + } + if (sk->sk_protocol == NETLINK_GENERIC) + atomic_inc(&genl_sk_destructing_cnt); netlink_table_ungrab(); } @@ -1159,8 +1140,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, struct module *module = NULL; struct mutex *cb_mutex; struct netlink_sock *nlk; - int (*bind)(int group); - void (*unbind)(int group); + int (*bind)(struct net *net, int group); + void (*unbind)(struct net *net, int group); int err = 0; sock->state = SS_UNCONNECTED; @@ -1212,6 +1193,13 @@ out_module: goto out; } +static void deferred_put_nlk_sk(struct rcu_head *head) +{ + struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); + + sock_put(&nlk->sk); +} + static int netlink_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -1229,6 +1217,20 @@ static int netlink_release(struct socket *sock) * will be purged. */ + /* must not acquire netlink_table_lock in any way again before unbind + * and notifying genetlink is done as otherwise it might deadlock + */ + if (nlk->netlink_unbind) { + int i; + + for (i = 0; i < nlk->ngroups; i++) + if (test_bit(i, nlk->groups)) + nlk->netlink_unbind(sock_net(sk), i + 1); + } + if (sk->sk_protocol == NETLINK_GENERIC && + atomic_dec_return(&genl_sk_destructing_cnt) == 0) + wake_up(&genl_sk_destructing_waitq); + sock->sk = NULL; wake_up_interruptible_all(&nlk->wait); @@ -1246,8 +1248,8 @@ static int netlink_release(struct socket *sock) module_put(nlk->module); - netlink_table_grab(); if (netlink_is_kernel(sk)) { + netlink_table_grab(); BUG_ON(nl_table[sk->sk_protocol].registered == 0); if (--nl_table[sk->sk_protocol].registered == 0) { struct listeners *old; @@ -1261,10 +1263,8 @@ static int netlink_release(struct socket *sock) nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } - } else if (nlk->subscriptions) { - netlink_update_listeners(sk); + netlink_table_ungrab(); } - netlink_table_ungrab(); kfree(nlk->groups); nlk->groups = NULL; @@ -1272,7 +1272,7 @@ static int netlink_release(struct socket *sock) local_bh_disable(); sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); local_bh_enable(); - sock_put(sk); + call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } @@ -1287,7 +1287,6 @@ static int netlink_autobind(struct socket *sock) retry: cond_resched(); - netlink_table_grab(); rcu_read_lock(); if (__netlink_lookup(table, portid, net)) { /* Bind collision, search negative portid values. */ @@ -1295,13 +1294,11 @@ retry: if (rover > -4097) rover = -4097; rcu_read_unlock(); - netlink_table_ungrab(); goto retry; } rcu_read_unlock(); - netlink_table_ungrab(); - err = netlink_insert(sk, net, portid); + err = netlink_insert(sk, portid); if (err == -EADDRINUSE) goto retry; @@ -1430,9 +1427,10 @@ static int netlink_realloc_groups(struct sock *sk) return err; } -static void netlink_unbind(int group, long unsigned int groups, - struct netlink_sock *nlk) +static void netlink_undo_bind(int group, long unsigned int groups, + struct sock *sk) { + struct netlink_sock *nlk = nlk_sk(sk); int undo; if (!nlk->netlink_unbind) @@ -1440,7 +1438,7 @@ static void netlink_unbind(int group, long unsigned int groups, for (undo = 0; undo < group; undo++) if (test_bit(undo, &groups)) - nlk->netlink_unbind(undo); + nlk->netlink_unbind(sock_net(sk), undo + 1); } static int netlink_bind(struct socket *sock, struct sockaddr *addr, @@ -1478,20 +1476,20 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, for (group = 0; group < nlk->ngroups; group++) { if (!test_bit(group, &groups)) continue; - err = nlk->netlink_bind(group); + err = nlk->netlink_bind(net, group + 1); if (!err) continue; - netlink_unbind(group, groups, nlk); + netlink_undo_bind(group, groups, sk); return err; } } if (!nlk->portid) { err = nladdr->nl_pid ? - netlink_insert(sk, net, nladdr->nl_pid) : + netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_unbind(nlk->ngroups, groups, nlk); + netlink_undo_bind(nlk->ngroups, groups, sk); return err; } } @@ -2142,7 +2140,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (!val || val - 1 >= nlk->ngroups) return -EINVAL; if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) { - err = nlk->netlink_bind(val); + err = nlk->netlink_bind(sock_net(sk), val); if (err) return err; } @@ -2151,7 +2149,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, optname == NETLINK_ADD_MEMBERSHIP); netlink_table_ungrab(); if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind) - nlk->netlink_unbind(val); + nlk->netlink_unbind(sock_net(sk), val); err = 0; break; @@ -2261,7 +2259,6 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); @@ -2275,10 +2272,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; - if (NULL == siocb->scm) - siocb->scm = &scm; - - err = scm_send(sock, msg, siocb->scm, true); + err = scm_send(sock, msg, &scm, true); if (err < 0) return err; @@ -2304,10 +2298,15 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } + /* It's a really convoluted way for userland to ask for mmaped + * sendmsg(), but that's what we've got... + */ if (netlink_tx_is_mmaped(sk) && + msg->msg_iter.type == ITER_IOVEC && + msg->msg_iter.nr_segs == 1 && msg->msg_iter.iov->iov_base == NULL) { err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, - siocb); + &scm); goto out; } @@ -2321,7 +2320,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).creds = scm.creds; NETLINK_CB(skb).flags = netlink_skb_flags; err = -EFAULT; @@ -2343,7 +2342,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); out: - scm_destroy(siocb->scm); + scm_destroy(&scm); return err; } @@ -2351,7 +2350,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct scm_cookie scm; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -2414,11 +2412,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (nlk->flags & NETLINK_RECV_PKTINFO) netlink_cmsg_recv_pktinfo(msg, skb); - if (NULL == siocb->scm) { - memset(&scm, 0, sizeof(scm)); - siocb->scm = &scm; - } - siocb->scm->creds = *NETLINK_CREDS(skb); + memset(&scm, 0, sizeof(scm)); + scm.creds = *NETLINK_CREDS(skb); if (flags & MSG_TRUNC) copied = data_skb->len; @@ -2433,7 +2428,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } } - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out: netlink_rcv_wake(sk); return err ? : copied; @@ -2494,7 +2489,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (cfg && cfg->input) nlk_sk(sk)->netlink_rcv = cfg->input; - if (netlink_insert(sk, net, 0)) + if (netlink_insert(sk, 0)) goto out_sock_release; nlk = nlk_sk(sk); @@ -2896,97 +2891,97 @@ EXPORT_SYMBOL(nlmsg_notify); #ifdef CONFIG_PROC_FS struct nl_seq_iter { struct seq_net_private p; + struct rhashtable_iter hti; int link; - int hash_idx; }; -static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) +static int netlink_walk_start(struct nl_seq_iter *iter) { - struct nl_seq_iter *iter = seq->private; - int i, j; - struct netlink_sock *nlk; - struct sock *s; - loff_t off = 0; - - for (i = 0; i < MAX_LINKS; i++) { - struct rhashtable *ht = &nl_table[i].hash; - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - - for (j = 0; j < tbl->size; j++) { - rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { - s = (struct sock *)nlk; + int err; - if (sock_net(s) != seq_file_net(seq)) - continue; - if (off == pos) { - iter->link = i; - iter->hash_idx = j; - return s; - } - ++off; - } - } + err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti); + if (err) { + iter->link = MAX_LINKS; + return err; } - return NULL; + + err = rhashtable_walk_start(&iter->hti); + return err == -EAGAIN ? 0 : err; } -static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(nl_table_lock) __acquires(RCU) +static void netlink_walk_stop(struct nl_seq_iter *iter) { - read_lock(&nl_table_lock); - rcu_read_lock(); - return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; + rhashtable_walk_stop(&iter->hti); + rhashtable_walk_exit(&iter->hti); } -static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *__netlink_seq_next(struct seq_file *seq) { - struct rhashtable *ht; + struct nl_seq_iter *iter = seq->private; struct netlink_sock *nlk; - struct nl_seq_iter *iter; - struct net *net; - int i, j; - ++*pos; + do { + for (;;) { + int err; - if (v == SEQ_START_TOKEN) - return netlink_seq_socket_idx(seq, 0); + nlk = rhashtable_walk_next(&iter->hti); - net = seq_file_net(seq); - iter = seq->private; - nlk = v; + if (IS_ERR(nlk)) { + if (PTR_ERR(nlk) == -EAGAIN) + continue; - i = iter->link; - ht = &nl_table[i].hash; - rht_for_each_entry(nlk, nlk->node.next, ht, node) - if (net_eq(sock_net((struct sock *)nlk), net)) - return nlk; + return nlk; + } - j = iter->hash_idx + 1; + if (nlk) + break; - do { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - - for (; j < tbl->size; j++) { - rht_for_each_entry(nlk, tbl->buckets[j], ht, node) { - if (net_eq(sock_net((struct sock *)nlk), net)) { - iter->link = i; - iter->hash_idx = j; - return nlk; - } - } + netlink_walk_stop(iter); + if (++iter->link >= MAX_LINKS) + return NULL; + + err = netlink_walk_start(iter); + if (err) + return ERR_PTR(err); } + } while (sock_net(&nlk->sk) != seq_file_net(seq)); - j = 0; - } while (++i < MAX_LINKS); + return nlk; +} - return NULL; +static void *netlink_seq_start(struct seq_file *seq, loff_t *posp) +{ + struct nl_seq_iter *iter = seq->private; + void *obj = SEQ_START_TOKEN; + loff_t pos; + int err; + + iter->link = 0; + + err = netlink_walk_start(iter); + if (err) + return ERR_PTR(err); + + for (pos = *posp; pos && obj && !IS_ERR(obj); pos--) + obj = __netlink_seq_next(seq); + + return obj; +} + +static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return __netlink_seq_next(seq); } static void netlink_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) __releases(nl_table_lock) { - rcu_read_unlock(); - read_unlock(&nl_table_lock); + struct nl_seq_iter *iter = seq->private; + + if (iter->link >= MAX_LINKS) + return; + + netlink_walk_stop(iter); } @@ -3133,9 +3128,6 @@ static int __init netlink_proto_init(void) .max_shift = 16, /* 64K */ .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = lockdep_nl_sk_hash_is_held, -#endif }; if (err != 0) diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index b20a1731759b..89008405d6b4 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -2,6 +2,7 @@ #define _AF_NETLINK_H #include <linux/rhashtable.h> +#include <linux/atomic.h> #include <net/sock.h> #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) @@ -39,8 +40,8 @@ struct netlink_sock { struct mutex *cb_mutex; struct mutex cb_def_mutex; void (*netlink_rcv)(struct sk_buff *skb); - int (*netlink_bind)(int group); - void (*netlink_unbind)(int group); + int (*netlink_bind)(struct net *net, int group); + void (*netlink_unbind)(struct net *net, int group); struct module *module; #ifdef CONFIG_NETLINK_MMAP struct mutex pg_vec_lock; @@ -50,6 +51,7 @@ struct netlink_sock { #endif /* CONFIG_NETLINK_MMAP */ struct rhash_head node; + struct rcu_head rcu; }; static inline struct netlink_sock *nlk_sk(struct sock *sk) @@ -65,14 +67,13 @@ struct netlink_table { unsigned int groups; struct mutex *cb_mutex; struct module *module; - int (*bind)(int group); - void (*unbind)(int group); + int (*bind)(struct net *net, int group); + void (*unbind)(struct net *net, int group); bool (*compare)(struct net *net, struct sock *sock); int registered; }; extern struct netlink_table *nl_table; extern rwlock_t nl_table_lock; -extern struct mutex nl_sk_hash_lock; #endif diff --git a/net/netlink/diag.c b/net/netlink/diag.c index de8c74a3c061..3ee63a3cff30 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -91,7 +91,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sk_diag_put_rings_cfg(sk, skb)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); @@ -103,7 +104,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, { struct netlink_table *tbl = &nl_table[protocol]; struct rhashtable *ht = &tbl->hash; - const struct bucket_table *htbl = rht_dereference(ht->tbl, ht); + const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht); struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; struct netlink_sock *nlsk; @@ -113,7 +114,9 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, req = nlmsg_data(cb->nlh); for (i = 0; i < htbl->size; i++) { - rht_for_each_entry(nlsk, htbl->buckets[i], ht, node) { + struct rhash_head *pos; + + rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) { sk = (struct sock *)nlsk; if (!net_eq(sock_net(sk), net)) @@ -170,7 +173,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) req = nlmsg_data(cb->nlh); - mutex_lock(&nl_sk_hash_lock); + rcu_read_lock(); read_lock(&nl_table_lock); if (req->sdiag_protocol == NDIAG_PROTO_ALL) { @@ -184,7 +187,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } else { if (req->sdiag_protocol >= MAX_LINKS) { read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return -ENOENT; } @@ -192,7 +195,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return skb->len; } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 76393f2f4b22..2ed5f964772e 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -23,6 +23,9 @@ static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ static DECLARE_RWSEM(cb_lock); +atomic_t genl_sk_destructing_cnt = ATOMIC_INIT(0); +DECLARE_WAIT_QUEUE_HEAD(genl_sk_destructing_waitq); + void genl_lock(void) { mutex_lock(&genl_mutex); @@ -435,15 +438,18 @@ int genl_unregister_family(struct genl_family *family) genl_lock_all(); - genl_unregister_mc_groups(family); - list_for_each_entry(rc, genl_family_chain(family->id), family_list) { if (family->id != rc->id || strcmp(rc->name, family->name)) continue; + genl_unregister_mc_groups(family); + list_del(&rc->family_list); family->n_ops = 0; - genl_unlock_all(); + up_write(&cb_lock); + wait_event(genl_sk_destructing_waitq, + atomic_read(&genl_sk_destructing_cnt) == 0); + genl_unlock(); kfree(family->attrbuf); genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); @@ -756,7 +762,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, nla_nest_end(skb, nla_grps); } - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -796,7 +803,8 @@ static int ctrl_fill_mcgrp_info(struct genl_family *family, nla_nest_end(skb, nest); nla_nest_end(skb, nla_grps); - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -983,11 +991,63 @@ static struct genl_multicast_group genl_ctrl_groups[] = { { .name = "notify", }, }; +static int genl_bind(struct net *net, int group) +{ + int i, err = -ENOENT; + + down_read(&cb_lock); + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { + struct genl_family *f; + + list_for_each_entry(f, genl_family_chain(i), family_list) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; + + if (!f->netnsok && net != &init_net) + err = -ENOENT; + else if (f->mcast_bind) + err = f->mcast_bind(net, fam_grp); + else + err = 0; + break; + } + } + } + up_read(&cb_lock); + + return err; +} + +static void genl_unbind(struct net *net, int group) +{ + int i; + + down_read(&cb_lock); + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { + struct genl_family *f; + + list_for_each_entry(f, genl_family_chain(i), family_list) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; + + if (f->mcast_unbind) + f->mcast_unbind(net, fam_grp); + break; + } + } + } + up_read(&cb_lock); +} + static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, + .bind = genl_bind, + .unbind = genl_unbind, }; /* we'll bump the group number right afterwards */ |