diff options
-rw-r--r-- | include/linux/rhashtable.h | 35 | ||||
-rw-r--r-- | lib/rhashtable.c | 167 | ||||
-rw-r--r-- | net/netfilter/nft_hash.c | 53 | ||||
-rw-r--r-- | net/netlink/af_netlink.c | 130 |
4 files changed, 302 insertions, 83 deletions
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e0337844358e..58851275fed9 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -18,6 +18,7 @@ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H +#include <linux/compiler.h> #include <linux/list_nulls.h> #include <linux/workqueue.h> #include <linux/mutex.h> @@ -111,6 +112,7 @@ struct rhashtable_params { * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping + * @walkers: List of active walkers * @being_destroyed: True if table is set up for destruction */ struct rhashtable { @@ -121,9 +123,36 @@ struct rhashtable { struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; + struct list_head walkers; bool being_destroyed; }; +/** + * struct rhashtable_walker - Hash table walker + * @list: List entry on list of walkers + * @resize: Resize event occured + */ +struct rhashtable_walker { + struct list_head list; + bool resize; +}; + +/** + * struct rhashtable_iter - Hash table iterator, fits into netlink cb + * @ht: Table to iterate through + * @p: Current pointer + * @walker: Associated rhashtable walker + * @slot: Current slot + * @skip: Number of entries to skip in slot + */ +struct rhashtable_iter { + struct rhashtable *ht; + struct rhash_head *p; + struct rhashtable_walker *walker; + unsigned int slot; + unsigned int skip; +}; + static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) { return NULLS_MARKER(ht->p.nulls_base + hash); @@ -179,6 +208,12 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht, bool (*compare)(void *, void *), void *arg); +int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); +void rhashtable_walk_exit(struct rhashtable_iter *iter); +int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); +void *rhashtable_walk_next(struct rhashtable_iter *iter); +void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); + void rhashtable_destroy(struct rhashtable *ht); #define rht_dereference(p, ht) \ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index c41e21096373..057919164e23 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -484,16 +484,24 @@ static void rht_deferred_worker(struct work_struct *work) { struct rhashtable *ht; struct bucket_table *tbl; + struct rhashtable_walker *walker; ht = container_of(work, struct rhashtable, run_work); mutex_lock(&ht->mutex); + if (ht->being_destroyed) + goto unlock; + tbl = rht_dereference(ht->tbl, ht); + list_for_each_entry(walker, &ht->walkers, list) + walker->resize = true; + if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) rhashtable_expand(ht); else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size)) rhashtable_shrink(ht); +unlock: mutex_unlock(&ht->mutex); } @@ -818,6 +826,164 @@ exit: } EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert); +/** + * rhashtable_walk_init - Initialise an iterator + * @ht: Table to walk over + * @iter: Hash table Iterator + * + * This function prepares a hash table walk. + * + * Note that if you restart a walk after rhashtable_walk_stop you + * may see the same object twice. Also, you may miss objects if + * there are removals in between rhashtable_walk_stop and the next + * call to rhashtable_walk_start. + * + * For a completely stable walk you should construct your own data + * structure outside the hash table. + * + * This function may sleep so you must not call it from interrupt + * context or with spin locks held. + * + * You must call rhashtable_walk_exit if this function returns + * successfully. + */ +int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter) +{ + iter->ht = ht; + iter->p = NULL; + iter->slot = 0; + iter->skip = 0; + + iter->walker = kmalloc(sizeof(*iter->walker), GFP_KERNEL); + if (!iter->walker) + return -ENOMEM; + + mutex_lock(&ht->mutex); + list_add(&iter->walker->list, &ht->walkers); + mutex_unlock(&ht->mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_walk_init); + +/** + * rhashtable_walk_exit - Free an iterator + * @iter: Hash table Iterator + * + * This function frees resources allocated by rhashtable_walk_init. + */ +void rhashtable_walk_exit(struct rhashtable_iter *iter) +{ + mutex_lock(&iter->ht->mutex); + list_del(&iter->walker->list); + mutex_unlock(&iter->ht->mutex); + kfree(iter->walker); +} +EXPORT_SYMBOL_GPL(rhashtable_walk_exit); + +/** + * rhashtable_walk_start - Start a hash table walk + * @iter: Hash table iterator + * + * Start a hash table walk. Note that we take the RCU lock in all + * cases including when we return an error. So you must always call + * rhashtable_walk_stop to clean up. + * + * Returns zero if successful. + * + * Returns -EAGAIN if resize event occured. Note that the iterator + * will rewind back to the beginning and you may use it immediately + * by calling rhashtable_walk_next. + */ +int rhashtable_walk_start(struct rhashtable_iter *iter) +{ + rcu_read_lock(); + + if (iter->walker->resize) { + iter->slot = 0; + iter->skip = 0; + iter->walker->resize = false; + return -EAGAIN; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_walk_start); + +/** + * rhashtable_walk_next - Return the next object and advance the iterator + * @iter: Hash table iterator + * + * Note that you must call rhashtable_walk_stop when you are finished + * with the walk. + * + * Returns the next object or NULL when the end of the table is reached. + * + * Returns -EAGAIN if resize event occured. Note that the iterator + * will rewind back to the beginning and you may continue to use it. + */ +void *rhashtable_walk_next(struct rhashtable_iter *iter) +{ + const struct bucket_table *tbl; + struct rhashtable *ht = iter->ht; + struct rhash_head *p = iter->p; + void *obj = NULL; + + tbl = rht_dereference_rcu(ht->tbl, ht); + + if (p) { + p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); + goto next; + } + + for (; iter->slot < tbl->size; iter->slot++) { + int skip = iter->skip; + + rht_for_each_rcu(p, tbl, iter->slot) { + if (!skip) + break; + skip--; + } + +next: + if (!rht_is_a_nulls(p)) { + iter->skip++; + iter->p = p; + obj = rht_obj(ht, p); + goto out; + } + + iter->skip = 0; + } + + iter->p = NULL; + +out: + if (iter->walker->resize) { + iter->p = NULL; + iter->slot = 0; + iter->skip = 0; + iter->walker->resize = false; + return ERR_PTR(-EAGAIN); + } + + return obj; +} +EXPORT_SYMBOL_GPL(rhashtable_walk_next); + +/** + * rhashtable_walk_stop - Finish a hash table walk + * @iter: Hash table iterator + * + * Finish a hash table walk. + */ +void rhashtable_walk_stop(struct rhashtable_iter *iter) +{ + rcu_read_unlock(); + iter->p = NULL; +} +EXPORT_SYMBOL_GPL(rhashtable_walk_stop); + static size_t rounded_hashtable_size(struct rhashtable_params *params) { return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), @@ -890,6 +1056,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) memset(ht, 0, sizeof(*ht)); mutex_init(&ht->mutex); memcpy(&ht->p, params, sizeof(*params)); + INIT_LIST_HEAD(&ht->walkers); if (params->locks_mul) ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 75887d7d2c6a..61e6c407476a 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -130,31 +130,50 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl; const struct nft_hash_elem *he; + struct rhashtable_iter hti; struct nft_set_elem elem; - unsigned int i; + int err; - tbl = rht_dereference_rcu(priv->tbl, priv); - for (i = 0; i < tbl->size; i++) { - struct rhash_head *pos; + err = rhashtable_walk_init(priv, &hti); + iter->err = err; + if (err) + return; + + err = rhashtable_walk_start(&hti); + if (err && err != -EAGAIN) { + iter->err = err; + goto out; + } - rht_for_each_entry_rcu(he, pos, tbl, i, node) { - if (iter->count < iter->skip) - goto cont; + while ((he = rhashtable_walk_next(&hti))) { + if (IS_ERR(he)) { + err = PTR_ERR(he); + if (err != -EAGAIN) { + iter->err = err; + goto out; + } + } + + if (iter->count < iter->skip) + goto cont; + + memcpy(&elem.key, &he->key, sizeof(elem.key)); + if (set->flags & NFT_SET_MAP) + memcpy(&elem.data, he->data, sizeof(elem.data)); + elem.flags = 0; - memcpy(&elem.key, &he->key, sizeof(elem.key)); - if (set->flags & NFT_SET_MAP) - memcpy(&elem.data, he->data, sizeof(elem.data)); - elem.flags = 0; + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + goto out; - iter->err = iter->fn(ctx, set, iter, &elem); - if (iter->err < 0) - return; cont: - iter->count++; - } + iter->count++; } + +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); } static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a36777b7cfb6..155854802d44 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2886,99 +2886,97 @@ EXPORT_SYMBOL(nlmsg_notify); #ifdef CONFIG_PROC_FS struct nl_seq_iter { struct seq_net_private p; + struct rhashtable_iter hti; int link; - int hash_idx; }; -static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) +static int netlink_walk_start(struct nl_seq_iter *iter) { - struct nl_seq_iter *iter = seq->private; - int i, j; - struct netlink_sock *nlk; - struct sock *s; - loff_t off = 0; - - for (i = 0; i < MAX_LINKS; i++) { - struct rhashtable *ht = &nl_table[i].hash; - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - - for (j = 0; j < tbl->size; j++) { - struct rhash_head *node; - - rht_for_each_entry_rcu(nlk, node, tbl, j, node) { - s = (struct sock *)nlk; + int err; - if (sock_net(s) != seq_file_net(seq)) - continue; - if (off == pos) { - iter->link = i; - iter->hash_idx = j; - return s; - } - ++off; - } - } + err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti); + if (err) { + iter->link = MAX_LINKS; + return err; } - return NULL; + + err = rhashtable_walk_start(&iter->hti); + return err == -EAGAIN ? 0 : err; } -static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) +static void netlink_walk_stop(struct nl_seq_iter *iter) { - rcu_read_lock(); - return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; + rhashtable_walk_stop(&iter->hti); + rhashtable_walk_exit(&iter->hti); } -static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *__netlink_seq_next(struct seq_file *seq) { - struct rhashtable *ht; - const struct bucket_table *tbl; - struct rhash_head *node; + struct nl_seq_iter *iter = seq->private; struct netlink_sock *nlk; - struct nl_seq_iter *iter; - struct net *net; - int i, j; - ++*pos; + do { + for (;;) { + int err; - if (v == SEQ_START_TOKEN) - return netlink_seq_socket_idx(seq, 0); + nlk = rhashtable_walk_next(&iter->hti); - net = seq_file_net(seq); - iter = seq->private; - nlk = v; + if (IS_ERR(nlk)) { + if (PTR_ERR(nlk) == -EAGAIN) + continue; - i = iter->link; - ht = &nl_table[i].hash; - tbl = rht_dereference_rcu(ht->tbl, ht); - rht_for_each_entry_rcu_continue(nlk, node, nlk->node.next, tbl, iter->hash_idx, node) - if (net_eq(sock_net((struct sock *)nlk), net)) - return nlk; + return nlk; + } - j = iter->hash_idx + 1; + if (nlk) + break; - do { + netlink_walk_stop(iter); + if (++iter->link >= MAX_LINKS) + return NULL; - for (; j < tbl->size; j++) { - rht_for_each_entry_rcu(nlk, node, tbl, j, node) { - if (net_eq(sock_net((struct sock *)nlk), net)) { - iter->link = i; - iter->hash_idx = j; - return nlk; - } - } + err = netlink_walk_start(iter); + if (err) + return ERR_PTR(err); } + } while (sock_net(&nlk->sk) != seq_file_net(seq)); - j = 0; - } while (++i < MAX_LINKS); + return nlk; +} - return NULL; +static void *netlink_seq_start(struct seq_file *seq, loff_t *posp) +{ + struct nl_seq_iter *iter = seq->private; + void *obj = SEQ_START_TOKEN; + loff_t pos; + int err; + + iter->link = 0; + + err = netlink_walk_start(iter); + if (err) + return ERR_PTR(err); + + for (pos = *posp; pos && obj && !IS_ERR(obj); pos--) + obj = __netlink_seq_next(seq); + + return obj; +} + +static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return __netlink_seq_next(seq); } static void netlink_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) { - rcu_read_unlock(); + struct nl_seq_iter *iter = seq->private; + + if (iter->link >= MAX_LINKS) + return; + + netlink_walk_stop(iter); } |