summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorJohannes Berg <johannes.berg@intel.com>2016-10-04 09:22:19 +0200
committerJohannes Berg <johannes.berg@intel.com>2016-10-04 09:46:44 +0200
commit1e1430d5282bc3a572465ef3261eea793d98a653 (patch)
tree81c8883606ed2dd821f4509581888505d48631dd /lib
parentmac80211: Move reorder-sensitive TX handlers to after TXQ dequeue (diff)
parentMerge branch 'ncsi-next' (diff)
downloadlinux-1e1430d5282bc3a572465ef3261eea793d98a653.tar.xz
linux-1e1430d5282bc3a572465ef3261eea793d98a653.zip
Merge remote-tracking branch 'net-next/master' into mac80211-next
Resolve the merge conflict between Felix's/my and Toke's patches coming into the tree through net and mac80211-next respectively. Most of Felix's changes go away due to Toke's new infrastructure work, my patch changes to "goto begin" (the label wasn't there before) instead of returning NULL so flow control towards drivers is preserved better. Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig.debug2
-rw-r--r--lib/Makefile2
-rw-r--r--lib/iov_iter.c24
-rw-r--r--lib/radix-tree.c8
-rw-r--r--lib/random32.c4
-rw-r--r--lib/rhashtable.c258
-rw-r--r--lib/win_minmax.c98
7 files changed, 314 insertions, 82 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2e2cca509231..cab7405f48d2 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -821,7 +821,7 @@ config DETECT_HUNG_TASK
help
Say Y here to enable the kernel to detect "hung tasks",
which are bugs that cause the task to be stuck in
- uninterruptible "D" state indefinitiley.
+ uninterruptible "D" state indefinitely.
When a hung task is detected, the kernel will print the
current stack trace (which you should report), but the
diff --git a/lib/Makefile b/lib/Makefile
index 5dc77a8ec297..df747e5eeb7a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,7 +22,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
- earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o
+ earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 9e8c7386b3a0..7e3138cfc8c9 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -291,33 +291,13 @@ done:
}
/*
- * Fault in the first iovec of the given iov_iter, to a maximum length
- * of bytes. Returns 0 on success, or non-zero if the memory could not be
- * accessed (ie. because it is an invalid address).
- *
- * writev-intensive code may want this to prefault several iovecs -- that
- * would be possible (callers must not rely on the fact that _only_ the
- * first iovec will be faulted with the current implementation).
- */
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
-{
- if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
- char __user *buf = i->iov->iov_base + i->iov_offset;
- bytes = min(bytes, i->iov->iov_len - i->iov_offset);
- return fault_in_pages_readable(buf, bytes);
- }
- return 0;
-}
-EXPORT_SYMBOL(iov_iter_fault_in_readable);
-
-/*
* Fault in one or more iovecs of the given iov_iter, to a maximum length of
* bytes. For each iovec, fault in each page that constitutes the iovec.
*
* Return 0 on success, or non-zero if the memory could not be accessed (i.e.
* because it is an invalid address).
*/
-int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
+int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
size_t skip = i->iov_offset;
const struct iovec *iov;
@@ -334,7 +314,7 @@ int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
}
return 0;
}
-EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable);
+EXPORT_SYMBOL(iov_iter_fault_in_readable);
void iov_iter_init(struct iov_iter *i, int direction,
const struct iovec *iov, unsigned long nr_segs,
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 1b7bf7314141..91f0727e3cad 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -105,10 +105,10 @@ static unsigned int radix_tree_descend(struct radix_tree_node *parent,
#ifdef CONFIG_RADIX_TREE_MULTIORDER
if (radix_tree_is_internal_node(entry)) {
- unsigned long siboff = get_slot_offset(parent, entry);
- if (siboff < RADIX_TREE_MAP_SIZE) {
- offset = siboff;
- entry = rcu_dereference_raw(parent->slots[offset]);
+ if (is_sibling_entry(parent, entry)) {
+ void **sibentry = (void **) entry_to_node(entry);
+ offset = get_slot_offset(parent, sibentry);
+ entry = rcu_dereference_raw(*sibentry);
}
}
#endif
diff --git a/lib/random32.c b/lib/random32.c
index 69ed593aab07..915982b304bb 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -81,7 +81,7 @@ u32 prandom_u32(void)
u32 res;
res = prandom_u32_state(state);
- put_cpu_var(state);
+ put_cpu_var(net_rand_state);
return res;
}
@@ -128,7 +128,7 @@ void prandom_bytes(void *buf, size_t bytes)
struct rnd_state *state = &get_cpu_var(net_rand_state);
prandom_bytes_state(state, buf, bytes);
- put_cpu_var(state);
+ put_cpu_var(net_rand_state);
}
EXPORT_SYMBOL(prandom_bytes);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 06c28728bb53..32d0ad058380 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -378,22 +378,8 @@ static void rht_deferred_worker(struct work_struct *work)
schedule_work(&ht->run_work);
}
-static bool rhashtable_check_elasticity(struct rhashtable *ht,
- struct bucket_table *tbl,
- unsigned int hash)
-{
- unsigned int elasticity = ht->elasticity;
- struct rhash_head *head;
-
- rht_for_each(head, tbl, hash)
- if (!--elasticity)
- return true;
-
- return false;
-}
-
-int rhashtable_insert_rehash(struct rhashtable *ht,
- struct bucket_table *tbl)
+static int rhashtable_insert_rehash(struct rhashtable *ht,
+ struct bucket_table *tbl)
{
struct bucket_table *old_tbl;
struct bucket_table *new_tbl;
@@ -439,57 +425,165 @@ fail:
return err;
}
-EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
-struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
- const void *key,
- struct rhash_head *obj,
- struct bucket_table *tbl,
- void **data)
+static void *rhashtable_lookup_one(struct rhashtable *ht,
+ struct bucket_table *tbl, unsigned int hash,
+ const void *key, struct rhash_head *obj)
{
+ struct rhashtable_compare_arg arg = {
+ .ht = ht,
+ .key = key,
+ };
+ struct rhash_head __rcu **pprev;
struct rhash_head *head;
- unsigned int hash;
- int err;
+ int elasticity;
- tbl = rhashtable_last_table(ht, tbl);
- hash = head_hashfn(ht, tbl, obj);
- spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
-
- err = -EEXIST;
- if (key) {
- *data = rhashtable_lookup_fast(ht, key, ht->p);
- if (*data)
- goto exit;
+ elasticity = ht->elasticity;
+ pprev = &tbl->buckets[hash];
+ rht_for_each(head, tbl, hash) {
+ struct rhlist_head *list;
+ struct rhlist_head *plist;
+
+ elasticity--;
+ if (!key ||
+ (ht->p.obj_cmpfn ?
+ ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
+ rhashtable_compare(&arg, rht_obj(ht, head))))
+ continue;
+
+ if (!ht->rhlist)
+ return rht_obj(ht, head);
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ plist = container_of(head, struct rhlist_head, rhead);
+
+ RCU_INIT_POINTER(list->next, plist);
+ head = rht_dereference_bucket(head->next, tbl, hash);
+ RCU_INIT_POINTER(list->rhead.next, head);
+ rcu_assign_pointer(*pprev, obj);
+
+ return NULL;
}
- err = -E2BIG;
- if (unlikely(rht_grow_above_max(ht, tbl)))
- goto exit;
+ if (elasticity <= 0)
+ return ERR_PTR(-EAGAIN);
+
+ return ERR_PTR(-ENOENT);
+}
+
+static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
+ struct bucket_table *tbl,
+ unsigned int hash,
+ struct rhash_head *obj,
+ void *data)
+{
+ struct bucket_table *new_tbl;
+ struct rhash_head *head;
+
+ if (!IS_ERR_OR_NULL(data))
+ return ERR_PTR(-EEXIST);
- err = -EAGAIN;
- if (rhashtable_check_elasticity(ht, tbl, hash) ||
- rht_grow_above_100(ht, tbl))
- goto exit;
+ if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
+ return ERR_CAST(data);
- err = 0;
+ new_tbl = rcu_dereference(tbl->future_tbl);
+ if (new_tbl)
+ return new_tbl;
+
+ if (PTR_ERR(data) != -ENOENT)
+ return ERR_CAST(data);
+
+ if (unlikely(rht_grow_above_max(ht, tbl)))
+ return ERR_PTR(-E2BIG);
+
+ if (unlikely(rht_grow_above_100(ht, tbl)))
+ return ERR_PTR(-EAGAIN);
head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
RCU_INIT_POINTER(obj->next, head);
+ if (ht->rhlist) {
+ struct rhlist_head *list;
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ RCU_INIT_POINTER(list->next, NULL);
+ }
rcu_assign_pointer(tbl->buckets[hash], obj);
atomic_inc(&ht->nelems);
+ if (rht_grow_above_75(ht, tbl))
+ schedule_work(&ht->run_work);
-exit:
- spin_unlock(rht_bucket_lock(tbl, hash));
+ return NULL;
+}
- if (err == 0)
- return NULL;
- else if (err == -EAGAIN)
- return tbl;
- else
- return ERR_PTR(err);
+static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
+ struct rhash_head *obj)
+{
+ struct bucket_table *new_tbl;
+ struct bucket_table *tbl;
+ unsigned int hash;
+ spinlock_t *lock;
+ void *data;
+
+ tbl = rcu_dereference(ht->tbl);
+
+ /* All insertions must grab the oldest table containing
+ * the hashed bucket that is yet to be rehashed.
+ */
+ for (;;) {
+ hash = rht_head_hashfn(ht, tbl, obj, ht->p);
+ lock = rht_bucket_lock(tbl, hash);
+ spin_lock_bh(lock);
+
+ if (tbl->rehash <= hash)
+ break;
+
+ spin_unlock_bh(lock);
+ tbl = rcu_dereference(tbl->future_tbl);
+ }
+
+ data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
+ new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
+ if (PTR_ERR(new_tbl) != -EEXIST)
+ data = ERR_CAST(new_tbl);
+
+ while (!IS_ERR_OR_NULL(new_tbl)) {
+ tbl = new_tbl;
+ hash = rht_head_hashfn(ht, tbl, obj, ht->p);
+ spin_lock_nested(rht_bucket_lock(tbl, hash),
+ SINGLE_DEPTH_NESTING);
+
+ data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
+ new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
+ if (PTR_ERR(new_tbl) != -EEXIST)
+ data = ERR_CAST(new_tbl);
+
+ spin_unlock(rht_bucket_lock(tbl, hash));
+ }
+
+ spin_unlock_bh(lock);
+
+ if (PTR_ERR(data) == -EAGAIN)
+ data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
+ -EAGAIN);
+
+ return data;
+}
+
+void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
+ struct rhash_head *obj)
+{
+ void *data;
+
+ do {
+ rcu_read_lock();
+ data = rhashtable_try_insert(ht, key, obj);
+ rcu_read_unlock();
+ } while (PTR_ERR(data) == -EAGAIN);
+
+ return data;
}
EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
@@ -593,11 +687,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
void *rhashtable_walk_next(struct rhashtable_iter *iter)
{
struct bucket_table *tbl = iter->walker.tbl;
+ struct rhlist_head *list = iter->list;
struct rhashtable *ht = iter->ht;
struct rhash_head *p = iter->p;
+ bool rhlist = ht->rhlist;
if (p) {
- p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
+ if (!rhlist || !(list = rcu_dereference(list->next))) {
+ p = rcu_dereference(p->next);
+ list = container_of(p, struct rhlist_head, rhead);
+ }
goto next;
}
@@ -605,6 +704,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
int skip = iter->skip;
rht_for_each_rcu(p, tbl, iter->slot) {
+ if (rhlist) {
+ list = container_of(p, struct rhlist_head,
+ rhead);
+ do {
+ if (!skip)
+ goto next;
+ skip--;
+ list = rcu_dereference(list->next);
+ } while (list);
+
+ continue;
+ }
if (!skip)
break;
skip--;
@@ -614,7 +725,8 @@ next:
if (!rht_is_a_nulls(p)) {
iter->skip++;
iter->p = p;
- return rht_obj(ht, p);
+ iter->list = list;
+ return rht_obj(ht, rhlist ? &list->rhead : p);
}
iter->skip = 0;
@@ -803,6 +915,48 @@ int rhashtable_init(struct rhashtable *ht,
EXPORT_SYMBOL_GPL(rhashtable_init);
/**
+ * rhltable_init - initialize a new hash list table
+ * @hlt: hash list table to be initialized
+ * @params: configuration parameters
+ *
+ * Initializes a new hash list table.
+ *
+ * See documentation for rhashtable_init.
+ */
+int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
+{
+ int err;
+
+ /* No rhlist NULLs marking for now. */
+ if (params->nulls_base)
+ return -EINVAL;
+
+ err = rhashtable_init(&hlt->ht, params);
+ hlt->ht.rhlist = true;
+ return err;
+}
+EXPORT_SYMBOL_GPL(rhltable_init);
+
+static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
+ void (*free_fn)(void *ptr, void *arg),
+ void *arg)
+{
+ struct rhlist_head *list;
+
+ if (!ht->rhlist) {
+ free_fn(rht_obj(ht, obj), arg);
+ return;
+ }
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ do {
+ obj = &list->rhead;
+ list = rht_dereference(list->next, ht);
+ free_fn(rht_obj(ht, obj), arg);
+ } while (list);
+}
+
+/**
* rhashtable_free_and_destroy - free elements and destroy hash table
* @ht: the hash table to destroy
* @free_fn: callback to release resources of element
@@ -839,7 +993,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
pos = next,
next = !rht_is_a_nulls(pos) ?
rht_dereference(pos->next, ht) : NULL)
- free_fn(rht_obj(ht, pos), arg);
+ rhashtable_free_one(ht, pos, free_fn, arg);
}
}
diff --git a/lib/win_minmax.c b/lib/win_minmax.c
new file mode 100644
index 000000000000..c8420d404926
--- /dev/null
+++ b/lib/win_minmax.c
@@ -0,0 +1,98 @@
+/**
+ * lib/minmax.c: windowed min/max tracker
+ *
+ * Kathleen Nichols' algorithm for tracking the minimum (or maximum)
+ * value of a data stream over some fixed time interval. (E.g.,
+ * the minimum RTT over the past five minutes.) It uses constant
+ * space and constant time per update yet almost always delivers
+ * the same minimum as an implementation that has to keep all the
+ * data in the window.
+ *
+ * The algorithm keeps track of the best, 2nd best & 3rd best min
+ * values, maintaining an invariant that the measurement time of
+ * the n'th best >= n-1'th best. It also makes sure that the three
+ * values are widely separated in the time window since that bounds
+ * the worse case error when that data is monotonically increasing
+ * over the window.
+ *
+ * Upon getting a new min, we can forget everything earlier because
+ * it has no value - the new min is <= everything else in the window
+ * by definition and it's the most recent. So we restart fresh on
+ * every new min and overwrites 2nd & 3rd choices. The same property
+ * holds for 2nd & 3rd best.
+ */
+#include <linux/module.h>
+#include <linux/win_minmax.h>
+
+/* As time advances, update the 1st, 2nd, and 3rd choices. */
+static u32 minmax_subwin_update(struct minmax *m, u32 win,
+ const struct minmax_sample *val)
+{
+ u32 dt = val->t - m->s[0].t;
+
+ if (unlikely(dt > win)) {
+ /*
+ * Passed entire window without a new val so make 2nd
+ * choice the new val & 3rd choice the new 2nd choice.
+ * we may have to iterate this since our 2nd choice
+ * may also be outside the window (we checked on entry
+ * that the third choice was in the window).
+ */
+ m->s[0] = m->s[1];
+ m->s[1] = m->s[2];
+ m->s[2] = *val;
+ if (unlikely(val->t - m->s[0].t > win)) {
+ m->s[0] = m->s[1];
+ m->s[1] = m->s[2];
+ m->s[2] = *val;
+ }
+ } else if (unlikely(m->s[1].t == m->s[0].t) && dt > win/4) {
+ /*
+ * We've passed a quarter of the window without a new val
+ * so take a 2nd choice from the 2nd quarter of the window.
+ */
+ m->s[2] = m->s[1] = *val;
+ } else if (unlikely(m->s[2].t == m->s[1].t) && dt > win/2) {
+ /*
+ * We've passed half the window without finding a new val
+ * so take a 3rd choice from the last half of the window
+ */
+ m->s[2] = *val;
+ }
+ return m->s[0].v;
+}
+
+/* Check if new measurement updates the 1st, 2nd or 3rd choice max. */
+u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas)
+{
+ struct minmax_sample val = { .t = t, .v = meas };
+
+ if (unlikely(val.v >= m->s[0].v) || /* found new max? */
+ unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */
+ return minmax_reset(m, t, meas); /* forget earlier samples */
+
+ if (unlikely(val.v >= m->s[1].v))
+ m->s[2] = m->s[1] = val;
+ else if (unlikely(val.v >= m->s[2].v))
+ m->s[2] = val;
+
+ return minmax_subwin_update(m, win, &val);
+}
+EXPORT_SYMBOL(minmax_running_max);
+
+/* Check if new measurement updates the 1st, 2nd or 3rd choice min. */
+u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas)
+{
+ struct minmax_sample val = { .t = t, .v = meas };
+
+ if (unlikely(val.v <= m->s[0].v) || /* found new min? */
+ unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */
+ return minmax_reset(m, t, meas); /* forget earlier samples */
+
+ if (unlikely(val.v <= m->s[1].v))
+ m->s[2] = m->s[1] = val;
+ else if (unlikely(val.v <= m->s[2].v))
+ m->s[2] = val;
+
+ return minmax_subwin_update(m, win, &val);
+}