author    Linus Torvalds <torvalds@linux-foundation.org>    2018-06-04 16:58:06 +0200
committer Linus Torvalds <torvalds@linux-foundation.org>    2018-06-04 16:58:06 +0200
commit    f459c34538f57661e0fd1d3eaf7c0b17125ae011 (patch)
tree      3addc82d7f792c4533501978798dad0095293933 /lib
parent    Linux 4.17 (diff)
parent    blk-mq: update nr_requests when switching to 'none' scheduler (diff)
Merge tag 'for-4.18/block-20180603' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:

 - clean up how we pass around gfp_t and blk_mq_req_flags_t (Christoph)

 - prepare us to defer scheduler attach (Christoph)

 - clean up drivers handling of bounce buffers (Christoph)

 - fix timeout handling corner cases (Christoph/Bart/Keith)

 - bcache fixes (Coly)

 - prep work for bcachefs and some block layer optimizations (Kent)

 - convert users of bio_sets to using embedded structs (Kent)

 - fixes for the BFQ io scheduler (Paolo/Davide/Filippo)

 - lightnvm fixes and improvements (Matias, with contributions from Hans
   and Javier)

 - adding discard throttling to blk-wbt (me)

 - sbitmap blk-mq-tag handling (me/Omar/Ming)

 - remove the sparc jsflash block driver, acked by DaveM

 - Kyber scheduler improvement from Jianchao, making it more friendly
   wrt merging

 - conversion of symbolic proc permissions to octal, from Joe Perches.
   Previously the block parts were a mix of both.

 - nbd fixes (Josef and Kevin Vigor)

 - unify how we handle the various kinds of timestamps that the block
   core and utility code uses (Omar)

 - three NVMe pull requests from Keith and Christoph, bringing AEN to
   feature completeness, file backed namespaces, cq/sq lock split, and
   various fixes

 - various little fixes and improvements all over the map

* tag 'for-4.18/block-20180603' of git://git.kernel.dk/linux-block: (196 commits)
  blk-mq: update nr_requests when switching to 'none' scheduler
  block: don't use blocking queue entered for recursive bio submits
  dm-crypt: fix warning in shutdown path
  lightnvm: pblk: take bitmap alloc. out of critical section
  lightnvm: pblk: kick writer on new flush points
  lightnvm: pblk: only try to recover lines with written smeta
  lightnvm: pblk: remove unnecessary bio_get/put
  lightnvm: pblk: add possibility to set write buffer size manually
  lightnvm: fix partial read error path
  lightnvm: proper error handling for pblk_bio_add_pages
  lightnvm: pblk: fix smeta write error path
  lightnvm: pblk: garbage collect lines with failed writes
  lightnvm: pblk: rework write error recovery path
  lightnvm: pblk: remove dead function
  lightnvm: pass flag on graceful teardown to targets
  lightnvm: pblk: check for chunk size before allocating it
  lightnvm: pblk: remove unnecessary argument
  lightnvm: pblk: remove unnecessary indirection
  lightnvm: pblk: return NVM_ error on failed submission
  lightnvm: pblk: warn in case of corrupted write buffer
  ...
Diffstat (limited to 'lib')
-rw-r--r--  lib/sbitmap.c  113
1 file changed, 81 insertions(+), 32 deletions(-)
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index e6a9c06ec70c..6fdc6267f4a8 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -270,18 +270,33 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
}
EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
-static unsigned int sbq_calc_wake_batch(unsigned int depth)
+static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int depth)
{
unsigned int wake_batch;
+ unsigned int shallow_depth;
/*
* For each batch, we wake up one queue. We need to make sure that our
- * batch size is small enough that the full depth of the bitmap is
- * enough to wake up all of the queues.
+ * batch size is small enough that the full depth of the bitmap,
+ * potentially limited by a shallow depth, is enough to wake up all of
+ * the queues.
+ *
+ * Each full word of the bitmap has bits_per_word bits, and there might
+ * be a partial word. There are depth / bits_per_word full words and
+ * depth % bits_per_word bits left over. In bitwise arithmetic:
+ *
+ * bits_per_word = 1 << shift
+ * depth / bits_per_word = depth >> shift
+ * depth % bits_per_word = depth & ((1 << shift) - 1)
+ *
+ * Each word can be limited to sbq->min_shallow_depth bits.
*/
- wake_batch = SBQ_WAKE_BATCH;
- if (wake_batch > depth / SBQ_WAIT_QUEUES)
- wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);
+ shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
+ depth = ((depth >> sbq->sb.shift) * shallow_depth +
+ min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
+ wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
+ SBQ_WAKE_BATCH);
return wake_batch;
}
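
For readers following the new arithmetic in sbq_calc_wake_batch(), here is a small standalone C sketch of the same computation outside the kernel. The numbers (depth 256, shift 6, min_shallow_depth 16) are purely illustrative, and the SBQ_* constants are assumed to match the values of 8 defined in include/linux/sbitmap.h.

#include <stdio.h>

/* Assumed to match include/linux/sbitmap.h; illustrative only. */
#define SBQ_WAIT_QUEUES 8
#define SBQ_WAKE_BATCH  8

/* Mirror of the kernel calculation above, written out long-hand. */
static unsigned int calc_wake_batch(unsigned int depth, unsigned int shift,
				    unsigned int min_shallow_depth)
{
	unsigned int bits_per_word = 1U << shift;
	unsigned int shallow = bits_per_word < min_shallow_depth ?
			       bits_per_word : min_shallow_depth;
	unsigned int full_words = depth >> shift;            /* depth / bits_per_word */
	unsigned int leftover = depth & (bits_per_word - 1); /* depth % bits_per_word */
	unsigned int usable = full_words * shallow +
			      (leftover < shallow ? leftover : shallow);
	unsigned int batch = usable / SBQ_WAIT_QUEUES;

	if (batch < 1)
		batch = 1;
	if (batch > SBQ_WAKE_BATCH)
		batch = SBQ_WAKE_BATCH;
	return batch;
}

int main(void)
{
	/*
	 * 256 bits split into 64-bit words, each limited to 16 usable bits:
	 * usable = 4 * 16 + 0 = 64, so wake_batch = 64 / 8 = 8.
	 */
	printf("wake_batch = %u\n", calc_wake_batch(256, 6, 16));
	return 0;
}

With min_shallow_depth left at its new UINT_MAX default (set in sbitmap_queue_init_node() below), shallow collapses to bits_per_word and the result reduces to the old clamp of depth / SBQ_WAIT_QUEUES into [1, SBQ_WAKE_BATCH].
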
@@ -307,7 +322,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
}
- sbq->wake_batch = sbq_calc_wake_batch(depth);
+ sbq->min_shallow_depth = UINT_MAX;
+ sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
atomic_set(&sbq->wake_index, 0);
sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
@@ -327,21 +343,28 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
-void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int depth)
{
- unsigned int wake_batch = sbq_calc_wake_batch(depth);
+ unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
int i;
if (sbq->wake_batch != wake_batch) {
WRITE_ONCE(sbq->wake_batch, wake_batch);
/*
- * Pairs with the memory barrier in sbq_wake_up() to ensure that
- * the batch size is updated before the wait counts.
+ * Pairs with the memory barrier in sbitmap_queue_wake_up()
+ * to ensure that the batch size is updated before the wait
+ * counts.
*/
smp_mb__before_atomic();
for (i = 0; i < SBQ_WAIT_QUEUES; i++)
atomic_set(&sbq->ws[i].wait_cnt, 1);
}
+}
+
+void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+{
+ sbitmap_queue_update_wake_batch(sbq, depth);
sbitmap_resize(&sbq->sb, depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
@@ -380,6 +403,8 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
unsigned int hint, depth;
int nr;
+ WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);
+
hint = this_cpu_read(*sbq->alloc_hint);
depth = READ_ONCE(sbq->sb.depth);
if (unlikely(hint >= depth)) {
@@ -403,6 +428,14 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
}
EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
+void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
+ unsigned int min_shallow_depth)
+{
+ sbq->min_shallow_depth = min_shallow_depth;
+ sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
+
static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
{
int i, wake_index;
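
The new sbitmap_queue_min_shallow_depth() export is meant to be paired with __sbitmap_queue_get_shallow(). Below is a hedged sketch of the intended call pattern; the depth of 256 and shallow limit of 16 are placeholders, not taken from any in-tree user.

#include <linux/gfp.h>
#include <linux/numa.h>
#include <linux/sbitmap.h>

/* Illustrative only: a 256-bit queue whose shallow allocations never go
 * below 16 bits per word, registered up front. */
static struct sbitmap_queue example_sbq;

static int example_setup_and_get(void)
{
	int ret, nr;

	ret = sbitmap_queue_init_node(&example_sbq, 256, -1, false,
				      GFP_KERNEL, NUMA_NO_NODE);
	if (ret)
		return ret;

	/* Tell the queue the smallest shallow depth we will ever pass in. */
	sbitmap_queue_min_shallow_depth(&example_sbq, 16);

	/* Passing anything below 16 now trips the WARN_ON_ONCE() above. */
	nr = __sbitmap_queue_get_shallow(&example_sbq, 16);
	return nr;	/* bit number on success, -1 if the map was full */
}

Registering the minimum up front lets sbitmap_queue_update_wake_batch() size the wake batch for the most restrictive allocation the caller will ever make, which is what keeps shallow allocators from missing wakeups.
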
@@ -425,52 +458,67 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
return NULL;
}
-static void sbq_wake_up(struct sbitmap_queue *sbq)
+static bool __sbq_wake_up(struct sbitmap_queue *sbq)
{
struct sbq_wait_state *ws;
unsigned int wake_batch;
int wait_cnt;
- /*
- * Pairs with the memory barrier in set_current_state() to ensure the
- * proper ordering of clear_bit()/waitqueue_active() in the waker and
- * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
- * waiter. See the comment on waitqueue_active(). This is __after_atomic
- * because we just did clear_bit_unlock() in the caller.
- */
- smp_mb__after_atomic();
-
ws = sbq_wake_ptr(sbq);
if (!ws)
- return;
+ return false;
wait_cnt = atomic_dec_return(&ws->wait_cnt);
if (wait_cnt <= 0) {
+ int ret;
+
wake_batch = READ_ONCE(sbq->wake_batch);
+
/*
* Pairs with the memory barrier in sbitmap_queue_resize() to
* ensure that we see the batch size update before the wait
* count is reset.
*/
smp_mb__before_atomic();
+
/*
- * If there are concurrent callers to sbq_wake_up(), the last
- * one to decrement the wait count below zero will bump it back
- * up. If there is a concurrent resize, the count reset will
- * either cause the cmpxchg to fail or overwrite after the
- * cmpxchg.
+ * For concurrent callers of this, the one that failed the
+ * atomic_cmpxchg() race should call this function again
+ * to wake up a new batch on a different 'ws'.
*/
- atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
- sbq_index_atomic_inc(&sbq->wake_index);
- wake_up_nr(&ws->wait, wake_batch);
+ ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
+ if (ret == wait_cnt) {
+ sbq_index_atomic_inc(&sbq->wake_index);
+ wake_up_nr(&ws->wait, wake_batch);
+ return false;
+ }
+
+ return true;
}
+
+ return false;
+}
+
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
+{
+ while (__sbq_wake_up(sbq))
+ ;
}
+EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
unsigned int cpu)
{
sbitmap_clear_bit_unlock(&sbq->sb, nr);
- sbq_wake_up(sbq);
+ /*
+ * Pairs with the memory barrier in set_current_state() to ensure the
+ * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
+ * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
+ * waiter. See the comment on waitqueue_active().
+ */
+ smp_mb__after_atomic();
+ sbitmap_queue_wake_up(sbq);
+
if (likely(!sbq->round_robin && nr < sbq->sb.depth))
*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
}
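
The retry semantics described in the atomic_cmpxchg() comment above are easier to see in isolation. The following user-space model is only a sketch of the control flow of __sbq_wake_up() for a single wait queue; the names and types are not kernel code.

#include <stdatomic.h>
#include <stdbool.h>

/*
 * Model of one wait queue's counter: every freed bit decrements it, and the
 * caller that drives it to <= 0 tries to reset it to wake_batch.  Only the
 * winner of the compare-exchange wakes a batch; a loser reports true so the
 * caller (sbitmap_queue_wake_up() in the patch) retries, by then against a
 * different wait queue because the winner advanced wake_index.
 */
static bool model_wake_up(atomic_int *wait_cnt, int wake_batch)
{
	int cnt = atomic_fetch_sub(wait_cnt, 1) - 1; /* like atomic_dec_return() */

	if (cnt <= 0) {
		int expected = cnt;

		if (atomic_compare_exchange_strong(wait_cnt, &expected,
						   wake_batch))
			return false;	/* won the race: wake wake_batch waiters */
		return true;		/* lost the race: ask the caller to retry */
	}
	return false;			/* batch not yet exhausted */
}

sbitmap_queue_wake_up() simply loops while this returns true, so exactly one caller per exhausted batch performs the reset and the wake-up.
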
@@ -482,7 +530,7 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
/*
* Pairs with the memory barrier in set_current_state() like in
- * sbq_wake_up().
+ * sbitmap_queue_wake_up().
*/
smp_mb();
wake_index = atomic_read(&sbq->wake_index);
@@ -528,5 +576,6 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
seq_puts(m, "}\n");
seq_printf(m, "round_robin=%d\n", sbq->round_robin);
+ seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_show);