diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-20 05:56:43 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-20 05:56:43 +0200 |
commit | f1d702487b3bc16466ad9b4e5c76277b6829d34c (patch) | |
tree | 30398d00626a15477645cef81507808330f9439a | |
parent | Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff) | |
parent | blk-mq: bitmap tag: fix races in bt_get() function (diff) | |
download | linux-f1d702487b3bc16466ad9b4e5c76277b6829d34c.tar.xz linux-f1d702487b3bc16466ad9b4e5c76277b6829d34c.zip |
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
"A smaller collection of fixes for the block core that would be nice to
have in -rc2. This pull request contains:
- Fixes for races in the wait/wakeup logic used in blk-mq from
Alexander. No issues have been observed, but it is definitely a
bit flakey currently. Alternatively, we may drop the cyclic
wakeups going forward, but that needs more testing.
- Some cleanups from Christoph.
- Fix for an oops in null_blk if queue_mode=1 and softirq completions
are used. From me.
- A fix for a regression caused by the chunk size setting. It
inadvertently used max_hw_sectors instead of max_sectors, which is
incorrect, and causes hangs on btrfs multi-disk setups (where hw
sectors apparently isn't set). From me.
- Removal of WQ_POWER_EFFICIENT in the kblockd creation. This was a
recent addition as well, but it actually breaks blk-mq which relies
on strict scheduling. If the workqueue power_efficient mode is
turned on, this breaks blk-mq. From Matias.
- null_blk module parameter description fix from Mike"
* 'for-linus' of git://git.kernel.dk/linux-block:
blk-mq: bitmap tag: fix races in bt_get() function
blk-mq: bitmap tag: fix race on blk_mq_bitmap_tags::wake_cnt
blk-mq: bitmap tag: fix races on shared ::wake_index fields
block: blk_max_size_offset() should check ->max_sectors
null_blk: fix softirq completions for queue_mode == 1
blk-mq: merge blk_mq_drain_queue and __blk_mq_drain_queue
blk-mq: properly drain stopped queues
block: remove WQ_POWER_EFFICIENT from kblockd
null_blk: fix name and description of 'queue_mode' module parameter
block: remove elv_abort_queue and blk_abort_flushes
-rw-r--r-- | block/blk-core.c | 3 | ||||
-rw-r--r-- | block/blk-flush.c | 38 | ||||
-rw-r--r-- | block/blk-mq-tag.c | 59 | ||||
-rw-r--r-- | block/blk-mq-tag.h | 2 | ||||
-rw-r--r-- | block/blk-mq.c | 11 | ||||
-rw-r--r-- | block/blk.h | 1 | ||||
-rw-r--r-- | block/elevator.c | 20 | ||||
-rw-r--r-- | drivers/block/null_blk.c | 7 | ||||
-rw-r--r-- | include/linux/blk-mq.h | 2 | ||||
-rw-r--r-- | include/linux/blkdev.h | 2 | ||||
-rw-r--r-- | include/linux/elevator.h | 1 |
11 files changed, 50 insertions, 96 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index f6f6b9af3e3f..6f8dba161bfe 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3312,8 +3312,7 @@ int __init blk_dev_init(void) /* used for unplugging and affects IO latency/throughput - HIGHPRI */ kblockd_workqueue = alloc_workqueue("kblockd", - WQ_MEM_RECLAIM | WQ_HIGHPRI | - WQ_POWER_EFFICIENT, 0); + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); if (!kblockd_workqueue) panic("Failed to create kblockd\n"); diff --git a/block/blk-flush.c b/block/blk-flush.c index 8ffee4b5f93d..3cb5e9e7108a 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -422,44 +422,6 @@ void blk_insert_flush(struct request *rq) } /** - * blk_abort_flushes - @q is being aborted, abort flush requests - * @q: request_queue being aborted - * - * To be called from elv_abort_queue(). @q is being aborted. Prepare all - * FLUSH/FUA requests for abortion. - * - * CONTEXT: - * spin_lock_irq(q->queue_lock) - */ -void blk_abort_flushes(struct request_queue *q) -{ - struct request *rq, *n; - int i; - - /* - * Requests in flight for data are already owned by the dispatch - * queue or the device driver. Just restore for normal completion. - */ - list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) { - list_del_init(&rq->flush.list); - blk_flush_restore_request(rq); - } - - /* - * We need to give away requests on flush queues. Restore for - * normal completion and put them on the dispatch queue. - */ - for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) { - list_for_each_entry_safe(rq, n, &q->flush_queue[i], - flush.list) { - list_del_init(&rq->flush.list); - blk_flush_restore_request(rq); - list_add_tail(&rq->queuelist, &q->queue_head); - } - } -} - -/** * blkdev_issue_flush - queue a flush * @bdev: blockdev to issue flush for * @gfp_mask: memory allocation flags (for bio_alloc) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 1aab39f71d95..c1b92426c95e 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -43,9 +43,16 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags) return bt_has_free_tags(&tags->bitmap_tags); } -static inline void bt_index_inc(unsigned int *index) +static inline int bt_index_inc(int index) { - *index = (*index + 1) & (BT_WAIT_QUEUES - 1); + return (index + 1) & (BT_WAIT_QUEUES - 1); +} + +static inline void bt_index_atomic_inc(atomic_t *index) +{ + int old = atomic_read(index); + int new = bt_index_inc(old); + atomic_cmpxchg(index, old, new); } /* @@ -69,14 +76,14 @@ static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags) int i, wake_index; bt = &tags->bitmap_tags; - wake_index = bt->wake_index; + wake_index = atomic_read(&bt->wake_index); for (i = 0; i < BT_WAIT_QUEUES; i++) { struct bt_wait_state *bs = &bt->bs[wake_index]; if (waitqueue_active(&bs->wait)) wake_up(&bs->wait); - bt_index_inc(&wake_index); + wake_index = bt_index_inc(wake_index); } } @@ -212,12 +219,14 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx) { struct bt_wait_state *bs; + int wait_index; if (!hctx) return &bt->bs[0]; - bs = &bt->bs[hctx->wait_index]; - bt_index_inc(&hctx->wait_index); + wait_index = atomic_read(&hctx->wait_index); + bs = &bt->bs[wait_index]; + bt_index_atomic_inc(&hctx->wait_index); return bs; } @@ -239,18 +248,12 @@ static int bt_get(struct blk_mq_alloc_data *data, bs = bt_wait_ptr(bt, hctx); do { - bool was_empty; - - was_empty = list_empty(&wait.task_list); prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE); tag = __bt_get(hctx, bt, last_tag); if (tag != -1) break; - if (was_empty) - atomic_set(&bs->wait_cnt, bt->wake_cnt); - blk_mq_put_ctx(data->ctx); io_schedule(); @@ -313,18 +316,19 @@ static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt) { int i, wake_index; - wake_index = bt->wake_index; + wake_index = atomic_read(&bt->wake_index); for (i = 0; i < BT_WAIT_QUEUES; i++) { struct bt_wait_state *bs = &bt->bs[wake_index]; if (waitqueue_active(&bs->wait)) { - if (wake_index != bt->wake_index) - bt->wake_index = wake_index; + int o = atomic_read(&bt->wake_index); + if (wake_index != o) + atomic_cmpxchg(&bt->wake_index, o, wake_index); return bs; } - bt_index_inc(&wake_index); + wake_index = bt_index_inc(wake_index); } return NULL; @@ -334,6 +338,7 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) { const int index = TAG_TO_INDEX(bt, tag); struct bt_wait_state *bs; + int wait_cnt; /* * The unlock memory barrier need to order access to req in free @@ -342,10 +347,19 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word); bs = bt_wake_ptr(bt); - if (bs && atomic_dec_and_test(&bs->wait_cnt)) { - atomic_set(&bs->wait_cnt, bt->wake_cnt); - bt_index_inc(&bt->wake_index); + if (!bs) + return; + + wait_cnt = atomic_dec_return(&bs->wait_cnt); + if (wait_cnt == 0) { +wake: + atomic_add(bt->wake_cnt, &bs->wait_cnt); + bt_index_atomic_inc(&bt->wake_index); wake_up(&bs->wait); + } else if (wait_cnt < 0) { + wait_cnt = atomic_inc_return(&bs->wait_cnt); + if (!wait_cnt) + goto wake; } } @@ -499,10 +513,13 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, return -ENOMEM; } - for (i = 0; i < BT_WAIT_QUEUES; i++) + bt_update_count(bt, depth); + + for (i = 0; i < BT_WAIT_QUEUES; i++) { init_waitqueue_head(&bt->bs[i].wait); + atomic_set(&bt->bs[i].wait_cnt, bt->wake_cnt); + } - bt_update_count(bt, depth); return 0; } diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 98696a65d4d4..6206ed17ef76 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -24,7 +24,7 @@ struct blk_mq_bitmap_tags { unsigned int map_nr; struct blk_align_bitmap *map; - unsigned int wake_index; + atomic_t wake_index; struct bt_wait_state *bs; }; diff --git a/block/blk-mq.c b/block/blk-mq.c index e11f5f8e0313..0ef2dc7f01bf 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -109,7 +109,7 @@ static void blk_mq_queue_exit(struct request_queue *q) __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); } -static void __blk_mq_drain_queue(struct request_queue *q) +void blk_mq_drain_queue(struct request_queue *q) { while (true) { s64 count; @@ -120,7 +120,7 @@ static void __blk_mq_drain_queue(struct request_queue *q) if (count == 0) break; - blk_mq_run_queues(q, false); + blk_mq_start_hw_queues(q); msleep(10); } } @@ -139,12 +139,7 @@ static void blk_mq_freeze_queue(struct request_queue *q) spin_unlock_irq(q->queue_lock); if (drain) - __blk_mq_drain_queue(q); -} - -void blk_mq_drain_queue(struct request_queue *q) -{ - __blk_mq_drain_queue(q); + blk_mq_drain_queue(q); } static void blk_mq_unfreeze_queue(struct request_queue *q) diff --git a/block/blk.h b/block/blk.h index 45385e9abf6f..6748c4f8d7a1 100644 --- a/block/blk.h +++ b/block/blk.h @@ -84,7 +84,6 @@ static inline void blk_clear_rq_complete(struct request *rq) #define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED) void blk_insert_flush(struct request *rq); -void blk_abort_flushes(struct request_queue *q); static inline struct request *__elv_next_request(struct request_queue *q) { diff --git a/block/elevator.c b/block/elevator.c index f35edddfe9b5..34bded18910e 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -729,26 +729,6 @@ int elv_may_queue(struct request_queue *q, int rw) return ELV_MQUEUE_MAY; } -void elv_abort_queue(struct request_queue *q) -{ - struct request *rq; - - blk_abort_flushes(q); - - while (!list_empty(&q->queue_head)) { - rq = list_entry_rq(q->queue_head.next); - rq->cmd_flags |= REQ_QUIET; - trace_block_rq_abort(q, rq); - /* - * Mark this request as started so we don't trigger - * any debug logic in the end I/O path. - */ - blk_start_request(rq); - __blk_end_request_all(rq, -EIO); - } -} -EXPORT_SYMBOL(elv_abort_queue); - void elv_completed_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 77087a29b127..a3b042c4d448 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -79,7 +79,7 @@ MODULE_PARM_DESC(home_node, "Home node for the device"); static int queue_mode = NULL_Q_MQ; module_param(queue_mode, int, S_IRUGO); -MODULE_PARM_DESC(use_mq, "Use blk-mq interface (0=bio,1=rq,2=multiqueue)"); +MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)"); static int gb = 250; module_param(gb, int, S_IRUGO); @@ -227,7 +227,10 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) static void null_softirq_done_fn(struct request *rq) { - end_cmd(blk_mq_rq_to_pdu(rq)); + if (queue_mode == NULL_Q_MQ) + end_cmd(blk_mq_rq_to_pdu(rq)); + else + end_cmd(rq->special); } static inline void null_handle_cmd(struct nullb_cmd *cmd) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a002cf191427..eb726b9c5762 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -42,7 +42,7 @@ struct blk_mq_hw_ctx { unsigned int nr_ctx; struct blk_mq_ctx **ctxs; - unsigned int wait_index; + atomic_t wait_index; struct blk_mq_tags *tags; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 31e11051f1ba..713f8b62b435 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -920,7 +920,7 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, sector_t offset) { if (!q->limits.chunk_sectors) - return q->limits.max_hw_sectors; + return q->limits.max_sectors; return q->limits.chunk_sectors - (offset & (q->limits.chunk_sectors - 1)); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 4ff262e2bf37..e2a6bd7fb133 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -133,7 +133,6 @@ extern struct request *elv_latter_request(struct request_queue *, struct request extern int elv_register_queue(struct request_queue *q); extern void elv_unregister_queue(struct request_queue *q); extern int elv_may_queue(struct request_queue *, int); -extern void elv_abort_queue(struct request_queue *); extern void elv_completed_request(struct request_queue *, struct request *); extern int elv_set_request(struct request_queue *q, struct request *rq, struct bio *bio, gfp_t gfp_mask); |