From 737f98cfe7de8df7433a4d846850aa8efa44bd48 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Feb 2017 18:13:59 +0800 Subject: blk-mq: initialize mq kobjects in blk_mq_init_allocated_queue() Both q->mq_kobj and sw queues' kobjects should be initialized only once, instead of being initialized in each add_disk context. Also this patch removes clearing of ctx in blk_mq_init_cpu_queues() because the percpu allocator zero-fills the allocated variable. This patch fixes one issue[1] reported by Omar. [1] kernel warning when doing unbind/bind on one scsi-mq device [ 19.347924] kobject (ffff8800791ea0b8): tried to init an initialized object, something is seriously wrong. [ 19.349781] CPU: 1 PID: 84 Comm: kworker/u8:1 Not tainted 4.10.0-rc7-00210-g53f39eeaa263 #34 [ 19.350686] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-20161122_114906-anatol 04/01/2014 [ 19.350920] Workqueue: events_unbound async_run_entry_fn [ 19.350920] Call Trace: [ 19.350920] dump_stack+0x63/0x83 [ 19.350920] kobject_init+0x77/0x90 [ 19.350920] blk_mq_register_dev+0x40/0x130 [ 19.350920] blk_register_queue+0xb6/0x190 [ 19.350920] device_add_disk+0x1ec/0x4b0 [ 19.350920] sd_probe_async+0x10d/0x1c0 [sd_mod] [ 19.350920] async_run_entry_fn+0x48/0x150 [ 19.350920] process_one_work+0x1d0/0x480 [ 19.350920] worker_thread+0x48/0x4e0 [ 19.350920] kthread+0x101/0x140 [ 19.350920] ? process_one_work+0x480/0x480 [ 19.350920] ? 
kthread_create_on_node+0x60/0x60 [ 19.350920] ret_from_fork+0x2c/0x40 Cc: Omar Sandoval Signed-off-by: Ming Lei Tested-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index b2fd175e84d7..ed4b55176cdd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2045,7 +2045,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); struct blk_mq_hw_ctx *hctx; - memset(__ctx, 0, sizeof(*__ctx)); __ctx->cpu = i; spin_lock_init(&__ctx->lock); INIT_LIST_HEAD(&__ctx->rq_list); @@ -2352,6 +2351,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!q->queue_ctx) goto err_exit; + /* init q->mq_kobj and sw queues' kobjects */ + blk_mq_sysfs_init(q); + q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)), GFP_KERNEL, set->numa_node); if (!q->queue_hw_ctx) -- cgit v1.2.3 From 7ea5fe31c12dd8bcf4a9c5a4a7e8e23826a9a3b8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Feb 2017 18:14:00 +0800 Subject: blk-mq: make lifetime consistent between q/ctx and its kobject Currently, from the kobject's point of view, both q->mq_kobj and ctx->kobj can be released during one cycle of blk_mq_register_dev() and blk_mq_unregister_dev(). Actually, the sw queue's lifetime is the same as its request queue's, which is covered by request_queue->kobj. So we don't need to call kobject_put() for the two kinds of kobject in __blk_mq_unregister_dev(); instead we do that in the release handler of the request queue. 
Signed-off-by: Ming Lei Tested-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 20 +++++++++++++------- block/blk-mq.c | 7 ++++++- block/blk-mq.h | 1 + 3 files changed, 20 insertions(+), 8 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 124305407c80..77fb238af2be 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -242,15 +242,11 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) { struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; - int i, j; + int i; queue_for_each_hw_ctx(q, hctx, i) { blk_mq_unregister_hctx(hctx); - hctx_for_each_ctx(hctx, ctx, j) - kobject_put(&ctx->kobj); - kobject_put(&hctx->kobj); } @@ -258,8 +254,6 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); - kobject_put(&q->mq_kobj); - kobject_put(&dev->kobj); q->mq_sysfs_init_done = false; @@ -277,6 +271,18 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) kobject_init(&hctx->kobj, &blk_mq_hw_ktype); } +void blk_mq_sysfs_deinit(struct request_queue *q) +{ + struct blk_mq_ctx *ctx; + int cpu; + + for_each_possible_cpu(cpu) { + ctx = per_cpu_ptr(q->queue_ctx, cpu); + kobject_put(&ctx->kobj); + } + kobject_put(&q->mq_kobj); +} + void blk_mq_sysfs_init(struct request_queue *q) { struct blk_mq_ctx *ctx; diff --git a/block/blk-mq.c b/block/blk-mq.c index ed4b55176cdd..b985c236f50f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2264,7 +2264,12 @@ void blk_mq_release(struct request_queue *q) kfree(q->queue_hw_ctx); - /* ctx kobj stays in queue_ctx */ + /* + * release .mq_kobj and sw queue's kobject now because + * both share lifetime with request queue. 
+ */ + blk_mq_sysfs_deinit(q); + free_percpu(q->queue_ctx); } diff --git a/block/blk-mq.h b/block/blk-mq.h index ad8bfd7473ef..b79f9a7d8cf6 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -78,6 +78,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, * sysfs helpers */ extern void blk_mq_sysfs_init(struct request_queue *q); +extern void blk_mq_sysfs_deinit(struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); -- cgit v1.2.3 From 6c8b232efea1ad3d263ff8b9c16b7e8767a77488 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Feb 2017 18:14:01 +0800 Subject: blk-mq: make lifetime consistent between hctx and its kobject This patch removes kobject_put() over hctx in __blk_mq_unregister_dev(), and tries to keep the lifetime consistent between hctx and hctx's kobject. Now blk_mq_sysfs_register() and blk_mq_sysfs_unregister() become totally symmetrical, and the kobject's refcounter drops to zero just when the hctx is freed. 
Signed-off-by: Ming Lei Tested-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 15 ++++++++++----- block/blk-mq.c | 5 +---- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 77fb238af2be..cb19ec16a7fc 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -17,6 +17,14 @@ static void blk_mq_sysfs_release(struct kobject *kobj) { } +static void blk_mq_hw_sysfs_release(struct kobject *kobj) +{ + struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, + kobj); + kfree(hctx->ctxs); + kfree(hctx); +} + struct blk_mq_ctx_sysfs_entry { struct attribute attr; ssize_t (*show)(struct blk_mq_ctx *, char *); @@ -200,7 +208,7 @@ static struct kobj_type blk_mq_ctx_ktype = { static struct kobj_type blk_mq_hw_ktype = { .sysfs_ops = &blk_mq_hw_sysfs_ops, .default_attrs = default_hw_ctx_attrs, - .release = blk_mq_sysfs_release, + .release = blk_mq_hw_sysfs_release, }; static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx) @@ -244,12 +252,9 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; - queue_for_each_hw_ctx(q, hctx, i) { + queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - kobject_put(&hctx->kobj); - } - blk_mq_debugfs_unregister_hctxs(q); kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); diff --git a/block/blk-mq.c b/block/blk-mq.c index b985c236f50f..f70595e5fb86 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2256,8 +2256,7 @@ void blk_mq_release(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) { if (!hctx) continue; - kfree(hctx->ctxs); - kfree(hctx); + kobject_put(&hctx->kobj); } q->mq_map = NULL; @@ -2336,8 +2335,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, blk_mq_exit_hctx(q, set, hctx, j); free_cpumask_var(hctx->cpumask); kobject_put(&hctx->kobj); - kfree(hctx->ctxs); - kfree(hctx); hctxs[j] = NULL; } -- 
cgit v1.2.3 From 01388df37627d2e89f0b835377c0eb39d81f671c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Feb 2017 18:14:02 +0800 Subject: blk-mq: free hctx->cpumask in release handler of hctx's kobject It is obvious that hctx->cpumask is per hctx, and that both share the same lifetime, so this patch moves the freeing of hctx->cpumask into the release handler of hctx's kobject. Signed-off-by: Ming Lei Tested-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 1 + block/blk-mq.c | 12 ------------ 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index cb19ec16a7fc..d745ab81033a 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -21,6 +21,7 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj) { struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj); + free_cpumask_var(hctx->cpumask); kfree(hctx->ctxs); kfree(hctx); } diff --git a/block/blk-mq.c b/block/blk-mq.c index f70595e5fb86..159187a28d66 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1955,16 +1955,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, } } -static void blk_mq_free_hw_queues(struct request_queue *q, - struct blk_mq_tag_set *set) -{ - struct blk_mq_hw_ctx *hctx; - unsigned int i; - - queue_for_each_hw_ctx(q, hctx, i) - free_cpumask_var(hctx->cpumask); -} - static int blk_mq_init_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) @@ -2333,7 +2323,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, if (hctx->tags) blk_mq_free_map_and_requests(set, j); blk_mq_exit_hctx(q, set, hctx, j); - free_cpumask_var(hctx->cpumask); kobject_put(&hctx->kobj); hctxs[j] = NULL; @@ -2446,7 +2435,6 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_del_queue_tag_set(q); blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); - blk_mq_free_hw_queues(q, set); } /* Basically redo 
blk_mq_init_queue with queue frozen */ -- cgit v1.2.3 From 9c62110454b088b4914ffe375c2dbc19643eac34 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Mar 2017 11:51:59 -0600 Subject: blk-mq-sched: don't run the queue async from blk_mq_try_issue_directly() If we have scheduling enabled, we jump directly to insert-and-run. That's fine, but we run the queue async and we don't pass in information on whether we can block from this context or not. Fixup both these cases. Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'block/blk-mq.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index 159187a28d66..a4546f060e80 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1434,7 +1434,8 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true); } -static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) +static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie, + bool may_sleep) { struct request_queue *q = rq->q; struct blk_mq_queue_data bd = { @@ -1475,7 +1476,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) } insert: - blk_mq_sched_insert_request(rq, false, true, true, false); + blk_mq_sched_insert_request(rq, false, true, false, may_sleep); } /* @@ -1569,11 +1570,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) { rcu_read_lock(); - blk_mq_try_issue_directly(old_rq, &cookie); + blk_mq_try_issue_directly(old_rq, &cookie, false); rcu_read_unlock(); } else { srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu); - blk_mq_try_issue_directly(old_rq, &cookie); + blk_mq_try_issue_directly(old_rq, &cookie, true); srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx); } goto done; -- cgit v1.2.3