From 8b4922d3173d2eee7b43be8e5caec3ab7d30feff Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Feb 2014 16:39:52 +0100 Subject: block: Stop abusing csd.list for fifo_time Block layer currently abuses rq->csd.list.next for storing fifo_time. That is a terrible hack and completely unnecessary as well. Union achieves the same space saving in a cleaner way. Signed-off-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + include/linux/elevator.h | 6 ------ 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4afa4f8f6090..1e1fa3f93d5f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -99,6 +99,7 @@ struct request { union { struct call_single_data csd; struct work_struct mq_flush_work; + unsigned long fifo_time; }; struct request_queue *q; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 306dd8cd0b6f..0bdfd46f4735 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -202,12 +202,6 @@ enum { #define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) -/* - * Hack to reuse the csd.list list_head as the fifo time holder while - * the request is in the io scheduler. Saves an unsigned long in rq. - */ -#define rq_fifo_time(rq) ((unsigned long) (rq)->csd.list.next) -#define rq_set_fifo_time(rq,exp) ((rq)->csd.list.next = (void *) (exp)) #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) #define rq_fifo_clear(rq) do { \ list_del_init(&(rq)->queuelist); \ -- cgit v1.2.3 From d9a74df512e44580a34bf6e70f5d08c126507354 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:39:53 +0100 Subject: block: Remove useless IPI struct initialization rq_fifo_clear() reset the csd.list through INIT_LIST_HEAD for no clear purpose. The csd.list doesn't need to be initialized as a list head because it's only ever used as a list node. Lets remove this useless initialization. Reviewed-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/elevator.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 0bdfd46f4735..df63bd3a8cf1 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -203,10 +203,7 @@ enum { #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) -#define rq_fifo_clear(rq) do { \ - list_del_init(&(rq)->queuelist); \ - INIT_LIST_HEAD(&(rq)->csd.list); \ - } while (0) +#define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist) #else /* CONFIG_BLOCK */ -- cgit v1.2.3 From 0ebeb79ce920bed3a4a55113534951d95a444fbb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Feb 2014 16:39:56 +0100 Subject: smp: Remove unused list_head from csd Now that we got rid of all the remaining code which fiddled with csd.list, lets remove it. Signed-off-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/smp.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index 6ae004e437ea..4991c6b12831 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -17,10 +17,7 @@ extern void cpu_idle(void); typedef void (*smp_call_func_t)(void *info); struct call_single_data { - union { - struct list_head list; - struct llist_node llist; - }; + struct llist_node llist; smp_call_func_t func; void *info; u16 flags; -- cgit v1.2.3 From 08eed44c7249d381a099bc55577e55c6bb533160 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Feb 2014 16:39:57 +0100 Subject: smp: Teach __smp_call_function_single() to check for offline cpus Align __smp_call_function_single() with smp_call_function_single() so that it also checks whether requested cpu is still online. Signed-off-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/smp.h | 3 +-- kernel/smp.c | 11 +++++++---- kernel/up.c | 5 +++-- 3 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index 4991c6b12831..c39074c794c5 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,8 +50,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); -void __smp_call_function_single(int cpuid, struct call_single_data *data, - int wait); +int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait); #ifdef CONFIG_SMP diff --git a/kernel/smp.c b/kernel/smp.c index e3852de042a6..5ff14e3739ca 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -276,18 +276,18 @@ EXPORT_SYMBOL_GPL(smp_call_function_any); /** * __smp_call_function_single(): Run a function on a specific CPU * @cpu: The CPU to run on. - * @data: Pre-allocated and setup data structure + * @csd: Pre-allocated and setup data structure * @wait: If true, wait until function has completed on specified CPU. * * Like smp_call_function_single(), but allow caller to pass in a * pre-allocated data structure. Useful for embedding @data inside * other structures, for instance. */ -void __smp_call_function_single(int cpu, struct call_single_data *csd, - int wait) +int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait) { unsigned int this_cpu; unsigned long flags; + int err = 0; this_cpu = get_cpu(); /* @@ -303,11 +303,14 @@ void __smp_call_function_single(int cpu, struct call_single_data *csd, local_irq_save(flags); csd->func(csd->info); local_irq_restore(flags); - } else { + } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { csd_lock(csd); generic_exec_single(cpu, csd, wait); + } else { + err = -ENXIO; /* CPU not online */ } put_cpu(); + return err; } EXPORT_SYMBOL_GPL(__smp_call_function_single); diff --git a/kernel/up.c b/kernel/up.c index 509403e3fbc6..cdf03d16840e 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -22,14 +22,15 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); -void __smp_call_function_single(int cpu, struct call_single_data *csd, - int wait) +int __smp_call_function_single(int cpu, struct call_single_data *csd, + int wait) { unsigned long flags; local_irq_save(flags); csd->func(csd->info); local_irq_restore(flags); + return 0; } EXPORT_SYMBOL(__smp_call_function_single); -- cgit v1.2.3 From fce8ad1568c57e7f334018dec4fa1744c926c135 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:40:01 +0100 Subject: smp: Remove wait argument from __smp_call_function_single() The main point of calling __smp_call_function_single() is to send an IPI in a pure asynchronous way. By embedding a csd in an object, a caller can send the IPI without waiting for a previous one to complete as is required by smp_call_function_single() for example. As such, sending this kind of IPI can be safe even when irqs are disabled. This flexibility comes at the expense of the caller who then needs to synchronize the csd lifecycle by himself and make sure that IPIs on a single csd are serialized. This is how __smp_call_function_single() works when wait = 0 and this usecase is relevant. Now there don't seem to be any usecase with wait = 1 that can't be covered by smp_call_function_single() instead, which is safer. Lets look at the two possible scenario: 1) The user calls __smp_call_function_single(wait = 1) on a csd embedded in an object. It looks like a nice and convenient pattern at the first sight because we can then retrieve the object from the IPI handler easily. But actually it is a waste of memory space in the object since the csd can be allocated from the stack by smp_call_function_single(wait = 1) and the object can be passed an the IPI argument. Besides that, embedding the csd in an object is more error prone because the caller must take care of the serialization of the IPIs for this csd. 2) The user calls __smp_call_function_single(wait = 1) on a csd that is allocated on the stack. It's ok but smp_call_function_single() can do it as well and it already takes care of the allocation on the stack. Again it's more simple and less error prone. Therefore, using the underscore prepend API version with wait = 1 is a bad pattern and a sign that the caller can do safer and more simple. There was a single user of that which has just been converted. So lets remove this option to discourage further users. Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- block/blk-softirq.c | 2 +- drivers/cpuidle/coupled.c | 2 +- include/linux/smp.h | 2 +- kernel/sched/core.c | 2 +- kernel/smp.c | 19 ++++--------------- kernel/up.c | 3 +-- net/core/dev.c | 2 +- 8 files changed, 11 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/block/blk-mq.c b/block/blk-mq.c index 1fa9dd153fde..62154edf1489 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -353,7 +353,7 @@ void __blk_mq_complete_request(struct request *rq) rq->csd.func = __blk_mq_complete_request_remote; rq->csd.info = rq; rq->csd.flags = 0; - __smp_call_function_single(ctx->cpu, &rq->csd, 0); + __smp_call_function_single(ctx->cpu, &rq->csd); } else { rq->q->softirq_done_fn(rq); } diff --git a/block/blk-softirq.c b/block/blk-softirq.c index b5c37d96cf0e..6345b7ebd0df 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -70,7 +70,7 @@ static int raise_blk_irq(int cpu, struct request *rq) data->info = rq; data->flags = 0; - __smp_call_function_single(cpu, data, 0); + __smp_call_function_single(cpu, data); return 0; } diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index e952936418d0..04115947accc 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -323,7 +323,7 @@ static void cpuidle_coupled_poke(int cpu) struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) - __smp_call_function_single(cpu, csd, 0); + __smp_call_function_single(cpu, csd); } /** diff --git a/include/linux/smp.h b/include/linux/smp.h index c39074c794c5..b410a1f23281 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,7 +50,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); -int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait); +int __smp_call_function_single(int cpu, struct call_single_data *csd); #ifdef CONFIG_SMP diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b46131ef6aab..eba3d84765f3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -432,7 +432,7 @@ void hrtick_start(struct rq *rq, u64 delay) if (rq == this_rq()) { __hrtick_restart(rq); } else if (!rq->hrtick_csd_pending) { - __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0); + __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd); rq->hrtick_csd_pending = 1; } } diff --git a/kernel/smp.c b/kernel/smp.c index fa04ab938e52..b76763189752 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -241,29 +241,18 @@ EXPORT_SYMBOL(smp_call_function_single); * __smp_call_function_single(): Run a function on a specific CPU * @cpu: The CPU to run on. * @csd: Pre-allocated and setup data structure - * @wait: If true, wait until function has completed on specified CPU. * * Like smp_call_function_single(), but allow caller to pass in a * pre-allocated data structure. Useful for embedding @data inside * other structures, for instance. */ -int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait) +int __smp_call_function_single(int cpu, struct call_single_data *csd) { int err = 0; - int this_cpu; - this_cpu = get_cpu(); - /* - * Can deadlock when called with interrupts disabled. - * We allow cpu's that are not yet online though, as no one else can - * send smp call function interrupt to this cpu and as such deadlocks - * can't happen. - */ - WARN_ON_ONCE(cpu_online(this_cpu) && wait && irqs_disabled() - && !oops_in_progress); - - err = generic_exec_single(cpu, csd, csd->func, csd->info, wait); - put_cpu(); + preempt_disable(); + err = generic_exec_single(cpu, csd, csd->func, csd->info, 0); + preempt_enable(); return err; } diff --git a/kernel/up.c b/kernel/up.c index cdf03d16840e..4e199d4cef8e 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -22,8 +22,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); -int __smp_call_function_single(int cpu, struct call_single_data *csd, - int wait) +int __smp_call_function_single(int cpu, struct call_single_data *csd) { unsigned long flags; diff --git a/net/core/dev.c b/net/core/dev.c index 4ad1b78c9c77..d1298128bff4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4129,7 +4129,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) if (cpu_online(remsd->cpu)) __smp_call_function_single(remsd->cpu, - &remsd->csd, 0); + &remsd->csd); remsd = next; } } else -- cgit v1.2.3 From c46fff2a3b29794b35d717b5680a27f31a6a6bc0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:40:02 +0100 Subject: smp: Rename __smp_call_function_single() to smp_call_function_single_async() The name __smp_call_function_single() doesn't tell much about the properties of this function, especially when compared to smp_call_function_single(). The comments above the implementation are also misleading. The main point of this function is actually not to be able to embed the csd in an object. This is actually a requirement that result from the purpose of this function which is to raise an IPI asynchronously. As such it can be called with interrupts disabled. And this feature comes at the cost of the caller who then needs to serialize the IPIs on this csd. Lets rename the function and enhance the comments so that they reflect these properties. Suggested-by: Christoph Hellwig Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- block/blk-softirq.c | 2 +- drivers/cpuidle/coupled.c | 2 +- include/linux/smp.h | 2 +- kernel/sched/core.c | 2 +- kernel/smp.c | 19 +++++++++++++------ kernel/up.c | 4 ++-- net/core/dev.c | 2 +- 8 files changed, 21 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/block/blk-mq.c b/block/blk-mq.c index 62154edf1489..6468a715a0e4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -353,7 +353,7 @@ void __blk_mq_complete_request(struct request *rq) rq->csd.func = __blk_mq_complete_request_remote; rq->csd.info = rq; rq->csd.flags = 0; - __smp_call_function_single(ctx->cpu, &rq->csd); + smp_call_function_single_async(ctx->cpu, &rq->csd); } else { rq->q->softirq_done_fn(rq); } diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 6345b7ebd0df..ebd6b6f1bdeb 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -70,7 +70,7 @@ static int raise_blk_irq(int cpu, struct request *rq) data->info = rq; data->flags = 0; - __smp_call_function_single(cpu, data); + smp_call_function_single_async(cpu, data); return 0; } diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 04115947accc..cb6654bfad77 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -323,7 +323,7 @@ static void cpuidle_coupled_poke(int cpu) struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) - __smp_call_function_single(cpu, csd); + smp_call_function_single_async(cpu, csd); } /** diff --git a/include/linux/smp.h b/include/linux/smp.h index b410a1f23281..633f5edd7470 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,7 +50,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); -int __smp_call_function_single(int cpu, struct call_single_data *csd); +int smp_call_function_single_async(int cpu, struct call_single_data *csd); #ifdef CONFIG_SMP diff --git a/kernel/sched/core.c b/kernel/sched/core.c index eba3d84765f3..0cca04a53de0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -432,7 +432,7 @@ void hrtick_start(struct rq *rq, u64 delay) if (rq == this_rq()) { __hrtick_restart(rq); } else if (!rq->hrtick_csd_pending) { - __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd); + smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); rq->hrtick_csd_pending = 1; } } diff --git a/kernel/smp.c b/kernel/smp.c index b76763189752..06d574e42c72 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -238,15 +238,22 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, EXPORT_SYMBOL(smp_call_function_single); /** - * __smp_call_function_single(): Run a function on a specific CPU + * smp_call_function_single_async(): Run an asynchronous function on a + * specific CPU. * @cpu: The CPU to run on. * @csd: Pre-allocated and setup data structure * - * Like smp_call_function_single(), but allow caller to pass in a - * pre-allocated data structure. Useful for embedding @data inside - * other structures, for instance. + * Like smp_call_function_single(), but the call is asynchonous and + * can thus be done from contexts with disabled interrupts. + * + * The caller passes his own pre-allocated data structure + * (ie: embedded in an object) and is responsible for synchronizing it + * such that the IPIs performed on the @csd are strictly serialized. + * + * NOTE: Be careful, there is unfortunately no current debugging facility to + * validate the correctness of this serialization. */ -int __smp_call_function_single(int cpu, struct call_single_data *csd) +int smp_call_function_single_async(int cpu, struct call_single_data *csd) { int err = 0; @@ -256,7 +263,7 @@ int __smp_call_function_single(int cpu, struct call_single_data *csd) return err; } -EXPORT_SYMBOL_GPL(__smp_call_function_single); +EXPORT_SYMBOL_GPL(smp_call_function_single_async); /* * smp_call_function_any - Run a function on any of the given cpus diff --git a/kernel/up.c b/kernel/up.c index 4e199d4cef8e..1760bf3d1463 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -22,7 +22,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); -int __smp_call_function_single(int cpu, struct call_single_data *csd) +int smp_call_function_single_async(int cpu, struct call_single_data *csd) { unsigned long flags; @@ -31,7 +31,7 @@ int __smp_call_function_single(int cpu, struct call_single_data *csd) local_irq_restore(flags); return 0; } -EXPORT_SYMBOL(__smp_call_function_single); +EXPORT_SYMBOL(smp_call_function_single_async); int on_each_cpu(smp_call_func_t func, void *info, int wait) { diff --git a/net/core/dev.c b/net/core/dev.c index d1298128bff4..ac7a2abb7f1a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4128,7 +4128,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) struct softnet_data *next = remsd->rps_ipi_next; if (cpu_online(remsd->cpu)) - __smp_call_function_single(remsd->cpu, + smp_call_function_single_async(remsd->cpu, &remsd->csd); remsd = next; } -- cgit v1.2.3 From af5040da01ef980670b3741b3e10733ee3e33566 Mon Sep 17 00:00:00 2001 From: Roman Pen Date: Tue, 4 Mar 2014 23:13:10 +0900 Subject: blktrace: fix accounting of partially completed requests trace_block_rq_complete does not take into account that request can be partially completed, so we can get the following incorrect output of blkparser: C R 232 + 240 [0] C R 240 + 232 [0] C R 248 + 224 [0] C R 256 + 216 [0] but should be: C R 232 + 8 [0] C R 240 + 8 [0] C R 248 + 8 [0] C R 256 + 8 [0] Also, the whole output summary statistics of completed requests and final throughput will be incorrect. This patch takes into account real completion size of the request and fixes wrong completion accounting. Signed-off-by: Roman Pen CC: Steven Rostedt CC: Frederic Weisbecker CC: Ingo Molnar CC: linux-kernel@vger.kernel.org Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-mq.c | 2 +- include/trace/events/block.h | 33 ++++++++++++++++++++++++++++++--- kernel/trace/blktrace.c | 20 +++++++++++--------- 4 files changed, 43 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 853f92749202..99e20cca37e1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2354,7 +2354,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) if (!req->bio) return false; - trace_block_rq_complete(req->q, req); + trace_block_rq_complete(req->q, req, nr_bytes); /* * For fs requests, rq is just carrier of independent bio's diff --git a/block/blk-mq.c b/block/blk-mq.c index 6468a715a0e4..01d8735db8d3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -309,7 +309,7 @@ void blk_mq_end_io(struct request *rq, int error) struct bio *bio = rq->bio; unsigned int bytes = 0; - trace_block_rq_complete(rq->q, rq); + trace_block_rq_complete(rq->q, rq, blk_rq_bytes(rq)); while (bio) { struct bio *next = bio->bi_next; diff --git a/include/trace/events/block.h b/include/trace/events/block.h index e76ae19a8d6f..e8a5eca1dbe5 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -132,6 +132,7 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue, * block_rq_complete - block IO operation completed by device driver * @q: queue containing the block operation request * @rq: block operations request + * @nr_bytes: number of completed bytes * * The block_rq_complete tracepoint event indicates that some portion * of operation request has been completed by the device driver. If @@ -139,11 +140,37 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue, * do for the request. If @rq->bio is non-NULL then there is * additional work required to complete the request. */ -DEFINE_EVENT(block_rq_with_error, block_rq_complete, +TRACE_EVENT(block_rq_complete, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request_queue *q, struct request *rq, + unsigned int nr_bytes), - TP_ARGS(q, rq) + TP_ARGS(q, rq, nr_bytes), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, RWBS_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_rq_pos(rq); + __entry->nr_sector = nr_bytes >> 9; + __entry->errors = rq->errors; + + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) ); DECLARE_EVENT_CLASS(block_rq, diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b418cb0d7242..4f3a3c03eadb 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -702,6 +702,7 @@ void blk_trace_shutdown(struct request_queue *q) * blk_add_trace_rq - Add a trace for a request oriented action * @q: queue the io is for * @rq: the source request + * @nr_bytes: number of completed bytes * @what: the action * * Description: @@ -709,7 +710,7 @@ void blk_trace_shutdown(struct request_queue *q) * **/ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, - u32 what) + unsigned int nr_bytes, u32 what) { struct blk_trace *bt = q->blk_trace; @@ -718,11 +719,11 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { what |= BLK_TC_ACT(BLK_TC_PC); - __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags, + __blk_add_trace(bt, 0, nr_bytes, rq->cmd_flags, what, rq->errors, rq->cmd_len, rq->cmd); } else { what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), + __blk_add_trace(bt, blk_rq_pos(rq), nr_bytes, rq->cmd_flags, what, rq->errors, 0, NULL); } } @@ -730,33 +731,34 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, static void blk_add_trace_rq_abort(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_ABORT); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ABORT); } static void blk_add_trace_rq_insert(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_INSERT); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_INSERT); } static void blk_add_trace_rq_issue(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_ISSUE); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ISSUE); } static void blk_add_trace_rq_requeue(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_REQUEUE); } static void blk_add_trace_rq_complete(void *ignore, struct request_queue *q, - struct request *rq) + struct request *rq, + unsigned int nr_bytes) { - blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); + blk_add_trace_rq(q, rq, nr_bytes, BLK_TA_COMPLETE); } /** -- cgit v1.2.3 From 89f8b33ca1ea881d1d84542282cb85d07d02e78d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 13 Mar 2014 09:38:42 -0600 Subject: block: remove old blk_iopoll_enabled variable This was a debugging measure to toggle enabled/disabled when testing. But for real production setups, it's not safe to toggle this setting without either reloading drivers of quiescing IO first. Neither of which the toggle enforces. Additionally, it makes drivers deal with the conditional state. Remove it completely. It's up to the driver whether iopoll is enabled or not. Signed-off-by: Jens Axboe --- block/blk-iopoll.c | 3 - drivers/scsi/be2iscsi/be_main.c | 206 ++++++++++++---------------------------- drivers/scsi/ipr.c | 15 +-- include/linux/blk-iopoll.h | 2 - kernel/sysctl.c | 12 --- 5 files changed, 68 insertions(+), 170 deletions(-) (limited to 'include') diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c index 1855bf51edb0..c11d24e379e2 100644 --- a/block/blk-iopoll.c +++ b/block/blk-iopoll.c @@ -14,9 +14,6 @@ #include "blk.h" -int blk_iopoll_enabled = 1; -EXPORT_SYMBOL(blk_iopoll_enabled); - static unsigned int blk_iopoll_budget __read_mostly = 256; static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 1f375051483a..a929c3c9aedc 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -873,7 +873,6 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id) struct be_queue_info *cq; unsigned int num_eq_processed; struct be_eq_obj *pbe_eq; - unsigned long flags; pbe_eq = dev_id; eq = &pbe_eq->q; @@ -882,31 +881,15 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id) phba = pbe_eq->phba; num_eq_processed = 0; - if (blk_iopoll_enabled) { - while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] - & EQE_VALID_MASK) { - if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) - blk_iopoll_sched(&pbe_eq->iopoll); - - AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); - queue_tail_inc(eq); - eqe = queue_tail_node(eq); - num_eq_processed++; - } - } else { - while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] - & EQE_VALID_MASK) { - spin_lock_irqsave(&phba->isr_lock, flags); - pbe_eq->todo_cq = true; - spin_unlock_irqrestore(&phba->isr_lock, flags); - AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); - queue_tail_inc(eq); - eqe = queue_tail_node(eq); - num_eq_processed++; - } + while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] + & EQE_VALID_MASK) { + if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) + blk_iopoll_sched(&pbe_eq->iopoll); - if (pbe_eq->todo_cq) - queue_work(phba->wq, &pbe_eq->work_cqs); + AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); + queue_tail_inc(eq); + eqe = queue_tail_node(eq); + num_eq_processed++; } if (num_eq_processed) @@ -927,7 +910,6 @@ static irqreturn_t be_isr(int irq, void *dev_id) struct hwi_context_memory *phwi_context; struct be_eq_entry *eqe = NULL; struct be_queue_info *eq; - struct be_queue_info *cq; struct be_queue_info *mcc; unsigned long flags, index; unsigned int num_mcceq_processed, num_ioeq_processed; @@ -953,72 +935,40 @@ static irqreturn_t be_isr(int irq, void *dev_id) num_ioeq_processed = 0; num_mcceq_processed = 0; - if (blk_iopoll_enabled) { - while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] - & EQE_VALID_MASK) { - if (((eqe->dw[offsetof(struct amap_eq_entry, - resource_id) / 32] & - EQE_RESID_MASK) >> 16) == mcc->id) { - spin_lock_irqsave(&phba->isr_lock, flags); - pbe_eq->todo_mcc_cq = true; - spin_unlock_irqrestore(&phba->isr_lock, flags); - num_mcceq_processed++; - } else { - if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) - blk_iopoll_sched(&pbe_eq->iopoll); - num_ioeq_processed++; - } - AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); - queue_tail_inc(eq); - eqe = queue_tail_node(eq); - } - if (num_ioeq_processed || num_mcceq_processed) { - if (pbe_eq->todo_mcc_cq) - queue_work(phba->wq, &pbe_eq->work_cqs); - - if ((num_mcceq_processed) && (!num_ioeq_processed)) - hwi_ring_eq_db(phba, eq->id, 0, - (num_ioeq_processed + - num_mcceq_processed) , 1, 1); - else - hwi_ring_eq_db(phba, eq->id, 0, - (num_ioeq_processed + - num_mcceq_processed), 0, 1); - - return IRQ_HANDLED; - } else - return IRQ_NONE; - } else { - cq = &phwi_context->be_cq[0]; - while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] - & EQE_VALID_MASK) { - - if (((eqe->dw[offsetof(struct amap_eq_entry, - resource_id) / 32] & - EQE_RESID_MASK) >> 16) != cq->id) { - spin_lock_irqsave(&phba->isr_lock, flags); - pbe_eq->todo_mcc_cq = true; - spin_unlock_irqrestore(&phba->isr_lock, flags); - } else { - spin_lock_irqsave(&phba->isr_lock, flags); - pbe_eq->todo_cq = true; - spin_unlock_irqrestore(&phba->isr_lock, flags); - } - AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); - queue_tail_inc(eq); - eqe = queue_tail_node(eq); + while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] + & EQE_VALID_MASK) { + if (((eqe->dw[offsetof(struct amap_eq_entry, + resource_id) / 32] & + EQE_RESID_MASK) >> 16) == mcc->id) { + spin_lock_irqsave(&phba->isr_lock, flags); + pbe_eq->todo_mcc_cq = true; + spin_unlock_irqrestore(&phba->isr_lock, flags); + num_mcceq_processed++; + } else { + if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) + blk_iopoll_sched(&pbe_eq->iopoll); num_ioeq_processed++; } - if (pbe_eq->todo_cq || pbe_eq->todo_mcc_cq) + AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); + queue_tail_inc(eq); + eqe = queue_tail_node(eq); + } + if (num_ioeq_processed || num_mcceq_processed) { + if (pbe_eq->todo_mcc_cq) queue_work(phba->wq, &pbe_eq->work_cqs); - if (num_ioeq_processed) { + if ((num_mcceq_processed) && (!num_ioeq_processed)) hwi_ring_eq_db(phba, eq->id, 0, - num_ioeq_processed, 1, 1); - return IRQ_HANDLED; - } else - return IRQ_NONE; - } + (num_ioeq_processed + + num_mcceq_processed) , 1, 1); + else + hwi_ring_eq_db(phba, eq->id, 0, + (num_ioeq_processed + + num_mcceq_processed), 0, 1); + + return IRQ_HANDLED; + } else + return IRQ_NONE; } static int beiscsi_init_irqs(struct beiscsi_hba *phba) @@ -5216,11 +5166,10 @@ static void beiscsi_quiesce(struct beiscsi_hba *phba, } pci_disable_msix(phba->pcidev); - if (blk_iopoll_enabled) - for (i = 0; i < phba->num_cpus; i++) { - pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_disable(&pbe_eq->iopoll); - } + for (i = 0; i < phba->num_cpus; i++) { + pbe_eq = &phwi_context->be_eq[i]; + blk_iopoll_disable(&pbe_eq->iopoll); + } if (unload_state == BEISCSI_CLEAN_UNLOAD) { destroy_workqueue(phba->wq); @@ -5429,32 +5378,18 @@ static void beiscsi_eeh_resume(struct pci_dev *pdev) phwi_ctrlr = phba->phwi_ctrlr; phwi_context = phwi_ctrlr->phwi_ctxt; - if (blk_iopoll_enabled) { - for (i = 0; i < phba->num_cpus; i++) { - pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, - be_iopoll); - blk_iopoll_enable(&pbe_eq->iopoll); - } - - i = (phba->msix_enabled) ? i : 0; - /* Work item for MCC handling */ + for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - INIT_WORK(&pbe_eq->work_cqs, beiscsi_process_all_cqs); - } else { - if (phba->msix_enabled) { - for (i = 0; i <= phba->num_cpus; i++) { - pbe_eq = &phwi_context->be_eq[i]; - INIT_WORK(&pbe_eq->work_cqs, - beiscsi_process_all_cqs); - } - } else { - pbe_eq = &phwi_context->be_eq[0]; - INIT_WORK(&pbe_eq->work_cqs, - beiscsi_process_all_cqs); - } + blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, + be_iopoll); + blk_iopoll_enable(&pbe_eq->iopoll); } + i = (phba->msix_enabled) ? i : 0; + /* Work item for MCC handling */ + pbe_eq = &phwi_context->be_eq[i]; + INIT_WORK(&pbe_eq->work_cqs, beiscsi_process_all_cqs); + ret = beiscsi_init_irqs(phba); if (ret < 0) { beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT, @@ -5614,32 +5549,18 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev, phwi_ctrlr = phba->phwi_ctrlr; phwi_context = phwi_ctrlr->phwi_ctxt; - if (blk_iopoll_enabled) { - for (i = 0; i < phba->num_cpus; i++) { - pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, - be_iopoll); - blk_iopoll_enable(&pbe_eq->iopoll); - } - - i = (phba->msix_enabled) ? i : 0; - /* Work item for MCC handling */ + for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - INIT_WORK(&pbe_eq->work_cqs, beiscsi_process_all_cqs); - } else { - if (phba->msix_enabled) { - for (i = 0; i <= phba->num_cpus; i++) { - pbe_eq = &phwi_context->be_eq[i]; - INIT_WORK(&pbe_eq->work_cqs, - beiscsi_process_all_cqs); - } - } else { - pbe_eq = &phwi_context->be_eq[0]; - INIT_WORK(&pbe_eq->work_cqs, - beiscsi_process_all_cqs); - } + blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, + be_iopoll); + blk_iopoll_enable(&pbe_eq->iopoll); } + i = (phba->msix_enabled) ? i : 0; + /* Work item for MCC handling */ + pbe_eq = &phwi_context->be_eq[i]; + INIT_WORK(&pbe_eq->work_cqs, beiscsi_process_all_cqs); + ret = beiscsi_init_irqs(phba); if (ret < 0) { beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT, @@ -5668,11 +5589,10 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev, free_blkenbld: destroy_workqueue(phba->wq); - if (blk_iopoll_enabled) - for (i = 0; i < phba->num_cpus; i++) { - pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_disable(&pbe_eq->iopoll); - } + for (i = 0; i < phba->num_cpus; i++) { + pbe_eq = &phwi_context->be_eq[i]; + blk_iopoll_disable(&pbe_eq->iopoll); + } free_twq: beiscsi_clean_port(phba); beiscsi_free_mem(phba); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 3f5b56a99892..69470f5c0ac9 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3630,16 +3630,14 @@ static ssize_t ipr_store_iopoll_weight(struct device *dev, return strlen(buf); } - if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && - ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { for (i = 1; i < ioa_cfg->hrrq_num; i++) blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); } spin_lock_irqsave(shost->host_lock, lock_flags); ioa_cfg->iopoll_weight = user_iopoll_weight; - if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && - ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { for (i = 1; i < ioa_cfg->hrrq_num; i++) { blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, ioa_cfg->iopoll_weight, ipr_iopoll); @@ -5484,8 +5482,7 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp) return IRQ_NONE; } - if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && - ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == hrrq->toggle_bit) { if (!blk_iopoll_sched_prep(&hrrq->iopoll)) @@ -9859,8 +9856,7 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) ioa_cfg->host->max_channel = IPR_VSET_BUS; ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight; - if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && - ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { for (i = 1; i < ioa_cfg->hrrq_num; i++) { blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, ioa_cfg->iopoll_weight, ipr_iopoll); @@ -9889,8 +9885,7 @@ static void ipr_shutdown(struct pci_dev *pdev) int i; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && - ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { ioa_cfg->iopoll_weight = 0; for (i = 1; i < ioa_cfg->hrrq_num; i++) blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h index 308734d3d4a2..77ae77c0b704 100644 --- a/include/linux/blk-iopoll.h +++ b/include/linux/blk-iopoll.h @@ -43,6 +43,4 @@ extern void __blk_iopoll_complete(struct blk_iopoll *); extern void blk_iopoll_enable(struct blk_iopoll *); extern void blk_iopoll_disable(struct blk_iopoll *); -extern int blk_iopoll_enabled; - #endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 49e13e1f8fe6..ef0bf04e8649 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -112,9 +112,6 @@ extern int sysctl_nr_open_min, sysctl_nr_open_max; #ifndef CONFIG_MMU extern int sysctl_nr_trim_pages; #endif -#ifdef CONFIG_BLOCK -extern int blk_iopoll_enabled; -#endif /* Constants used for minimum and maximum */ #ifdef CONFIG_LOCKUP_DETECTOR @@ -1093,15 +1090,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, -#endif -#ifdef CONFIG_BLOCK - { - .procname = "blk_iopoll", - .data = &blk_iopoll_enabled, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, #endif { } }; -- cgit v1.2.3 From 95363efde193079541cb379eb47140e9c4d355d5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 14 Mar 2014 10:43:15 -0600 Subject: blk-mq: allow blk_mq_init_commands() to return failure If drivers do dynamic allocation in the hardware command init path, then we need to be able to handle and return failures. And if they do allocations or mappings in the init command path, then we need a cleanup function to free up that space at exit time. So add blk_mq_free_commands() as the cleanup function. This is required for the mtip32xx driver conversion to blk-mq. Signed-off-by: Jens Axboe --- block/blk-mq.c | 52 +++++++++++++++++++++++++++++++++++++++------- drivers/block/virtio_blk.c | 3 ++- include/linux/blk-mq.h | 3 ++- 3 files changed, 49 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/block/blk-mq.c b/block/blk-mq.c index 01d8735db8d3..92284af4e0df 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1058,8 +1058,46 @@ static void blk_mq_hctx_notify(void *data, unsigned long action, blk_mq_put_ctx(ctx); } -static void blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx, - void (*init)(void *, struct blk_mq_hw_ctx *, +static int blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx, + int (*init)(void *, struct blk_mq_hw_ctx *, + struct request *, unsigned int), + void *data) +{ + unsigned int i; + int ret = 0; + + for (i = 0; i < hctx->queue_depth; i++) { + struct request *rq = hctx->rqs[i]; + + ret = init(data, hctx, rq, i); + if (ret) + break; + } + + return ret; +} + +int blk_mq_init_commands(struct request_queue *q, + int (*init)(void *, struct blk_mq_hw_ctx *, + struct request *, unsigned int), + void *data) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + int ret = 0; + + queue_for_each_hw_ctx(q, hctx, i) { + ret = blk_mq_init_hw_commands(hctx, init, data); + if (ret) + break; + } + + return ret; +} +EXPORT_SYMBOL(blk_mq_init_commands); + +static void blk_mq_free_hw_commands(struct blk_mq_hw_ctx *hctx, + void (*free)(void *, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data) { @@ -1068,12 +1106,12 @@ static void blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx, for (i = 0; i < hctx->queue_depth; i++) { struct request *rq = hctx->rqs[i]; - init(data, hctx, rq, i); + free(data, hctx, rq, i); } } -void blk_mq_init_commands(struct request_queue *q, - void (*init)(void *, struct blk_mq_hw_ctx *, +void blk_mq_free_commands(struct request_queue *q, + void (*free)(void *, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data) { @@ -1081,9 +1119,9 @@ void blk_mq_init_commands(struct request_queue *q, unsigned int i; queue_for_each_hw_ctx(q, hctx, i) - blk_mq_init_hw_commands(hctx, init, data); + blk_mq_free_hw_commands(hctx, free, data); } -EXPORT_SYMBOL(blk_mq_init_commands); +EXPORT_SYMBOL(blk_mq_free_commands); static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx) { diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index b1cb3f4c4db4..0eace43cea11 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -490,13 +490,14 @@ static struct blk_mq_reg virtio_mq_reg = { .flags = BLK_MQ_F_SHOULD_MERGE, }; -static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx, +static int virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx, struct request *rq, unsigned int nr) { struct virtio_blk *vblk = data; struct virtblk_req *vbr = rq->special; sg_init_table(vbr->sg, vblk->sg_elems); + return 0; } static int virtblk_probe(struct virtio_device *vdev) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 18ba8a627f46..33ff10ebcabb 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -117,7 +117,8 @@ enum { struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *); int blk_mq_register_disk(struct gendisk *); void blk_mq_unregister_disk(struct gendisk *); -void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); +int blk_mq_init_commands(struct request_queue *, int (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); +void blk_mq_free_commands(struct request_queue *, void (*free)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -- cgit v1.2.3 From 5d12f905cc50c0810628d0deedd478ec2db48659 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 19 Mar 2014 15:25:02 -0600 Subject: blk-mq: fix wrong usage of hctx->state vs hctx->flags BLK_MQ_F_* flags are for hctx->flags, and are non-atomic and set at registration time. BLK_MQ_S_* flags are dynamic and atomic, and are accessed through hctx->state. Some of the BLK_MQ_S_STOPPED uses were wrong. Additionally, the header file should not use a bit shift for the _S_ flags, as they are done through the set/test_bit functions. Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 +++--- include/linux/blk-mq.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/block/blk-mq.c b/block/blk-mq.c index 92284af4e0df..ed216f27e3b8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -547,7 +547,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) LIST_HEAD(rq_list); int bit, queued; - if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->flags))) + if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) return; hctx->run++; @@ -636,7 +636,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) { - if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->flags))) + if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) return; if (!async) @@ -656,7 +656,7 @@ void blk_mq_run_queues(struct request_queue *q, bool async) queue_for_each_hw_ctx(q, hctx, i) { if ((!blk_mq_hctx_has_pending(hctx) && list_empty_careful(&hctx->dispatch)) || - test_bit(BLK_MQ_S_STOPPED, &hctx->flags)) + test_bit(BLK_MQ_S_STOPPED, &hctx->state)) continue; blk_mq_run_hw_queue(hctx, async); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 33ff10ebcabb..bb68b03741be 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -109,7 +109,7 @@ enum { BLK_MQ_F_SHOULD_SORT = 1 << 1, BLK_MQ_F_SHOULD_IPI = 1 << 2, - BLK_MQ_S_STOPPED = 1 << 0, + BLK_MQ_S_STOPPED = 0, BLK_MQ_MAX_DEPTH = 2048, }; -- cgit v1.2.3 From eeabc850b79336575da7be3dbe186a2da4de8293 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Mar 2014 08:57:37 -0600 Subject: blk-mq: merge blk_mq_insert_request and blk_mq_run_request It's almost identical to blk_mq_insert_request, so fold the two into one slightly more generic function by making the flush special case a bit smarted. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-exec.c | 2 +- block/blk-flush.c | 4 ++-- block/blk-mq.c | 53 ++++++++++---------------------------------------- block/blk-mq.h | 1 - include/linux/blk-mq.h | 3 +-- 5 files changed, 14 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/block/blk-exec.c b/block/blk-exec.c index c68613bb4c79..dbf4502b1d67 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, * be resued after dying flag is set */ if (q->mq_ops) { - blk_mq_insert_request(q, rq, at_head, true); + blk_mq_insert_request(rq, at_head, true, false); return; } diff --git a/block/blk-flush.c b/block/blk-flush.c index 66e2b697f5db..f598f794c3c6 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -137,7 +137,7 @@ static void mq_flush_run(struct work_struct *work) rq = container_of(work, struct request, mq_flush_work); memset(&rq->csd, 0, sizeof(rq->csd)); - blk_mq_run_request(rq, true, false); + blk_mq_insert_request(rq, false, true, false); } static bool blk_flush_queue_rq(struct request *rq) @@ -411,7 +411,7 @@ void blk_insert_flush(struct request *rq) if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { if (q->mq_ops) { - blk_mq_run_request(rq, false, true); + blk_mq_insert_request(rq, false, false, true); } else list_add_tail(&rq->queuelist, &q->queue_head); return; diff --git a/block/blk-mq.c b/block/blk-mq.c index a56e77383738..81ff7879bac8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -724,61 +724,28 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, blk_mq_add_timer(rq); } -void blk_mq_insert_request(struct request_queue *q, struct request *rq, - bool at_head, bool run_queue) +void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, + bool async) { + struct request_queue *q = rq->q; struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx, *current_ctx; + struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx; + + current_ctx = blk_mq_get_ctx(q); + if (!cpu_online(ctx->cpu)) + rq->mq_ctx = ctx = current_ctx; - ctx = rq->mq_ctx; hctx = q->mq_ops->map_queue(q, ctx->cpu); - if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) { + if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA) && + !(rq->cmd_flags & (REQ_FLUSH_SEQ))) { blk_insert_flush(rq); } else { - current_ctx = blk_mq_get_ctx(q); - - if (!cpu_online(ctx->cpu)) { - ctx = current_ctx; - hctx = q->mq_ops->map_queue(q, ctx->cpu); - rq->mq_ctx = ctx; - } spin_lock(&ctx->lock); __blk_mq_insert_request(hctx, rq, at_head); spin_unlock(&ctx->lock); - - blk_mq_put_ctx(current_ctx); } - if (run_queue) - __blk_mq_run_hw_queue(hctx); -} -EXPORT_SYMBOL(blk_mq_insert_request); - -/* - * This is a special version of blk_mq_insert_request to bypass FLUSH request - * check. Should only be used internally. - */ -void blk_mq_run_request(struct request *rq, bool run_queue, bool async) -{ - struct request_queue *q = rq->q; - struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx, *current_ctx; - - current_ctx = blk_mq_get_ctx(q); - - ctx = rq->mq_ctx; - if (!cpu_online(ctx->cpu)) { - ctx = current_ctx; - rq->mq_ctx = ctx; - } - hctx = q->mq_ops->map_queue(q, ctx->cpu); - - /* ctx->cpu might be offline */ - spin_lock(&ctx->lock); - __blk_mq_insert_request(hctx, rq, false); - spin_unlock(&ctx->lock); - blk_mq_put_ctx(current_ctx); if (run_queue) diff --git a/block/blk-mq.h b/block/blk-mq.h index 361f9343dab1..ebbe6bac9d61 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -23,7 +23,6 @@ struct blk_mq_ctx { }; void __blk_mq_complete_request(struct request *rq); -void blk_mq_run_request(struct request *rq, bool run_queue, bool async); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); void blk_mq_drain_queue(struct request_queue *q); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index bb68b03741be..349c730a579e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -122,8 +122,7 @@ void blk_mq_free_commands(struct request_queue *, void (*free)(void *data, struc void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -void blk_mq_insert_request(struct request_queue *, struct request *, - bool, bool); +void blk_mq_insert_request(struct request *, bool, bool, bool); void blk_mq_run_queues(struct request_queue *q, bool async); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); -- cgit v1.2.3